sentiment_insights 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,308 @@
1
+ require 'csv'
2
+ require 'time'
3
+
4
module SentimentInsights
  module Export
    # Shared behaviour for exporters of analysis results.
    #
    # Wraps an analysis result hash, detects which kind of analysis it holds
    # (sentiment, entities or key phrases) and provides helpers that turn the
    # result into tabular headers/rows, summary statistics and per-segment
    # statistics. Subclasses decide how the data is written out.
    class BaseExporter
      attr_reader :result, :options, :analysis_type

      # @param result [Hash] analysis result; must respond to +key?+ and +[]+
      # @param options [Hash, nil] overrides merged on top of #default_options
      # @raise [ArgumentError] if +result+ is not hash-like
      def initialize(result, options = {})
        unless result.respond_to?(:key?)
          raise ArgumentError, "result must be a Hash-like analysis result, got #{result.class}"
        end

        @result = result
        @options = default_options.merge(options || {})
        @analysis_type = detect_analysis_type(result)
      end

      protected

      # Option values used when the caller does not override them.
      #
      # @return [Hash]
      def default_options
        {
          include_summary: true,
          include_segments: true,
          include_timestamp: true,
          timestamp_format: "%Y-%m-%d %H:%M:%S UTC",
          encoding: "UTF-8"
        }
      end

      # Infers the analysis kind from the top-level keys of the result.
      # The checks are ordered: :entities wins over :phrases, which wins over
      # the sentiment keys.
      #
      # @return [Symbol] :entities, :key_phrases, :sentiment or :unknown
      def detect_analysis_type(result)
        return :entities if result.key?(:entities)
        return :key_phrases if result.key?(:phrases)
        return :sentiment if result.key?(:global_summary) || result.key?(:responses)

        :unknown
      end

      # Builds "<analysis_type>_analysis_<YYYYMMDD_HHMMSS>.<extension>".
      #
      # @param extension [String] file extension without the leading dot
      # @return [String]
      def generate_filename(extension)
        stamp = Time.now.strftime("%Y%m%d_%H%M%S")
        "#{analysis_type}_analysis_#{stamp}.#{extension}"
      end

      # Collects every flattened segment key that appears in any response.
      #
      # Implemented with Array#uniq rather than Set so that the file does not
      # depend on `require 'set'`, which it never loads.
      #
      # @return [Array<String>] sorted, de-duplicated flattened keys
      def detect_all_segment_keys
        extract_responses_data
          .flat_map { |response| flatten_hash(response[:segment]).keys }
          .uniq
          .sort
      end

      # Adds the flattened leaf keys of +hash+ to +keys_set+ (anything that
      # supports +<<+). Nested keys are joined with "_". Non-Hash input is
      # ignored.
      #
      # @param hash [Hash, Object]
      # @param keys_set [#<<] collection that receives the keys
      # @param prefix [String] key prefix used while recursing
      # @return [void]
      def flatten_hash_keys(hash, keys_set, prefix = "")
        return unless hash.is_a?(Hash)

        hash.each do |key, value|
          full_key = prefix.empty? ? key.to_s : "#{prefix}_#{key}"

          if value.is_a?(Hash)
            flatten_hash_keys(value, keys_set, full_key)
          else
            keys_set << full_key
          end
        end
      end

      # Returns the segment values for +all_keys+, in that order.
      #
      # A missing segment, a missing key and a nil value all yield "" so that
      # every row is padded the same way; non-nil values (including +false+)
      # are passed through untouched.
      #
      # @param segment_data [Hash, nil]
      # @param all_keys [Array<String>] flattened keys, e.g. from #detect_all_segment_keys
      # @return [Array]
      def extract_segment_values(segment_data, all_keys)
        flattened = flatten_hash(segment_data)

        all_keys.map do |key|
          value = flattened[key]
          value.nil? ? "" : value
        end
      end

      # Flattens a nested hash into a single level, joining nested keys with
      # "_" and stringifying them. Non-Hash input yields an empty hash.
      #
      # @param hash [Hash, Object]
      # @param prefix [String] key prefix used while recursing
      # @return [Hash{String => Object}]
      def flatten_hash(hash, prefix = "")
        return {} unless hash.is_a?(Hash)

        hash.each_with_object({}) do |(key, value), flat|
          full_key = prefix.empty? ? key.to_s : "#{prefix}_#{key}"

          if value.is_a?(Hash)
            flat.merge!(flatten_hash(value, full_key))
          else
            flat[full_key] = value
          end
        end
      end

      # Extract responses data based on analysis type.
      #
      # @return [Array] result[:responses] for a recognised analysis type
      #   (or [] when that key is absent); [] for an unknown type
      def extract_responses_data
        return [] unless %i[sentiment entities key_phrases].include?(analysis_type)

        result[:responses] || []
      end

      # Build headers for CSV/Excel: the type-specific base columns, one
      # "segment_<key>" column per detected segment key and, when enabled, a
      # trailing "timestamp" column.
      #
      # @return [Array<String>]
      def build_headers
        base_headers =
          case analysis_type
          when :sentiment then %w[response_id text sentiment_label sentiment_score]
          when :entities then %w[response_id text entities_found]
          when :key_phrases then %w[response_id text sentiment sentiment_score key_phrases_found]
          else %w[response_id text]
          end

        headers = base_headers + detect_all_segment_keys.map { |key| "segment_#{key}" }
        headers << "timestamp" if options[:include_timestamp]
        headers
      end

      # Build row data for responses, in the same column order as
      # #build_headers.
      #
      # @return [Array<Array>]
      def build_response_rows
        segment_keys = detect_all_segment_keys

        extract_responses_data.each_with_index.map do |response, index|
          row = build_base_row(response, index)
          row += extract_segment_values(response[:segment], segment_keys)
          row << format_timestamp if options[:include_timestamp]
          row
        end
      end

      # Builds the type-specific leading columns for one response.
      #
      # @param response [Hash]
      # @param index [Integer] zero-based position, used to synthesise an id
      #   ("r_<index + 1>") when the response has none
      # @return [Array]
      def build_base_row(response, index)
        response_id = response[:id] || "r_#{index + 1}"
        text = response[:sentence] || response[:answer] || ""

        case analysis_type
        when :sentiment
          label = response[:sentiment_label] || response[:sentiment] || ""
          [response_id, text, label, response[:sentiment_score] || 0.0]
        when :entities
          [response_id, text, format_entities_for_export(response)]
        when :key_phrases
          [
            response_id,
            text,
            response[:sentiment] || "",
            response[:sentiment_score] || 0.0,
            format_phrases_for_export(response)
          ]
        else
          [response_id, text]
        end
      end

      # Formats response[:entities] as "text1:TYPE1,text2:TYPE2".
      #
      # @return [String] "" when the response carries no :entities
      def format_entities_for_export(response)
        entities = response[:entities]
        return "" unless entities

        entities.map { |entity| "#{entity[:text]}:#{entity[:type]}" }.join(",")
      end

      # Joins response[:phrases] with commas.
      #
      # @return [String] "" when the response carries no :phrases
      def format_phrases_for_export(response)
        phrases = response[:phrases]
        return "" unless phrases

        phrases.join(",")
      end

      # Current time rendered with options[:timestamp_format].
      #
      # The time is converted to UTC first: the default format ends in a
      # literal "UTC", so formatting the local time would mislabel it.
      #
      # @return [String]
      def format_timestamp
        Time.now.utc.strftime(options[:timestamp_format])
      end

      # Summary statistics extraction, dispatched on the analysis type.
      #
      # @return [Hash{String => Object}] label => value; {} for unknown types
      def extract_summary_data
        case analysis_type
        when :sentiment then extract_sentiment_summary
        when :entities then extract_entities_summary
        when :key_phrases then extract_phrases_summary
        else {}
        end
      end

      # Labels the values of result[:global_summary].
      #
      # @return [Hash{String => Object}] {} when there is no global summary
      def extract_sentiment_summary
        summary = result[:global_summary]
        return {} unless summary

        {
          "Total Responses" => summary[:total_count],
          "Positive Count" => summary[:positive_count],
          "Neutral Count" => summary[:neutral_count],
          "Negative Count" => summary[:negative_count],
          "Positive Percentage" => "#{summary[:positive_percentage]}%",
          "Neutral Percentage" => "#{summary[:neutral_percentage]}%",
          "Negative Percentage" => "#{summary[:negative_percentage]}%",
          "Net Sentiment Score" => summary[:net_sentiment_score]
        }
      end

      # Counts entities overall and per type, and names the entity with the
      # most mentions. Entities without a type are counted under "Unknown".
      #
      # @return [Hash{String => Object}] {} when there is no :entities key
      def extract_entities_summary
        entities = result[:entities]
        return {} unless entities

        summary = {
          "Total Unique Entities" => entities.length,
          "Total Mentions" => entities.sum { |entity| mention_count(entity) }
        }

        entities.group_by { |entity| entity[:type] }.each do |type, group|
          summary["#{entity_type_label(type)} Count"] = group.length
        end

        top = entities.max_by { |entity| mention_count(entity) }
        summary["Most Mentioned Entity"] = "#{top[:entity]} (#{mention_count(top)} times)" if top

        summary
      end

      # Counts phrases and mentions and the mean mentions per phrase
      # (rounded to two decimals; 0 when there are no phrases).
      #
      # @return [Hash{String => Object}] {} when there is no :phrases key
      def extract_phrases_summary
        phrases = result[:phrases]
        return {} unless phrases

        total_mentions = phrases.sum { |phrase| mention_count(phrase) }
        average = phrases.empty? ? 0 : (total_mentions.to_f / phrases.length).round(2)

        {
          "Total Unique Phrases" => phrases.length,
          "Total Mentions" => total_mentions,
          "Average Mentions per Phrase" => average
        }
      end

      # Segment analysis data: one hash per (segment type, segment value)
      # pair found in result[:segment_summary].
      #
      # The :top_positive_comments / :top_negative_comments entries are
      # skipped, as is any other entry that is not a Hash of
      # value => stats, since it cannot be tabulated.
      #
      # @return [Array<Hash>] [] when segments are disabled or absent
      def extract_segment_data
        segment_summary = result[:segment_summary]
        return [] unless segment_summary && options[:include_segments]

        segment_summary.each_with_object([]) do |(segment_type, segments), rows|
          next if %i[top_positive_comments top_negative_comments].include?(segment_type)
          next unless segments.is_a?(Hash)

          type_label = segment_type.to_s.tr("_", " ").split.map(&:capitalize).join(" ")

          segments.each do |segment_value, stats|
            rows << {
              segment_type: type_label,
              segment_value: segment_value,
              total_count: stats[:total_count],
              positive_percentage: "#{stats[:positive_percentage]}%",
              neutral_percentage: "#{stats[:neutral_percentage]}%",
              negative_percentage: "#{stats[:negative_percentage]}%",
              net_score: stats[:net_sentiment_score]
            }
          end
        end
      end

      # Apply filters if specified in options[:filter].
      #
      # Supported filter keys:
      # * :sentiment - one label or a list; labels are compared as strings,
      #   so "positive" and :positive are interchangeable on both sides.
      # * :segments  - { segment_key => allowed value(s) }; every entry must
      #   match. The segment key may be given as a String or a Symbol.
      # * :date_range - accepted but not applied (responses carry no
      #   timestamp to filter on).
      #
      # @param data [Hash] analysis result
      # @return [Hash] +data+ itself when no filter is set; otherwise a
      #   shallow copy whose :responses has been narrowed (+data+ is not
      #   modified)
      def apply_filters(data)
        filter_options = options[:filter]
        return data unless filter_options

        filtered_data = data.dup
        responses = filtered_data[:responses]
        return filtered_data unless responses

        if filter_options[:sentiment]
          allowed = Array(filter_options[:sentiment]).map(&:to_s)
          responses = responses.select do |response|
            label = response[:sentiment_label] || response[:sentiment]
            !label.nil? && allowed.include?(label.to_s)
          end
        end

        if filter_options[:segments]
          responses = responses.select do |response|
            segment = response[:segment].is_a?(Hash) ? response[:segment] : {}
            filter_options[:segments].all? do |key, values|
              Array(values).include?(segment_value_for(segment, key))
            end
          end
        end

        # Filter by date range (if timestamp is available): not implemented,
        # it would require timestamp data in the responses.

        filtered_data[:responses] = responses
        filtered_data
      end

      private

      # Number of mentions recorded on an entity/phrase hash (0 when absent).
      def mention_count(item)
        item[:mentions]&.length || 0
      end

      # Human-readable label for an entity type; blank or nil becomes "Unknown".
      def entity_type_label(type)
        label = type.to_s
        label.empty? ? "Unknown" : label.capitalize
      end

      # Reads segment[key], falling back to the String/Symbol twin of +key+.
      def segment_value_for(segment, key)
        return segment[key] if segment.key?(key)

        twin = key.is_a?(Symbol) ? key.to_s : key.to_s.to_sym
        segment[twin]
      end
    end
  end
end
@@ -0,0 +1,261 @@
1
+ require_relative 'base_exporter'
2
+
3
module SentimentInsights
  module Export
    # Writes an analysis result to a CSV file: a header row and one row per
    # response, optionally followed by a "SUMMARY STATISTICS" section and a
    # "SEGMENT ANALYSIS" section, each separated by an empty row.
    class CsvExporter < BaseExporter
      SEGMENT_SECTION_HEADERS = [
        "Segment Type", "Segment Value", "Total Count",
        "Positive %", "Neutral %", "Negative %", "Net Score"
      ].freeze

      # Writes the export to +filename+.
      #
      # Filters from options[:filter] are applied to the response rows only;
      # the summary and segment sections are computed from the full result.
      #
      # @param filename [String, nil] target path; generated when nil
      # @return [String] the path that was written
      def export(filename = nil)
        filename ||= generate_filename("csv")
        filtered_result = apply_filters(@result)

        CSV.open(filename, "w", encoding: options[:encoding]) do |csv|
          write_main_data(csv, filtered_result)

          if options[:include_summary]
            write_empty_row(csv)
            write_summary_section(csv)
          end

          # Only emit the separator when the section will actually produce
          # rows; otherwise the file would end with a stray blank row.
          if options[:include_segments] && !extract_segment_data.empty?
            write_empty_row(csv)
            write_segment_section(csv)
          end
        end

        filename
      end

      private

      # Writes the header row followed by one row per response in +data+.
      # For entity and key-phrase analyses each response is first enriched
      # with the entities/phrases that mention it.
      def write_main_data(csv, data)
        csv << build_headers

        responses =
          case analysis_type
          when :entities then build_entity_response_rows(data)
          when :key_phrases then build_phrase_response_rows(data)
          else data[:responses] || []
          end

        write_response_rows(csv, responses)
      end

      # Writes base columns, segment columns and (optionally) the timestamp
      # for every response, matching the column order of #build_headers.
      def write_response_rows(csv, responses)
        segment_keys = detect_all_segment_keys

        responses.each_with_index do |response, index|
          row = build_base_row(response, index)
          row += extract_segment_values(response[:segment], segment_keys)
          row << format_timestamp if options[:include_timestamp]
          csv << row
        end
      end

      # Returns copies of the responses with an :entities_found string
      # ("text:TYPE,...") listing the entities whose mentions include the
      # response's id.
      def build_entity_response_rows(data)
        entities_by_response = build_entities_lookup(data[:entities] || [])

        (data[:responses] || []).map do |response|
          # fetch avoids inserting an empty list for every unmatched id.
          found = entities_by_response.fetch(response[:id], [])
          response.merge(entities_found: format_entities_list(found))
        end
      end

      # Returns copies of the responses with a :phrases_found string listing
      # the phrases whose mentions include the response's id.
      def build_phrase_response_rows(data)
        phrases_by_response = build_phrases_lookup(data[:phrases] || [])

        (data[:responses] || []).map do |response|
          found = phrases_by_response.fetch(response[:id], [])
          response.merge(phrases_found: found.join(","))
        end
      end

      # Inverts entity => mentions into mention => [{text:, type:}, ...].
      # Each mention is used as a lookup key against response[:id].
      def build_entities_lookup(entities)
        entities.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |entity, lookup|
          (entity[:mentions] || []).each do |response_id|
            lookup[response_id] << { text: entity[:entity], type: entity[:type] }
          end
        end
      end

      # Inverts phrase => mentions into mention => [phrase, ...].
      def build_phrases_lookup(phrases)
        phrases.each_with_object(Hash.new { |hash, key| hash[key] = [] }) do |phrase, lookup|
          (phrase[:mentions] || []).each do |response_id|
            lookup[response_id] << phrase[:phrase]
          end
        end
      end

      # Formats [{text:, type:}, ...] as "text1:TYPE1,text2:TYPE2".
      def format_entities_list(entities)
        entities.map { |entity| "#{entity[:text]}:#{entity[:type]}" }.join(",")
      end

      # Writes the "SUMMARY STATISTICS" title and one label/value row per
      # summary entry.
      def write_summary_section(csv)
        csv << ["SUMMARY STATISTICS"]
        extract_summary_data.each { |label, value| csv << [label, value] }
      end

      # Writes the "SEGMENT ANALYSIS" title, its column headers and one row
      # per segment. Writes nothing when there is no segment data.
      def write_segment_section(csv)
        segment_data = extract_segment_data
        return if segment_data.empty?

        csv << ["SEGMENT ANALYSIS"]
        csv << SEGMENT_SECTION_HEADERS.dup

        segment_data.each do |segment|
          csv << segment.values_at(
            :segment_type, :segment_value, :total_count,
            :positive_percentage, :neutral_percentage, :negative_percentage,
            :net_score
          )
        end
      end

      def write_empty_row(csv)
        csv << []
      end

      # Enhanced entity export format: one row per entity with its mention
      # count, mentioning response ids and segment distribution.
      # NOTE(review): nothing in this class calls this method; #export does
      # not emit the section.
      def write_entity_details_section(csv, data)
        return unless analysis_type == :entities && data[:entities]

        write_empty_row(csv)
        csv << ["ENTITY DETAILS"]
        csv << ["Entity", "Type", "Total Mentions", "Response IDs", "Segment Distribution"]

        data[:entities].each do |entity|
          mentions = entity[:mentions] || []
          csv << [
            entity[:entity],
            entity[:type],
            mentions.length,
            mentions.join(","),
            format_segment_distribution(build_segment_distribution(mentions))
          ]
        end
      end

      # Phrase counterpart of #write_entity_details_section, with an extra
      # sentiment-distribution column.
      # NOTE(review): nothing in this class calls this method; #export does
      # not emit the section.
      def write_phrase_details_section(csv, data)
        return unless analysis_type == :key_phrases && data[:phrases]

        write_empty_row(csv)
        csv << ["PHRASE DETAILS"]
        csv << ["Phrase", "Total Mentions", "Response IDs", "Sentiment Distribution", "Segment Distribution"]

        data[:phrases].each do |phrase|
          mentions = phrase[:mentions] || []
          csv << [
            phrase[:phrase],
            mentions.length,
            mentions.join(","),
            format_sentiment_distribution(build_phrase_sentiment_distribution(mentions)),
            format_segment_distribution(build_segment_distribution(mentions))
          ]
        end
      end

      # Counts, per segment type and value, how many of the given responses
      # fall into it. Ids without a matching response or segment are skipped.
      #
      # @return [Hash{Object => Hash{Object => Integer}}]
      def build_segment_distribution(response_ids)
        distribution = Hash.new { |hash, key| hash[key] = Hash.new(0) }

        response_ids.each do |response_id|
          segment = find_response_by_id(response_id)&.[](:segment)
          next unless segment

          segment.each { |segment_type, segment_value| distribution[segment_type][segment_value] += 1 }
        end

        distribution
      end

      # Counts the given responses per sentiment (stringified); a response
      # without :sentiment / :sentiment_label counts as "neutral".
      #
      # @return [Hash{String => Integer}]
      def build_phrase_sentiment_distribution(response_ids)
        response_ids.each_with_object(Hash.new(0)) do |response_id, distribution|
          response = find_response_by_id(response_id)
          next unless response

          sentiment = response[:sentiment] || response[:sentiment_label] || "neutral"
          distribution[sentiment.to_s] += 1
        end
      end

      # Looks a response up by id through a memoized index, so building a
      # distribution does not rescan every response for every mention.
      #
      # @return [Hash, nil] the first response with that id, if any
      def find_response_by_id(response_id)
        responses_by_id[response_id]
      end

      # Index of id => first response carrying that id, built once.
      def responses_by_id
        @responses_by_id ||= extract_responses_data.each_with_object({}) do |response, index|
          id = response[:id]
          index[id] = response unless index.key?(id)
        end
      end

      # Formats { sentiment => count } as "sentiment:count,...".
      def format_sentiment_distribution(distribution)
        distribution.map { |sentiment, count| "#{sentiment}:#{count}" }.join(",")
      end

      # Formats { type => { value => count } } as "type_value:count,...".
      def format_segment_distribution(distribution)
        distribution.flat_map do |segment_type, values|
          values.map { |value, count| "#{segment_type}_#{value}:#{count}" }
        end.join(",")
      end

      # Override build_base_row for entities and key phrases: the entity and
      # phrase columns are read from :entities_found / :phrases_found (set by
      # #build_entity_response_rows / #build_phrase_response_rows), and the
      # sentiment label and score are stringified. Text prefers :answer over
      # :sentence.
      def build_base_row(response, index)
        response_id = response[:id] || "r_#{index + 1}"
        text = response[:answer] || response[:sentence] || ""

        case analysis_type
        when :sentiment
          label = response[:sentiment_label] || response[:sentiment] || ""
          score = response[:sentiment_score] || 0.0
          [response_id, text, label.to_s, score.to_s]
        when :entities
          [response_id, text, response[:entities_found] || ""]
        when :key_phrases
          [
            response_id,
            text,
            response[:sentiment] || "",
            response[:sentiment_score] || 0.0,
            response[:phrases_found] || ""
          ]
        else
          [response_id, text]
        end
      end
    end
  end
end