sentiment_insights 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,334 @@
1
require_relative 'base_exporter'

begin
  require 'rubyXL'
  begin
    # rubyXL >= 3.4 ships its styling helpers (change_font_bold,
    # change_column_width, ...) in a separate, optional file.
    require 'rubyXL/convenience_methods'
  rescue LoadError
    # Presumably an older rubyXL that still bundles these helpers in the core
    # library; nothing further to load.
  end
rescue LoadError
  puts "Warning: rubyXL gem not found. Excel export will not be available."
  puts "Install with: gem install rubyXL"
end
9
+
10
+ module SentimentInsights
11
+ module Export
12
# Exports an analysis result to an .xlsx workbook via rubyXL.
#
# Helpers that are called here but not defined in this class (+options+,
# +analysis_type+, +apply_filters+, +generate_filename+, +build_headers+,
# +extract_summary_data+, +extract_segment_data+, +extract_responses_data+,
# +detect_all_segment_keys+, +extract_segment_values+, +format_timestamp+)
# are presumably inherited from BaseExporter -- verify there.
class ExcelExporter < BaseExporter
  # Width, in characters, given to every populated column. rubyXL cannot
  # measure rendered text, so a fixed width stands in for real auto-sizing.
  DEFAULT_COLUMN_WIDTH = 20

  # Builds the workbook and writes it to disk.
  #
  # @param filename [String, nil] target path; when nil the name comes from
  #   generate_filename("xlsx")
  # @return [String] the path that was written
  # @raise [RuntimeError] when the RubyXL constant is not defined
  def export(filename = nil)
    raise "rubyXL gem is required for Excel export" unless defined?(RubyXL)

    filename ||= generate_filename("xlsx")
    filtered_result = apply_filters(@result)

    workbook = RubyXL::Workbook.new
    # Drop the default worksheet so the file only contains the sheets below.
    workbook.worksheets.delete_at(0)

    create_responses_sheet(workbook, filtered_result)
    create_summary_sheet(workbook) if options[:include_summary]
    create_segments_sheet(workbook) if options[:include_segments]

    case analysis_type
    when :entities
      create_entities_sheet(workbook, filtered_result)
    when :key_phrases
      create_phrases_sheet(workbook, filtered_result)
    end

    workbook.write(filename)
    filename
  end

  private

  # Adds the "Responses" sheet: a bold header row followed by one row per response.
  def create_responses_sheet(workbook, data)
    worksheet = workbook.add_worksheet("Responses")

    headers = build_headers
    write_excel_row(worksheet, 0, headers, bold: true)
    write_response_data(worksheet, data, start_row: 1)
    auto_size_columns(worksheet, headers.length)

    worksheet
  end

  # Adds the "Summary" sheet: a title, a blank row, then one key/value pair per row.
  def create_summary_sheet(workbook)
    worksheet = workbook.add_worksheet("Summary")
    summary_data = extract_summary_data

    worksheet.add_cell(0, 0, "SUMMARY STATISTICS")

    row = 2
    summary_data.each do |key, value|
      worksheet.add_cell(row, 0, key)
      worksheet.add_cell(row, 1, value)
      row += 1
    end

    auto_size_columns(worksheet, 2)
    worksheet
  end

  # Adds the "Segment Analysis" sheet. Returns nil without creating a sheet
  # when there is no segment data.
  def create_segments_sheet(workbook)
    segment_data = extract_segment_data
    return unless segment_data.any?

    worksheet = workbook.add_worksheet("Segment Analysis")
    worksheet.add_cell(0, 0, "SEGMENT ANALYSIS")

    headers = ["Segment Type", "Segment Value", "Total Count", "Positive %", "Neutral %", "Negative %", "Net Score"]
    write_excel_row(worksheet, 2, headers, bold: true)

    segment_data.each_with_index do |segment, index|
      write_excel_row(worksheet, 3 + index, [
        segment[:segment_type],
        segment[:segment_value],
        segment[:total_count],
        segment[:positive_percentage],
        segment[:neutral_percentage],
        segment[:negative_percentage],
        segment[:net_score]
      ])
    end

    auto_size_columns(worksheet, headers.length)
    worksheet
  end

  # Adds the "Entity Details" sheet (one row per entity). Returns nil without
  # creating a sheet when +data+ has no :entities entry.
  def create_entities_sheet(workbook, data)
    return unless data[:entities]

    worksheet = workbook.add_worksheet("Entity Details")
    worksheet.add_cell(0, 0, "ENTITY ANALYSIS")

    headers = ["Entity", "Type", "Total Mentions", "Response IDs", "Segment Distribution"]
    write_excel_row(worksheet, 2, headers, bold: true)

    data[:entities].each_with_index do |entity, index|
      segment_dist = build_segment_distribution(entity[:mentions] || [])

      write_excel_row(worksheet, 3 + index, [
        entity[:entity],
        entity[:type],
        entity[:mentions]&.length || 0,
        (entity[:mentions] || []).join(", "),
        format_segment_distribution(segment_dist)
      ])
    end

    auto_size_columns(worksheet, headers.length)
    worksheet
  end

  # Adds the "Phrase Details" sheet (one row per phrase). Returns nil without
  # creating a sheet when +data+ has no :phrases entry.
  def create_phrases_sheet(workbook, data)
    return unless data[:phrases]

    worksheet = workbook.add_worksheet("Phrase Details")
    worksheet.add_cell(0, 0, "KEY PHRASE ANALYSIS")

    headers = ["Phrase", "Total Mentions", "Response IDs", "Sentiment Distribution", "Segment Distribution"]
    write_excel_row(worksheet, 2, headers, bold: true)

    data[:phrases].each_with_index do |phrase, index|
      sentiment_dist = build_phrase_sentiment_distribution(phrase[:mentions] || [])
      segment_dist = build_segment_distribution(phrase[:mentions] || [])

      write_excel_row(worksheet, 3 + index, [
        phrase[:phrase],
        phrase[:mentions]&.length || 0,
        (phrase[:mentions] || []).join(", "),
        format_sentiment_distribution(sentiment_dist),
        format_segment_distribution(segment_dist)
      ])
    end

    auto_size_columns(worksheet, headers.length)
    worksheet
  end

  # Writes one worksheet row per response, starting at +start_row+. Each row is
  # the base columns, then the segment values, then (optionally) a timestamp.
  def write_response_data(worksheet, data, start_row: 0)
    segment_keys = detect_all_segment_keys

    responses = case analysis_type
                when :entities
                  build_entity_response_rows(data)
                when :key_phrases
                  build_phrase_response_rows(data)
                else
                  # :sentiment and any unrecognised type use the raw responses.
                  data[:responses] || []
                end

    responses.each_with_index do |response, index|
      row_data = build_base_row(response, index)
      row_data += extract_segment_values(response[:segment], segment_keys)
      row_data << format_timestamp if options[:include_timestamp]

      write_excel_row(worksheet, start_row + index, row_data)
    end
  end

  # Writes +data+ into consecutive columns of row +row_index+.
  #
  # @param bold [Boolean] when true, each written cell is made bold. This needs
  #   rubyXL's convenience methods; if the cell does not respond to
  #   change_font_bold the values are still written, just unstyled.
  # @return [Array] +data+
  def write_excel_row(worksheet, row_index, data, bold: false)
    data.each_with_index do |value, col_index|
      cell = worksheet.add_cell(row_index, col_index, value)
      next unless bold && cell.respond_to?(:change_font_bold)

      cell.change_font_bold(true)
    end
  end

  # Gives the first +column_count+ columns a fixed default width. rubyXL has no
  # true auto-sizing; this is a no-op when the worksheet does not respond to
  # change_column_width (convenience methods not loaded).
  #
  # @return [nil]
  def auto_size_columns(worksheet, column_count)
    return unless worksheet.respond_to?(:change_column_width)

    column_count.times { |col| worksheet.change_column_width(col, DEFAULT_COLUMN_WIDTH) }
    nil
  end

  # Returns the responses, each merged with an :entities_found string that
  # lists the entities whose mentions include that response's :id.
  def build_entity_response_rows(data)
    responses = data[:responses] || []
    entities_by_response = build_entities_lookup(data[:entities] || [])

    responses.map do |response|
      response_entities = entities_by_response[response[:id]] || []
      response.merge(entities_found: format_entities_list(response_entities))
    end
  end

  # Returns the responses, each merged with a :phrases_found string that lists
  # the phrases whose mentions include that response's :id.
  def build_phrase_response_rows(data)
    responses = data[:responses] || []
    phrases_by_response = build_phrases_lookup(data[:phrases] || [])

    responses.map do |response|
      response_phrases = phrases_by_response[response[:id]] || []
      response.merge(phrases_found: response_phrases.join(", "))
    end
  end

  # Inverts entity -> mentions into response id -> [{ text:, type: }, ...].
  # The returned hash auto-creates an empty array for unknown keys.
  def build_entities_lookup(entities)
    lookup = Hash.new { |h, k| h[k] = [] }

    entities.each do |entity|
      (entity[:mentions] || []).each do |response_id|
        lookup[response_id] << { text: entity[:entity], type: entity[:type] }
      end
    end

    lookup
  end

  # Inverts phrase -> mentions into response id -> [phrase, ...].
  # The returned hash auto-creates an empty array for unknown keys.
  def build_phrases_lookup(phrases)
    lookup = Hash.new { |h, k| h[k] = [] }

    phrases.each do |phrase|
      (phrase[:mentions] || []).each do |response_id|
        lookup[response_id] << phrase[:phrase]
      end
    end

    lookup
  end

  # Renders entity hashes as "text:type" joined with ", ".
  def format_entities_list(entities)
    entities.map { |e| "#{e[:text]}:#{e[:type]}" }.join(", ")
  end

  # Counts, per segment type and value, how many of the given responses fall
  # into it. Ids with no matching response, or whose response has no :segment,
  # are skipped.
  #
  # @return [Hash] segment type => { segment value => count }
  def build_segment_distribution(response_ids)
    distribution = Hash.new { |h, k| h[k] = Hash.new(0) }

    response_ids.each do |response_id|
      response = find_response_by_id(response_id)
      next unless response && response[:segment]

      response[:segment].each do |segment_type, segment_value|
        distribution[segment_type][segment_value] += 1
      end
    end

    distribution
  end

  # Counts the given responses per sentiment label, reading :sentiment first,
  # then :sentiment_label, and falling back to "neutral".
  #
  # @return [Hash{String => Integer}]
  def build_phrase_sentiment_distribution(response_ids)
    distribution = Hash.new(0)

    response_ids.each do |response_id|
      response = find_response_by_id(response_id)
      next unless response

      sentiment = response[:sentiment] || response[:sentiment_label] || "neutral"
      distribution[sentiment.to_s] += 1
    end

    distribution
  end

  # Returns the first response whose :id equals +response_id+, or nil.
  def find_response_by_id(response_id)
    extract_responses_data.find { |r| r[:id] == response_id }
  end

  # Renders { sentiment => count } as "sentiment:count" joined with ", ".
  def format_sentiment_distribution(distribution)
    distribution.map { |sentiment, count| "#{sentiment}:#{count}" }.join(", ")
  end

  # Renders { type => { value => count } } as "type_value:count" joined with ", ".
  def format_segment_distribution(distribution)
    parts = distribution.flat_map do |segment_type, values|
      values.map { |value, count| "#{segment_type}_#{value}:#{count}" }
    end
    parts.join(", ")
  end

  # Builds the leading columns of a response row for the current analysis type.
  # The id falls back to "r_<index + 1>" and the text to an empty string.
  def build_base_row(response, index)
    response_id = response[:id] || "r_#{index + 1}"
    text = response[:sentence] || response[:answer] || ""

    case analysis_type
    when :sentiment
      sentiment_label = response[:sentiment_label] || response[:sentiment] || ""
      sentiment_score = response[:sentiment_score] || 0.0
      [response_id, text, sentiment_label, sentiment_score]
    when :entities
      entities = response[:entities_found] || ""
      [response_id, text, entities]
    when :key_phrases
      sentiment = response[:sentiment] || ""
      sentiment_score = response[:sentiment_score] || 0.0
      phrases = response[:phrases_found] || ""
      [response_id, text, sentiment, sentiment_score, phrases]
    else
      [response_id, text]
    end
  end
end
333
+ end
334
+ end
@@ -0,0 +1,152 @@
1
+ require_relative 'exporter'
2
+
3
+ module SentimentInsights
4
+ module Export
5
# Mixin that adds export helpers to an analysis-result hash (meant to be
# extended onto result hashes or included in a Hash subclass).
#
# Options configured through the fluent helpers are kept in @export_options on
# the receiver and are merged underneath any per-call options.
module Exportable
  # Export to CSV format
  # @param filename [String, nil] Optional filename. If nil, auto-generates timestamp-based name
  # @param options [Hash] Export options
  # @return [String] Path to exported file
  def to_csv(filename = nil, options = {})
    exporter(options).to_csv(filename)
  end

  # Export to Excel format
  # @param filename [String, nil] Optional filename. If nil, auto-generates timestamp-based name
  # @param options [Hash] Export options
  # @return [String] Path to exported file
  def to_excel(filename = nil, options = {})
    exporter(options).to_excel(filename)
  end

  # Export to JSON format
  #
  # This name is also the hook JSON generators call on objects, passing a
  # generator state or an options hash (e.g. JSON.generate(result)). Such an
  # argument is not a filename, so it is forwarded to the inherited to_json
  # when one exists instead of being used as an export path.
  #
  # @param filename [String, nil] Optional filename. If nil, auto-generates timestamp-based name
  # @param options [Hash] Export options
  # @return [String] Path to exported file (or the JSON text when called as a
  #   serialisation hook)
  def to_json(filename = nil, options = {})
    path_like = filename.nil? || filename.respond_to?(:to_str) || filename.respond_to?(:to_path)
    return super(filename) if !path_like && defined?(super)

    exporter(options).to_json(filename)
  end

  # Return JSON as string (for API responses)
  # @param options [Hash] Export options
  # @return [String] JSON string
  def to_json_string(options = {})
    exporter(options).to_json_string
  end

  # Return as hash structure (for API responses)
  # @param options [Hash] Export options
  # @return [Hash] Structured data hash
  def to_hash(options = {})
    exporter(options).to_hash
  end

  # Export to specified format (auto-detects from filename extension)
  # @param filename_or_format [String, Symbol] Filename with extension or format symbol (:csv, :excel)
  # @param options [Hash] Export options
  # @return [String] Path to exported file
  def export(filename_or_format = nil, options = {})
    exporter(options).export(filename_or_format)
  end

  # Create an exporter instance for advanced operations
  # @param options [Hash] Export options, merged over the stored @export_options
  # @return [SentimentInsights::Export::Exporter] Exporter instance
  def exporter(options = {})
    merged_options = (@export_options || {}).merge(options)
    SentimentInsights::Export::Exporter.new(self, merged_options)
  end

  # Quick export methods with common filters

  # Export only positive responses
  def export_positive(format = :csv, filename = nil)
    exporter.positive_only(format, filename)
  end

  # Export only negative responses
  def export_negative(format = :csv, filename = nil)
    exporter.negative_only(format, filename)
  end

  # Export responses from specific segments
  def export_by_segment(segment_type, segment_values, format = :csv, filename = nil)
    exporter.by_segment(segment_type, segment_values, format, filename)
  end

  # Export to both CSV and Excel formats
  def export_all(base_filename = nil)
    exporter.export_all(base_filename)
  end

  # Configuration methods for fluent interface

  # Merge +options+ into the stored export options and return self for chaining.
  # The merge is shallow: a key such as :filter is replaced as a whole.
  def with_export_options(options)
    @export_options = (@export_options || {}).merge(options)
    self
  end

  # Export only summary data (no individual responses)
  def summary_only
    with_export_options(include_segments: false, summary_only: true)
  end

  # Export only response data (no summary/segments)
  def responses_only
    with_export_options(include_summary: false, include_segments: false)
  end

  # Include detailed segment analysis
  def with_segments
    with_export_options(include_segments: true)
  end

  # Exclude segment analysis
  def without_segments
    with_export_options(include_segments: false)
  end

  # Filter by sentiment. Other filter keys that are already configured (for
  # example :segments) are kept, so the filter helpers can be chained in any order.
  def filter_sentiment(*sentiments)
    current_filter = @export_options&.dig(:filter) || {}
    with_export_options(filter: current_filter.merge(sentiment: sentiments.flatten))
  end

  # Filter by segment criteria. Other filter keys that are already configured
  # (for example :sentiment) are kept.
  def filter_segments(segment_filters)
    current_filter = @export_options&.dig(:filter) || {}
    with_export_options(filter: current_filter.merge(segments: segment_filters))
  end
end
136
+
137
# Hash subclass that carries the Exportable helpers, so an analysis result can
# be exported directly.
class Result < Hash
  include Exportable

  class << self
    # Factory method: builds a Result holding the entries of +analysis_result+.
    def wrap(analysis_result)
      new(analysis_result)
    end
  end

  # @param data [Hash] entries copied into the new, otherwise empty, Result
  def initialize(data = {})
    # Call Hash#initialize with no arguments so +data+ is not taken as a default value.
    super()
    update(data)
  end
end
151
+ end
152
+ end
@@ -0,0 +1,169 @@
1
+ require_relative 'csv_exporter'
2
+ require_relative 'excel_exporter'
3
+ require_relative 'json_exporter'
4
+
5
+ module SentimentInsights
6
+ module Export
7
# Front door for exporting an analysis result: picks the concrete exporter
# (CsvExporter, ExcelExporter, JsonExporter) and hands it the result together
# with the instance options merged with any per-call options.
class Exporter
  attr_reader :result, :options

  # @param result [Hash] analysis result to export
  # @param options [Hash] default export options for every call on this instance
  def initialize(result, options = {})
    @result = result
    @options = options
  end

  # Main export method - auto-detects the format.
  #
  # * nil            -> CSV with an auto-generated filename
  # * Symbol         -> :csv, :excel / :xlsx or :json, auto-generated filename
  # * anything else  -> treated as a filename; the format comes from its
  #   extension. Unknown extensions are written as CSV under the given name,
  #   and a name with no extension gets ".csv" appended.
  #
  # @raise [ArgumentError] for an unsupported format symbol
  def export(filename_or_format = nil, options = {})
    case filename_or_format
    when nil
      to_csv(nil, options)
    when Symbol
      export_format(filename_or_format, options)
    else
      export_file(filename_or_format, options)
    end
  end

  # Specific format methods
  def to_csv(filename = nil, options = {})
    CsvExporter.new(@result, @options.merge(options)).export(filename)
  end

  def to_excel(filename = nil, options = {})
    ExcelExporter.new(@result, @options.merge(options)).export(filename)
  end

  def to_json(filename = nil, options = {})
    JsonExporter.new(@result, @options.merge(options)).export(filename)
  end

  # Return JSON as string (for API responses)
  def to_json_string(options = {})
    JsonExporter.new(@result, @options.merge(options)).to_json_string
  end

  # Return as hash (for API responses)
  def to_hash(options = {})
    JsonExporter.new(@result, @options.merge(options)).to_hash
  end

  # Convenience methods for common export scenarios
  def to_csv_summary_only(filename = nil)
    to_csv(filename, include_segments: false)
  end

  def to_csv_responses_only(filename = nil)
    to_csv(filename, include_summary: false, include_segments: false)
  end

  def to_excel_detailed(filename = nil)
    to_excel(filename, include_summary: true, include_segments: true)
  end

  # Filtered exports. Each narrows the export on top of any filter already
  # present in the instance options instead of discarding it.

  # Export only positive responses (:csv, :excel or :xlsx).
  # @raise [ArgumentError] for any other format
  def positive_only(format = :csv, filename = nil)
    export_filtered(format, filename, sentiment: [:positive])
  end

  # Export only negative responses (:csv, :excel or :xlsx).
  # @raise [ArgumentError] for any other format
  def negative_only(format = :csv, filename = nil)
    export_filtered(format, filename, sentiment: [:negative])
  end

  # Export only responses whose +segment_type+ is one of +segment_values+.
  # @raise [ArgumentError] for a format other than :csv, :excel or :xlsx
  def by_segment(segment_type, segment_values, format = :csv, filename = nil)
    export_filtered(format, filename, segments: { segment_type.to_sym => Array(segment_values) })
  end

  # Batch export: writes "<base>.csv" and "<base>.xlsx". A failing Excel export
  # is reported with a warning and simply left out of the result.
  #
  # @return [Hash] { csv: path, excel: path } (the :excel key is absent on failure)
  def export_all(base_filename = nil)
    base_name = base_filename || detect_base_filename

    files = {}
    files[:csv] = to_csv("#{base_name}.csv")

    begin
      files[:excel] = to_excel("#{base_name}.xlsx")
    rescue => e
      puts "Warning: Excel export failed: #{e.message}"
    end

    files
  end

  private

  # Dispatches a format symbol to the matching exporter with an auto-generated filename.
  def export_format(format, options)
    case format
    when :csv
      to_csv(nil, options)
    when :excel, :xlsx
      to_excel(nil, options)
    when :json
      to_json(nil, options)
    else
      raise ArgumentError, "Unsupported export format: #{format}"
    end
  end

  # Dispatches on the (case-insensitive) extension of +filename+.
  def export_file(filename, options)
    extension = File.extname(filename).downcase

    case extension
    when '.csv'
      to_csv(filename, options)
    when '.xlsx', '.xls'
      to_excel(filename, options)
    when '.json'
      to_json(filename, options)
    else
      # Decide on the real extension, not on "contains a dot": a path such as
      # "./out/report" has dots but still needs ".csv" appended.
      to_csv(extension.empty? ? "#{filename}.csv" : filename, options)
    end
  end

  # Runs a CSV/Excel export with +filter+ merged into the configured filter.
  def export_filtered(format, filename, filter)
    scoped = { filter: (@options[:filter] || {}).merge(filter) }

    case format
    when :csv
      to_csv(filename, scoped)
    when :excel, :xlsx
      to_excel(filename, scoped)
    else
      raise ArgumentError, "Unsupported format: #{format}"
    end
  end

  # Builds "<analysis type>_analysis_<YYYYmmdd_HHMMSS>" from the current time.
  def detect_base_filename
    analysis_type = detect_analysis_type(@result)
    timestamp = Time.now.strftime("%Y%m%d_%H%M%S")
    "#{analysis_type}_analysis_#{timestamp}"
  end

  # Infers the analysis type from the keys present in +result+. The :entities
  # and :phrases keys are checked first because those results also carry
  # :responses, which would otherwise classify them as :sentiment.
  def detect_analysis_type(result)
    return :entities if result.key?(:entities)
    return :key_phrases if result.key?(:phrases)
    return :sentiment if result.key?(:global_summary) || result.key?(:responses)

    :unknown
  end
end
168
+ end
169
+ end