broadlistening 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (38)
  1. checksums.yaml +7 -0
  2. data/.rspec +3 -0
  3. data/.rubocop.yml +3 -0
  4. data/CHANGELOG.md +40 -0
  5. data/CLAUDE.md +112 -0
  6. data/LICENSE +24 -0
  7. data/LICENSE-AGPLv3.txt +661 -0
  8. data/README.md +195 -0
  9. data/Rakefile +77 -0
  10. data/exe/broadlistening +6 -0
  11. data/lib/broadlistening/argument.rb +136 -0
  12. data/lib/broadlistening/cli.rb +196 -0
  13. data/lib/broadlistening/comment.rb +128 -0
  14. data/lib/broadlistening/compatibility.rb +375 -0
  15. data/lib/broadlistening/config.rb +190 -0
  16. data/lib/broadlistening/context.rb +180 -0
  17. data/lib/broadlistening/csv_loader.rb +109 -0
  18. data/lib/broadlistening/hierarchical_clustering.rb +142 -0
  19. data/lib/broadlistening/kmeans.rb +185 -0
  20. data/lib/broadlistening/llm_client.rb +84 -0
  21. data/lib/broadlistening/pipeline.rb +129 -0
  22. data/lib/broadlistening/planner.rb +114 -0
  23. data/lib/broadlistening/provider.rb +97 -0
  24. data/lib/broadlistening/spec_loader.rb +86 -0
  25. data/lib/broadlistening/status.rb +132 -0
  26. data/lib/broadlistening/steps/aggregation.rb +228 -0
  27. data/lib/broadlistening/steps/base_step.rb +42 -0
  28. data/lib/broadlistening/steps/clustering.rb +103 -0
  29. data/lib/broadlistening/steps/embedding.rb +40 -0
  30. data/lib/broadlistening/steps/extraction.rb +73 -0
  31. data/lib/broadlistening/steps/initial_labelling.rb +85 -0
  32. data/lib/broadlistening/steps/merge_labelling.rb +93 -0
  33. data/lib/broadlistening/steps/overview.rb +36 -0
  34. data/lib/broadlistening/version.rb +5 -0
  35. data/lib/broadlistening.rb +44 -0
  36. data/schema/hierarchical_result.json +152 -0
  37. data/sig/broadlistening.rbs +4 -0
  38. metadata +194 -0
data/lib/broadlistening/steps/aggregation.rb
@@ -0,0 +1,228 @@
+ # frozen_string_literal: true
+
+ require "csv"
+
+ module Broadlistening
+   module Steps
+     class Aggregation < BaseStep
+       CSV_FILENAME = "final_result_with_comments.csv"
+
+       # Output format compatible with Kouchou-AI Python implementation
+       def execute
+         result = {
+           arguments: build_arguments,
+           clusters: build_clusters,
+           comments: build_comments,
+           propertyMap: build_property_map,
+           translations: build_translations,
+           overview: context.overview,
+           config: config.to_h,
+           comment_num: context.comments.size
+         }
+
+         context.result = result
+
+         export_csv if config.is_pubcom && context.output_dir
+
+         context
+       end
+
+       private
+
+       def build_arguments
+         context.arguments.map do |arg|
+           build_single_argument(arg)
+         end
+       end
+
+       def build_single_argument(arg)
+         result = {
+           arg_id: arg.arg_id,
+           argument: arg.argument,
+           comment_id: arg.comment_id_int,
+           x: arg.x&.to_f,
+           y: arg.y&.to_f,
+           p: 0,
+           cluster_ids: arg.cluster_ids
+         }
+
+         result[:attributes] = arg.attributes if arg.attributes
+         result[:url] = arg.url if config.enable_source_link && arg.url
+
+         result
+       end
+
+       def build_clusters
+         clusters = [ root_cluster ]
+
+         context.labels.each_value do |label|
+           clusters << {
+             level: label[:level],
+             id: label[:cluster_id],
+             label: label[:label],
+             takeaway: label[:description] || "",
+             value: count_arguments_in_cluster(label[:cluster_id]),
+             parent: find_parent_cluster(label),
+             density_rank_percentile: nil
+           }
+         end
+
+         clusters.sort_by { |c| [ c[:level], c[:id] ] }
+       end
+
+       def root_cluster
+         {
+           level: 0,
+           id: "0",
+           label: "全体",
+           takeaway: "",
+           value: context.arguments.size,
+           parent: "",
+           density_rank_percentile: nil
+         }
+       end
+
+       def count_arguments_in_cluster(cluster_id)
+         context.arguments.count { |arg| arg.in_cluster?(cluster_id) }
+       end
+
+       def find_parent_cluster(label)
+         return "0" if label[:level] == 1
+
+         parent_level = label[:level] - 1
+
+         # Find an argument that belongs to this cluster
+         arg_idx = context.arguments.index { |arg| arg.in_cluster?(label[:cluster_id]) }
+         return "0" unless arg_idx
+
+         parent_cluster_num = context.cluster_results[parent_level][arg_idx]
+         "#{parent_level}_#{parent_cluster_num}"
+       end
+
+       def build_comments
+         comments_with_args = Set.new
+         context.arguments.each do |arg|
+           comments_with_args.add(arg.comment_id_int)
+         end
+
+         result = {}
+         context.comments.each do |comment|
+           comment_id = comment.id.to_i
+           next unless comments_with_args.include?(comment_id)
+
+           result[comment_id.to_s] = { comment: comment.body }
+         end
+
+         result
+       end
+
+       def build_property_map
+         return {} if config.property_names.empty?
+
+         property_map = {}
+         config.property_names.each do |prop_name|
+           property_map[prop_name.to_s] = {}
+         end
+
+         context.arguments.each do |arg|
+           next unless arg.properties
+
+           arg.properties.each do |prop_name, value|
+             property_map[prop_name.to_s] ||= {}
+             property_map[prop_name.to_s][arg.arg_id] = normalize_property_value(value)
+           end
+         end
+
+         property_map
+       end
+
+       def normalize_property_value(value)
+         return nil if value.nil?
+
+         case value
+         when Integer, Float, String, TrueClass, FalseClass
+           value
+         when Array
+           value.map { |v| normalize_property_value(v) }
+         else
+           value.to_s
+         end
+       end
+
+       def build_translations
+         {}
+       end
+
+       # Export CSV with original comments for pubcom mode
+       def export_csv
+         csv_path = Pathname.new(context.output_dir) / CSV_FILENAME
+         level1_labels = build_level1_label_map
+
+         CSV.open(csv_path, "w", encoding: "UTF-8") do |csv|
+           csv << csv_headers
+           context.arguments.each do |arg|
+             csv << build_csv_row(arg, level1_labels)
+           end
+         end
+       end
+
+       def csv_headers
+         headers = %w[comment_id original_comment arg_id argument category_id category x y]
+         headers += attribute_columns
+         headers
+       end
+
+       def build_csv_row(arg, level1_labels)
+         comment = find_comment(arg.comment_id)
+         level1_cluster_id = find_level1_cluster_id(arg)
+         category_label = level1_labels[level1_cluster_id] || ""
+
+         row = [
+           arg.comment_id,
+           comment&.body || "",
+           arg.arg_id,
+           arg.argument,
+           level1_cluster_id,
+           category_label,
+           arg.x,
+           arg.y
+         ]
+
+         # Add attribute values
+         attribute_columns.each do |attr_name|
+           row << (arg.attributes&.dig(attr_name.sub(/^attribute_/, "")) || comment&.attributes&.dig(attr_name.sub(/^attribute_/, "")))
+         end
+
+         row
+       end
+
+       def build_level1_label_map
+         context.labels
+           .select { |_, label| label[:level] == 1 }
+           .transform_values { |label| label[:label] }
+           .transform_keys(&:to_s)
+       end
+
+       def find_level1_cluster_id(arg)
+         arg.cluster_ids&.find { |id| id.start_with?("1_") } || ""
+       end
+
+       def find_comment(comment_id)
+         context.comments.find { |c| c.id.to_s == comment_id.to_s }
+       end
+
+       def attribute_columns
+         @attribute_columns ||= begin
+           attrs = Set.new
+           context.arguments.each do |arg|
+             arg.attributes&.each_key { |k| attrs.add("attribute_#{k}") }
+           end
+           context.comments.each do |comment|
+             comment.attributes&.each_key { |k| attrs.add("attribute_#{k}") }
+           end
+           attrs.to_a.sort
+         end
+       end
+     end
+   end
+ end
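
For orientation, a minimal sketch of the structure that Aggregation#execute stores in context.result, pieced together from build_single_argument, build_clusters and root_cluster above; the ids, coordinates, labels and counts are invented placeholders, not output of the gem:

    # Illustrative shape only; all values below are made up.
    result = {
      arguments: [
        { arg_id: "A1_0", argument: "sample opinion", comment_id: 1,
          x: 0.12, y: -0.34, p: 0, cluster_ids: [ "0", "1_2", "2_5" ] }
      ],
      clusters: [
        { level: 0, id: "0", label: "全体", takeaway: "", value: 1,
          parent: "", density_rank_percentile: nil },
        { level: 1, id: "1_2", label: "sample label", takeaway: "sample takeaway",
          value: 1, parent: "0", density_rank_percentile: nil }
      ],
      comments: { "1" => { comment: "sample comment" } },
      propertyMap: {},
      translations: {},
      overview: "sample overview",
      config: {},
      comment_num: 1
    }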
data/lib/broadlistening/steps/base_step.rb
@@ -0,0 +1,42 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class BaseStep
+       attr_reader :config, :context
+
+       # @param config [Config] Pipeline configuration
+       # @param context [Context] Pipeline context
+       def initialize(config, context)
+         @config = config
+         @context = context
+
+         raise ArgumentError, "context must be a Context, got #{context.class}" unless context.is_a?(Context)
+       end
+
+       def execute
+         raise NotImplementedError, "#{self.class} must implement #execute"
+       end
+
+       protected
+
+       def llm_client
+         @llm_client ||= LlmClient.new(config)
+       end
+
+       def instrument(event_name, payload = {}, &block)
+         ActiveSupport::Notifications.instrument(event_name, payload, &block)
+       end
+
+       def notify_progress(current:, total:, message: nil)
+         instrument("progress.broadlistening", {
+           step: self.class.name.demodulize.underscore,
+           current: current,
+           total: total,
+           percentage: total.positive? ? (current.to_f / total * 100).round(1) : 0,
+           message: message
+         })
+       end
+     end
+   end
+ end
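
Since every step reports progress through ActiveSupport::Notifications under the "progress.broadlistening" event name, a caller can watch a pipeline run with a plain subscriber. A minimal sketch (the output formatting is illustrative; the payload keys mirror notify_progress above):

    require "active_support/notifications"

    # Print each progress event published by the steps.
    ActiveSupport::Notifications.subscribe("progress.broadlistening") do |_name, _start, _finish, _id, payload|
      puts format("[%s] %s/%s (%s%%) %s",
                  payload[:step], payload[:current], payload[:total],
                  payload[:percentage], payload[:message])
    end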
data/lib/broadlistening/steps/clustering.rb
@@ -0,0 +1,103 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class Clustering < BaseStep
+       def execute
+         return context if context.arguments.empty?
+
+         embeddings = build_embeddings_matrix(context.arguments)
+         umap_coords = perform_umap(embeddings)
+         cluster_results = perform_hierarchical_clustering(umap_coords)
+
+         assign_cluster_info_to_arguments(context.arguments, umap_coords, cluster_results)
+
+         context.cluster_results = cluster_results
+         context.umap_coords = umap_coords
+         context
+       end
+
+       private
+
+       def build_embeddings_matrix(arguments)
+         Numo::DFloat.cast(arguments.map(&:embedding))
+       end
+
+       def perform_umap(embeddings)
+         n_samples = embeddings.shape[0]
+         num_neighbors = [ 15, n_samples - 1 ].min
+
+         # Convert to SFloat for umappp (required format)
+         embeddings_sfloat = Numo::SFloat.cast(embeddings)
+
+         # Umappp.run returns 2D coordinates
+         result = Umappp.run(
+           embeddings_sfloat,
+           ndim: 2,
+           num_neighbors: num_neighbors,
+           seed: 42
+         )
+
+         # Convert back to DFloat for consistency
+         Numo::DFloat.cast(result)
+       end
+
+       def perform_hierarchical_clustering(umap_coords)
+         cluster_nums = config.cluster_nums.sort
+         n_samples = umap_coords.shape[0]
+
+         # Adjust cluster numbers if we have fewer samples
+         adjusted_cluster_nums = cluster_nums.map { |n| [ n, n_samples ].min }.uniq
+
+         max_clusters = adjusted_cluster_nums.last
+
+         # Perform KMeans with max clusters
+         kmeans = KMeans.new(
+           n_clusters: max_clusters,
+           random_state: 42
+         )
+         kmeans.fit(umap_coords)
+
+         # Build hierarchical results
+         build_hierarchical_results(kmeans, adjusted_cluster_nums)
+       end
+
+       def build_hierarchical_results(kmeans, cluster_nums)
+         results = {}
+
+         cluster_nums[0..-2].each_with_index do |n_target, level|
+           merged_labels = HierarchicalClustering.merge(
+             kmeans.centroids,
+             kmeans.labels,
+             n_target
+           )
+           results[level + 1] = merged_labels
+         end
+
+         # Final level uses KMeans labels directly
+         results[cluster_nums.size] = kmeans.labels
+
+         results
+       end
+
+       def assign_cluster_info_to_arguments(arguments, umap_coords, cluster_results)
+         arguments.each_with_index do |arg, idx|
+           arg.x = umap_coords[idx, 0]
+           arg.y = umap_coords[idx, 1]
+           arg.cluster_ids = build_cluster_ids(idx, cluster_results)
+         end
+       end
+
+       def build_cluster_ids(idx, cluster_results)
+         cluster_ids = [ "0" ] # Root cluster
+
+         cluster_results.keys.sort.each do |level|
+           cluster_id = "#{level}_#{cluster_results[level][idx]}"
+           cluster_ids << cluster_id
+         end
+
+         cluster_ids
+       end
+     end
+   end
+ end
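
The cluster ids attached to each argument encode the hierarchy level and the cluster number as "level_cluster". A toy, self-contained illustration of build_cluster_ids with an invented cluster_results hash for a single argument (idx = 0):

    cluster_results = { 1 => [ 2 ], 2 => [ 5 ], 3 => [ 12 ] } # made-up data

    cluster_ids = [ "0" ] # root cluster
    cluster_results.keys.sort.each do |level|
      cluster_ids << "#{level}_#{cluster_results[level][0]}"
    end

    p cluster_ids # => ["0", "1_2", "2_5", "3_12"]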
data/lib/broadlistening/steps/embedding.rb
@@ -0,0 +1,40 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class Embedding < BaseStep
+       BATCH_SIZE = 1000
+
+       def execute
+         return context if context.arguments.empty?
+
+         embeddings = compute_embeddings(context.arguments)
+         attach_embeddings_to_arguments(context.arguments, embeddings)
+
+         context
+       end
+
+       private
+
+       def compute_embeddings(arguments)
+         texts = arguments.map(&:argument)
+         embeddings = []
+         total_batches = (texts.size.to_f / BATCH_SIZE).ceil
+
+         texts.each_slice(BATCH_SIZE).with_index(1) do |batch, batch_num|
+           batch_embeddings = llm_client.embed(batch)
+           embeddings.concat(batch_embeddings)
+           notify_progress(current: batch_num, total: total_batches)
+         end
+
+         embeddings
+       end
+
+       def attach_embeddings_to_arguments(arguments, embeddings)
+         arguments.each_with_index do |arg, idx|
+           arg.embedding = embeddings[idx]
+         end
+       end
+     end
+   end
+ end
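
compute_embeddings sends the argument texts to the LLM client in slices of BATCH_SIZE and reports one progress tick per batch. A standalone sketch of the batching arithmetic with invented input (no API calls):

    BATCH_SIZE = 1000
    texts = Array.new(2500) { |i| "opinion #{i}" } # made-up corpus
    total_batches = (texts.size.to_f / BATCH_SIZE).ceil # => 3

    texts.each_slice(BATCH_SIZE).with_index(1) do |batch, batch_num|
      puts "batch #{batch_num}/#{total_batches}: #{batch.size} texts"
    end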
data/lib/broadlistening/steps/extraction.rb
@@ -0,0 +1,73 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class Extraction < BaseStep
+       def execute
+         return context if context.comments.empty?
+
+         results = extract_opinions_in_parallel(context.comments)
+         build_arguments_and_relations(context.comments, results)
+
+         context
+       end
+
+       private
+
+       def extract_opinions_in_parallel(comments)
+         total = comments.size
+         mutex = Mutex.new
+         processed = 0
+
+         Parallel.map(comments, in_threads: config.workers) do |comment|
+           result = extract_arguments_from_comment(comment)
+           current = mutex.synchronize { processed += 1 }
+           notify_progress(current: current, total: total)
+           result
+         end
+       end
+
+       def extract_arguments_from_comment(comment)
+         return [] if comment.empty?
+
+         response = llm_client.chat(
+           system: config.prompts[:extraction],
+           user: comment.body,
+           json_mode: true
+         )
+         parse_extraction_response(response)
+       rescue StandardError => e
+         warn "Failed to extract from comment #{comment.id}: #{e.message}"
+         []
+       end
+
+       def parse_extraction_response(response)
+         parsed = JSON.parse(response)
+         opinions = parsed["extractedOpinionList"] || parsed["opinions"] || []
+         opinions.select { |o| o.is_a?(String) && !o.strip.empty? }
+       rescue JSON::ParserError
+         parse_fallback_response(response)
+       end
+
+       def parse_fallback_response(response)
+         response.split("\n").map(&:strip).reject(&:empty?)
+       end
+
+       def build_arguments_and_relations(comments, results)
+         results.each_with_index do |extracted_opinions, idx|
+           comment = comments[idx]
+           extracted_opinions.each_with_index do |opinion_text, opinion_idx|
+             arg = Argument.from_comment(comment, opinion_text, opinion_idx)
+             context.arguments << arg
+
+             context.relations << {
+               arg_id: arg.arg_id,
+               comment_id: arg.comment_id,
+               proposal_id: comment.proposal_id
+             }
+           end
+         end
+       end
+     end
+   end
+ end
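
The extraction prompt is expected to answer with JSON containing an "extractedOpinionList" (or "opinions") array; a response that fails to parse is split into one opinion per line instead. A self-contained sketch of both paths with invented responses:

    require "json"

    json_response = '{"extractedOpinionList": ["Opinion A", "  ", "Opinion B"]}'
    opinions = JSON.parse(json_response).fetch("extractedOpinionList", [])
    p opinions.select { |o| o.is_a?(String) && !o.strip.empty? } # => ["Opinion A", "Opinion B"]

    plain_response = "Opinion C\nOpinion D\n"
    p plain_response.split("\n").map(&:strip).reject(&:empty?)   # => ["Opinion C", "Opinion D"]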
data/lib/broadlistening/steps/initial_labelling.rb
@@ -0,0 +1,85 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class InitialLabelling < BaseStep
+       SAMPLING_NUM = 30
+
+       def execute
+         return context if context.arguments.empty? || context.cluster_results.empty?
+
+         max_level = context.cluster_results.keys.max
+         cluster_ids = context.cluster_results[max_level].uniq
+
+         labels = label_clusters_in_parallel(context.arguments, max_level, cluster_ids)
+
+         context.initial_labels = labels.to_h { |l| [ l[:cluster_id], l ] }
+         context
+       end
+
+       private
+
+       def label_clusters_in_parallel(arguments, level, cluster_ids)
+         total = cluster_ids.size
+         mutex = Mutex.new
+         processed = 0
+
+         Parallel.map(cluster_ids, in_threads: config.workers) do |cluster_id|
+           result = label_single_cluster(arguments, level, cluster_id)
+           current = mutex.synchronize { processed += 1 }
+           notify_progress(current: current, total: total)
+           result
+         end
+       end
+
+       def label_single_cluster(arguments, level, cluster_id)
+         cluster_args = filter_arguments_by_cluster(arguments, level, cluster_id)
+         sampled = sample_arguments(cluster_args)
+
+         input = sampled.map(&:argument).join("\n")
+
+         response = llm_client.chat(
+           system: config.prompts[:initial_labelling],
+           user: input,
+           json_mode: true
+         )
+
+         parse_label_response(response, level, cluster_id)
+       rescue StandardError => e
+         warn "Failed to label cluster #{level}_#{cluster_id}: #{e.message}"
+         default_label(level, cluster_id)
+       end
+
+       def filter_arguments_by_cluster(arguments, level, cluster_id)
+         target_cluster_id = "#{level}_#{cluster_id}"
+         arguments.select { |arg| arg.in_cluster?(target_cluster_id) }
+       end
+
+       def sample_arguments(cluster_args)
+         sample_size = [ SAMPLING_NUM, cluster_args.size ].min
+         cluster_args.sample(sample_size)
+       end
+
+       def parse_label_response(response, level, cluster_id)
+         parsed = JSON.parse(response)
+         {
+           cluster_id: "#{level}_#{cluster_id}",
+           level: level,
+           label: parsed["label"] || "グループ#{cluster_id}",
+           description: parsed["description"] || ""
+         }
+       rescue JSON::ParserError
+         default_label(level, cluster_id)
+       end
+
+       def default_label(level, cluster_id)
+         {
+           cluster_id: "#{level}_#{cluster_id}",
+           level: level,
+           label: "グループ#{cluster_id}",
+           description: ""
+         }
+       end
+     end
+   end
+ end
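
Each leaf cluster is labelled from a sample of at most SAMPLING_NUM of its arguments, and the LLM is expected to reply with a JSON object holding "label" and "description"; anything else falls back to default_label, i.e. "グループ<cluster number>" with an empty description. A sketch of parsing such a reply, with an invented response and cluster id:

    require "json"

    response = '{"label": "sample label", "description": "sample description"}'
    parsed = JSON.parse(response)
    p({
      cluster_id: "3_7",   # invented id, for illustration only
      level: 3,
      label: parsed["label"] || "グループ7",
      description: parsed["description"] || ""
    })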
data/lib/broadlistening/steps/merge_labelling.rb
@@ -0,0 +1,93 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class MergeLabelling < BaseStep
+       def execute
+         return context if context.initial_labels.empty?
+
+         all_labels = context.initial_labels.dup
+
+         # Build parent-child relationships and merge from bottom to top
+         levels = context.cluster_results.keys.sort.reverse
+         levels[1..].each do |level|
+           parent_labels = merge_labels_for_level(context.arguments, all_labels, context.cluster_results, level)
+           parent_labels.each { |l| all_labels[l[:cluster_id]] = l }
+         end
+
+         context.labels = all_labels
+         context
+       end
+
+       private
+
+       def merge_labels_for_level(arguments, all_labels, cluster_results, level)
+         child_level = level + 1
+         parent_clusters = cluster_results[level].uniq
+         total = parent_clusters.size
+         mutex = Mutex.new
+         processed = 0
+
+         Parallel.map(parent_clusters, in_threads: config.workers) do |parent_cluster_id|
+           result = merge_single_parent(arguments, all_labels, cluster_results, level, child_level, parent_cluster_id)
+           current = mutex.synchronize { processed += 1 }
+           notify_progress(current: current, total: total, message: "level #{level}")
+           result
+         end
+       end
+
+       def merge_single_parent(arguments, all_labels, cluster_results, level, child_level, parent_cluster_id)
+         child_cluster_ids = find_child_clusters(arguments, cluster_results, level, child_level, parent_cluster_id)
+         child_labels = child_cluster_ids.filter_map { |cid| all_labels["#{child_level}_#{cid}"] }
+
+         return default_label(level, parent_cluster_id) if child_labels.empty?
+
+         input = child_labels.map { |l| "- #{l[:label]}: #{l[:description]}" }.join("\n")
+
+         response = llm_client.chat(
+           system: config.prompts[:merge_labelling],
+           user: input,
+           json_mode: true
+         )
+
+         parse_label_response(response, level, parent_cluster_id)
+       rescue StandardError => e
+         warn "Failed to merge labels for cluster #{level}_#{parent_cluster_id}: #{e.message}"
+         default_label(level, parent_cluster_id)
+       end
+
+       def find_child_clusters(arguments, cluster_results, parent_level, child_level, parent_cluster_id)
+         child_clusters = Set.new
+
+         arguments.each_with_index do |_arg, idx|
+           next unless cluster_results[parent_level][idx] == parent_cluster_id
+
+           child_clusters.add(cluster_results[child_level][idx])
+         end
+
+         child_clusters.to_a
+       end
+
+       def parse_label_response(response, level, cluster_id)
+         parsed = JSON.parse(response)
+         {
+           cluster_id: "#{level}_#{cluster_id}",
+           level: level,
+           label: parsed["label"] || "グループ#{cluster_id}",
+           description: parsed["description"] || ""
+         }
+       rescue JSON::ParserError
+         default_label(level, cluster_id)
+       end
+
+       def default_label(level, cluster_id)
+         {
+           cluster_id: "#{level}_#{cluster_id}",
+           level: level,
+           label: "グループ#{cluster_id}",
+           description: ""
+         }
+       end
+     end
+   end
+ end
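
Merging works bottom-up: the deepest level keeps the labels produced by InitialLabelling, and each coarser level is labelled from its children's labels. A toy illustration of the level ordering in #execute, using an invented set of levels:

    cluster_levels = [ 1, 2, 3 ]          # stands in for context.cluster_results.keys
    levels = cluster_levels.sort.reverse  # => [3, 2, 1]

    levels[1..].each do |level|
      puts "labelling level #{level} from level #{level + 1} labels"
    end
    # labelling level 2 from level 3 labels
    # labelling level 1 from level 2 labels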
data/lib/broadlistening/steps/overview.rb
@@ -0,0 +1,36 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   module Steps
+     class Overview < BaseStep
+       def execute
+         return context if context.labels.empty?
+
+         top_labels = find_top_level_labels(context.labels)
+         overview = generate_overview(top_labels)
+
+         context.overview = overview
+         context
+       end
+
+       private
+
+       def find_top_level_labels(labels)
+         min_level = labels.values.map { |l| l[:level] }.min
+         labels.values.select { |l| l[:level] == min_level }
+       end
+
+       def generate_overview(top_labels)
+         input = top_labels.map { |l| "- #{l[:label]}: #{l[:description]}" }.join("\n")
+
+         llm_client.chat(
+           system: config.prompts[:overview],
+           user: input
+         )
+       rescue StandardError => e
+         warn "Failed to generate overview: #{e.message}"
+         ""
+       end
+     end
+   end
+ end
data/lib/broadlistening/version.rb
@@ -0,0 +1,5 @@
+ # frozen_string_literal: true
+
+ module Broadlistening
+   VERSION = "0.7.0"
+ end