fractor 0.1.4 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop-https---raw-githubusercontent-com-riboseinc-oss-guides-main-ci-rubocop-yml +552 -0
- data/.rubocop.yml +14 -8
- data/.rubocop_todo.yml +162 -46
- data/README.adoc +1364 -376
- data/examples/auto_detection/auto_detection.rb +9 -9
- data/examples/continuous_chat_common/message_protocol.rb +53 -0
- data/examples/continuous_chat_fractor/README.adoc +217 -0
- data/examples/continuous_chat_fractor/chat_client.rb +303 -0
- data/examples/continuous_chat_fractor/chat_common.rb +83 -0
- data/examples/continuous_chat_fractor/chat_server.rb +167 -0
- data/examples/continuous_chat_fractor/simulate.rb +345 -0
- data/examples/continuous_chat_server/README.adoc +135 -0
- data/examples/continuous_chat_server/chat_client.rb +303 -0
- data/examples/continuous_chat_server/chat_server.rb +359 -0
- data/examples/continuous_chat_server/simulate.rb +343 -0
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +12 -8
- data/examples/multi_work_type/multi_work_type.rb +30 -29
- data/examples/pipeline_processing/pipeline_processing.rb +15 -15
- data/examples/producer_subscriber/producer_subscriber.rb +20 -16
- data/examples/scatter_gather/scatter_gather.rb +29 -28
- data/examples/simple/sample.rb +5 -5
- data/examples/specialized_workers/specialized_workers.rb +44 -37
- data/lib/fractor/continuous_server.rb +188 -0
- data/lib/fractor/result_aggregator.rb +1 -1
- data/lib/fractor/supervisor.rb +277 -104
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor/work_queue.rb +68 -0
- data/lib/fractor/work_result.rb +1 -1
- data/lib/fractor/worker.rb +2 -1
- data/lib/fractor/wrapped_ractor.rb +12 -2
- data/lib/fractor.rb +2 -0
- metadata +15 -2
|
@@ -10,7 +10,7 @@ module HierarchicalHasher
|
|
|
10
10
|
super({
|
|
11
11
|
data: data,
|
|
12
12
|
start: start,
|
|
13
|
-
length: length || data.bytesize
|
|
13
|
+
length: length || data.bytesize,
|
|
14
14
|
})
|
|
15
15
|
end
|
|
16
16
|
|
|
@@ -46,15 +46,15 @@ module HierarchicalHasher
|
|
|
46
46
|
result: {
|
|
47
47
|
start: work.start,
|
|
48
48
|
length: work.length,
|
|
49
|
-
hash: hash
|
|
49
|
+
hash: hash,
|
|
50
50
|
},
|
|
51
|
-
work: work
|
|
51
|
+
work: work,
|
|
52
52
|
)
|
|
53
53
|
rescue StandardError => e
|
|
54
54
|
# Return error result if something goes wrong
|
|
55
55
|
Fractor::WorkResult.new(
|
|
56
56
|
error: "Failed to hash chunk: #{e.message}",
|
|
57
|
-
work: work
|
|
57
|
+
work: work,
|
|
58
58
|
)
|
|
59
59
|
end
|
|
60
60
|
end
|
|
@@ -74,8 +74,8 @@ module HierarchicalHasher
|
|
|
74
74
|
# Create the supervisor with our worker class in a worker pool
|
|
75
75
|
supervisor = Fractor::Supervisor.new(
|
|
76
76
|
worker_pools: [
|
|
77
|
-
{ worker_class: HashWorker, num_workers: @worker_count }
|
|
78
|
-
]
|
|
77
|
+
{ worker_class: HashWorker, num_workers: @worker_count },
|
|
78
|
+
],
|
|
79
79
|
)
|
|
80
80
|
|
|
81
81
|
# Load the file and create work chunks
|
|
@@ -111,10 +111,14 @@ module HierarchicalHasher
|
|
|
111
111
|
return nil if results_aggregator.results.empty?
|
|
112
112
|
|
|
113
113
|
# Sort results by start position
|
|
114
|
-
sorted_results = results_aggregator.results.sort_by
|
|
114
|
+
sorted_results = results_aggregator.results.sort_by do |result|
|
|
115
|
+
result.result[:start]
|
|
116
|
+
end
|
|
115
117
|
|
|
116
118
|
# Concatenate all hashes with newlines
|
|
117
|
-
combined_hash_string = sorted_results.map
|
|
119
|
+
combined_hash_string = sorted_results.map do |result|
|
|
120
|
+
result.result[:hash]
|
|
121
|
+
end.join("\n")
|
|
118
122
|
|
|
119
123
|
# Calculate final SHA-256 hash (instead of SHA3)
|
|
120
124
|
Digest::SHA256.hexdigest(combined_hash_string)
|
|
@@ -45,7 +45,7 @@ module MultiWorkType
|
|
|
45
45
|
end
|
|
46
46
|
|
|
47
47
|
def to_s
|
|
48
|
-
"ImageWork: dimensions=#{dimensions.join(
|
|
48
|
+
"ImageWork: dimensions=#{dimensions.join('x')}, format=#{format}"
|
|
49
49
|
end
|
|
50
50
|
end
|
|
51
51
|
|
|
@@ -62,7 +62,7 @@ module MultiWorkType
|
|
|
62
62
|
error = TypeError.new("Unsupported work type: #{work.class}")
|
|
63
63
|
Fractor::WorkResult.new(
|
|
64
64
|
error: error,
|
|
65
|
-
work: work
|
|
65
|
+
work: work,
|
|
66
66
|
)
|
|
67
67
|
end
|
|
68
68
|
end
|
|
@@ -74,7 +74,8 @@ module MultiWorkType
|
|
|
74
74
|
sleep(rand(0.01..0.05)) # Simulate processing time
|
|
75
75
|
|
|
76
76
|
processed_text = case work.format
|
|
77
|
-
when :markdown then process_markdown(work.data,
|
|
77
|
+
when :markdown then process_markdown(work.data,
|
|
78
|
+
work.options)
|
|
78
79
|
when :html then process_html(work.data, work.options)
|
|
79
80
|
when :json then process_json(work.data, work.options)
|
|
80
81
|
else work.data.upcase # Simple transformation for plain text
|
|
@@ -87,10 +88,10 @@ module MultiWorkType
|
|
|
87
88
|
transformed_data: processed_text,
|
|
88
89
|
metadata: {
|
|
89
90
|
word_count: processed_text.split(/\s+/).size,
|
|
90
|
-
char_count: processed_text.length
|
|
91
|
-
}
|
|
91
|
+
char_count: processed_text.length,
|
|
92
|
+
},
|
|
92
93
|
},
|
|
93
|
-
work: work
|
|
94
|
+
work: work,
|
|
94
95
|
)
|
|
95
96
|
end
|
|
96
97
|
|
|
@@ -110,13 +111,13 @@ module MultiWorkType
|
|
|
110
111
|
applied_filters: %i[sharpen contrast],
|
|
111
112
|
processing_metadata: {
|
|
112
113
|
original_size: input_size,
|
|
113
|
-
processed_size: (input_size * 0.8).to_i # Simulate compression
|
|
114
|
-
}
|
|
114
|
+
processed_size: (input_size * 0.8).to_i, # Simulate compression
|
|
115
|
+
},
|
|
115
116
|
}
|
|
116
117
|
|
|
117
118
|
Fractor::WorkResult.new(
|
|
118
119
|
result: simulated_result,
|
|
119
|
-
work: work
|
|
120
|
+
work: work,
|
|
120
121
|
)
|
|
121
122
|
end
|
|
122
123
|
|
|
@@ -127,7 +128,7 @@ module MultiWorkType
|
|
|
127
128
|
links = text.scan(/\[(.+?)\]\((.+?)\)/)
|
|
128
129
|
|
|
129
130
|
"Processed Markdown: #{text.length} chars, #{headers.size} headers, #{links.size} links\n" \
|
|
130
|
-
"Headers: #{headers.join(
|
|
131
|
+
"Headers: #{headers.join(', ')}\n" \
|
|
131
132
|
"#{text.gsub(/^#+\s+(.+)$/, '💫 \1 💫')}"
|
|
132
133
|
end
|
|
133
134
|
|
|
@@ -136,7 +137,7 @@ module MultiWorkType
|
|
|
136
137
|
tags = text.scan(/<(\w+)[^>]*>/).flatten
|
|
137
138
|
|
|
138
139
|
"Processed HTML: #{text.length} chars, #{tags.size} tags\n" \
|
|
139
|
-
"Tags: #{tags.uniq.join(
|
|
140
|
+
"Tags: #{tags.uniq.join(', ')}\n" \
|
|
140
141
|
"#{text.gsub(%r{<(\w+)[^>]*>(.+?)</\1>}, '✨\2✨')}"
|
|
141
142
|
end
|
|
142
143
|
|
|
@@ -147,7 +148,7 @@ module MultiWorkType
|
|
|
147
148
|
keys = data.keys
|
|
148
149
|
|
|
149
150
|
"Processed JSON: #{keys.size} top-level keys\n" \
|
|
150
|
-
"Keys: #{keys.join(
|
|
151
|
+
"Keys: #{keys.join(', ')}\n" \
|
|
151
152
|
"Pretty-printed: #{data}"
|
|
152
153
|
rescue StandardError => e
|
|
153
154
|
"Invalid JSON: #{e.message}"
|
|
@@ -162,14 +163,14 @@ module MultiWorkType
|
|
|
162
163
|
# Create supervisor with a MultiFormatWorker pool
|
|
163
164
|
@supervisor = Fractor::Supervisor.new(
|
|
164
165
|
worker_pools: [
|
|
165
|
-
{ worker_class: MultiFormatWorker, num_workers: worker_count }
|
|
166
|
-
]
|
|
166
|
+
{ worker_class: MultiFormatWorker, num_workers: worker_count },
|
|
167
|
+
],
|
|
167
168
|
)
|
|
168
169
|
|
|
169
170
|
@results = {
|
|
170
171
|
text: [],
|
|
171
172
|
image: [],
|
|
172
|
-
errors: []
|
|
173
|
+
errors: [],
|
|
173
174
|
}
|
|
174
175
|
end
|
|
175
176
|
|
|
@@ -197,10 +198,10 @@ module MultiWorkType
|
|
|
197
198
|
total_items: text_items.size + image_items.size,
|
|
198
199
|
processed: {
|
|
199
200
|
text: @results[:text].size,
|
|
200
|
-
image: @results[:image].size
|
|
201
|
+
image: @results[:image].size,
|
|
201
202
|
},
|
|
202
203
|
errors: @results[:errors].size,
|
|
203
|
-
results: @results
|
|
204
|
+
results: @results,
|
|
204
205
|
}
|
|
205
206
|
end
|
|
206
207
|
|
|
@@ -220,7 +221,7 @@ module MultiWorkType
|
|
|
220
221
|
results_aggregator.errors.each do |error_result|
|
|
221
222
|
@results[:errors] << {
|
|
222
223
|
error: error_result.error,
|
|
223
|
-
work_type: error_result.work.class.name
|
|
224
|
+
work_type: error_result.work.class.name,
|
|
224
225
|
}
|
|
225
226
|
end
|
|
226
227
|
|
|
@@ -244,21 +245,21 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
244
245
|
text_items = [
|
|
245
246
|
{
|
|
246
247
|
data: "This is a plain text document. It has no special formatting.",
|
|
247
|
-
format: :plain
|
|
248
|
+
format: :plain,
|
|
248
249
|
},
|
|
249
250
|
{
|
|
250
251
|
data: "# Markdown Document\n\nThis is a **bold** statement. Here's a [link](https://example.com).",
|
|
251
|
-
format: :markdown
|
|
252
|
+
format: :markdown,
|
|
252
253
|
},
|
|
253
254
|
{
|
|
254
255
|
data: "<html><body><h1>HTML Document</h1><p>This is a paragraph.</p></body></html>",
|
|
255
|
-
format: :html
|
|
256
|
+
format: :html,
|
|
256
257
|
},
|
|
257
258
|
{
|
|
258
259
|
data: "{name: 'Product', price: 29.99, tags: ['electronics', 'gadget']}",
|
|
259
260
|
format: :json,
|
|
260
|
-
options: { pretty: true }
|
|
261
|
-
}
|
|
261
|
+
options: { pretty: true },
|
|
262
|
+
},
|
|
262
263
|
]
|
|
263
264
|
|
|
264
265
|
# Sample image items (simulated)
|
|
@@ -266,18 +267,18 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
266
267
|
{
|
|
267
268
|
data: "simulated_jpeg_data_1",
|
|
268
269
|
dimensions: [800, 600],
|
|
269
|
-
format: :jpeg
|
|
270
|
+
format: :jpeg,
|
|
270
271
|
},
|
|
271
272
|
{
|
|
272
273
|
data: "simulated_png_data_1",
|
|
273
274
|
dimensions: [1024, 768],
|
|
274
|
-
format: :png
|
|
275
|
+
format: :png,
|
|
275
276
|
},
|
|
276
277
|
{
|
|
277
278
|
data: "simulated_gif_data_1",
|
|
278
279
|
dimensions: [320, 240],
|
|
279
|
-
format: :gif
|
|
280
|
-
}
|
|
280
|
+
format: :gif,
|
|
281
|
+
},
|
|
281
282
|
]
|
|
282
283
|
|
|
283
284
|
worker_count = 4
|
|
@@ -309,8 +310,8 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
309
310
|
puts "Image Processing Results:"
|
|
310
311
|
result[:results][:image].each_with_index do |image_result, index|
|
|
311
312
|
puts "Image Item #{index + 1} (#{image_result[:format]}):"
|
|
312
|
-
puts " Dimensions: #{image_result[:dimensions].join(
|
|
313
|
-
puts " Applied filters: #{image_result[:applied_filters].join(
|
|
313
|
+
puts " Dimensions: #{image_result[:dimensions].join('x')}"
|
|
314
|
+
puts " Applied filters: #{image_result[:applied_filters].join(', ')}"
|
|
314
315
|
puts " Compression: #{(1 - image_result[:processing_metadata][:processed_size].to_f / image_result[:processing_metadata][:original_size]).round(2) * 100}%"
|
|
315
316
|
puts
|
|
316
317
|
end
|
|
@@ -9,7 +9,7 @@ module PipelineProcessing
|
|
|
9
9
|
super({
|
|
10
10
|
data: data,
|
|
11
11
|
stage: stage,
|
|
12
|
-
metadata: metadata
|
|
12
|
+
metadata: metadata,
|
|
13
13
|
})
|
|
14
14
|
end
|
|
15
15
|
|
|
@@ -29,7 +29,7 @@ module PipelineProcessing
|
|
|
29
29
|
"MediaWork: stage=#{stage}, metadata=#{metadata}, data_size=#{begin
|
|
30
30
|
data.bytesize
|
|
31
31
|
rescue StandardError
|
|
32
|
-
|
|
32
|
+
'unknown'
|
|
33
33
|
end}"
|
|
34
34
|
end
|
|
35
35
|
end
|
|
@@ -46,7 +46,7 @@ module PipelineProcessing
|
|
|
46
46
|
else
|
|
47
47
|
return Fractor::WorkResult.new(
|
|
48
48
|
error: "Unknown stage: #{work.stage}",
|
|
49
|
-
work: work
|
|
49
|
+
work: work,
|
|
50
50
|
)
|
|
51
51
|
end
|
|
52
52
|
|
|
@@ -58,7 +58,7 @@ module PipelineProcessing
|
|
|
58
58
|
# Update metadata with processing information
|
|
59
59
|
updated_metadata = work.metadata.merge(
|
|
60
60
|
"#{work.stage}_completed" => true,
|
|
61
|
-
"#{work.stage}_time" => Time.now.to_s
|
|
61
|
+
"#{work.stage}_time" => Time.now.to_s,
|
|
62
62
|
)
|
|
63
63
|
|
|
64
64
|
# Return the result with next stage information
|
|
@@ -67,9 +67,9 @@ module PipelineProcessing
|
|
|
67
67
|
processed_data: result,
|
|
68
68
|
current_stage: work.stage,
|
|
69
69
|
next_stage: next_stage,
|
|
70
|
-
metadata: updated_metadata
|
|
70
|
+
metadata: updated_metadata,
|
|
71
71
|
},
|
|
72
|
-
work: work
|
|
72
|
+
work: work,
|
|
73
73
|
)
|
|
74
74
|
end
|
|
75
75
|
|
|
@@ -95,7 +95,7 @@ module PipelineProcessing
|
|
|
95
95
|
sleep(rand(0.01..0.05)) # Simulate processing time
|
|
96
96
|
tags = %w[landscape portrait nature urban abstract]
|
|
97
97
|
selected_tags = tags.sample(rand(1..3))
|
|
98
|
-
"Tagged image: #{work.data} (tags: #{selected_tags.join(
|
|
98
|
+
"Tagged image: #{work.data} (tags: #{selected_tags.join(', ')})"
|
|
99
99
|
end
|
|
100
100
|
end
|
|
101
101
|
|
|
@@ -106,8 +106,8 @@ module PipelineProcessing
|
|
|
106
106
|
def initialize(worker_count = 4)
|
|
107
107
|
@supervisor = Fractor::Supervisor.new(
|
|
108
108
|
worker_pools: [
|
|
109
|
-
{ worker_class: PipelineWorker, num_workers: worker_count }
|
|
110
|
-
]
|
|
109
|
+
{ worker_class: PipelineWorker, num_workers: worker_count },
|
|
110
|
+
],
|
|
111
111
|
)
|
|
112
112
|
|
|
113
113
|
# Register callback to handle pipeline stage transitions
|
|
@@ -119,7 +119,7 @@ module PipelineProcessing
|
|
|
119
119
|
new_work = MediaWork.new(
|
|
120
120
|
result.result[:processed_data],
|
|
121
121
|
next_stage,
|
|
122
|
-
result.result[:metadata]
|
|
122
|
+
result.result[:metadata],
|
|
123
123
|
)
|
|
124
124
|
@supervisor.add_work_item(new_work)
|
|
125
125
|
end
|
|
@@ -127,7 +127,7 @@ module PipelineProcessing
|
|
|
127
127
|
|
|
128
128
|
@results = {
|
|
129
129
|
completed: [],
|
|
130
|
-
in_progress: []
|
|
130
|
+
in_progress: [],
|
|
131
131
|
}
|
|
132
132
|
end
|
|
133
133
|
|
|
@@ -137,7 +137,7 @@ module PipelineProcessing
|
|
|
137
137
|
MediaWork.new(
|
|
138
138
|
image,
|
|
139
139
|
:resize,
|
|
140
|
-
{ original_filename: image, started_at: Time.now.to_s }
|
|
140
|
+
{ original_filename: image, started_at: Time.now.to_s },
|
|
141
141
|
)
|
|
142
142
|
end
|
|
143
143
|
|
|
@@ -159,7 +159,7 @@ module PipelineProcessing
|
|
|
159
159
|
total_images: images.size,
|
|
160
160
|
completed: @results[:completed].size,
|
|
161
161
|
in_progress: @results[:in_progress].size,
|
|
162
|
-
results: @results[:completed]
|
|
162
|
+
results: @results[:completed],
|
|
163
163
|
}
|
|
164
164
|
end
|
|
165
165
|
end
|
|
@@ -182,7 +182,7 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
182
182
|
"mountains.png",
|
|
183
183
|
"beach.jpg",
|
|
184
184
|
"city_skyline.jpg",
|
|
185
|
-
"forest.png"
|
|
185
|
+
"forest.png",
|
|
186
186
|
]
|
|
187
187
|
|
|
188
188
|
worker_count = 4
|
|
@@ -205,7 +205,7 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
205
205
|
puts "Image #{index + 1}: #{image_result[:processed_data]}"
|
|
206
206
|
puts " Processing path:"
|
|
207
207
|
image_result[:metadata].each do |key, value|
|
|
208
|
-
next unless key.to_s.end_with?("_completed"
|
|
208
|
+
next unless key.to_s.end_with?("_completed", "_time")
|
|
209
209
|
|
|
210
210
|
puts " #{key}: #{value}"
|
|
211
211
|
end
|
|
@@ -8,7 +8,7 @@ module ProducerSubscriber
|
|
|
8
8
|
def initialize(data, depth = 0)
|
|
9
9
|
super({
|
|
10
10
|
data: data,
|
|
11
|
-
depth: depth
|
|
11
|
+
depth: depth,
|
|
12
12
|
})
|
|
13
13
|
end
|
|
14
14
|
|
|
@@ -31,7 +31,7 @@ module ProducerSubscriber
|
|
|
31
31
|
super({
|
|
32
32
|
data: data,
|
|
33
33
|
parent_id: parent_id,
|
|
34
|
-
depth: depth
|
|
34
|
+
depth: depth,
|
|
35
35
|
})
|
|
36
36
|
end
|
|
37
37
|
|
|
@@ -63,7 +63,7 @@ module ProducerSubscriber
|
|
|
63
63
|
else
|
|
64
64
|
Fractor::WorkResult.new(
|
|
65
65
|
error: "Unknown work type: #{work.class}",
|
|
66
|
-
work: work
|
|
66
|
+
work: work,
|
|
67
67
|
)
|
|
68
68
|
end
|
|
69
69
|
end
|
|
@@ -80,13 +80,13 @@ module ProducerSubscriber
|
|
|
80
80
|
# Return the result with metadata about sub-works
|
|
81
81
|
result = {
|
|
82
82
|
processed_data: processed_data,
|
|
83
|
-
sub_works: [] # Will be populated by the supervisor
|
|
83
|
+
sub_works: [], # Will be populated by the supervisor
|
|
84
84
|
}
|
|
85
85
|
|
|
86
86
|
# Return a successful result
|
|
87
87
|
Fractor::WorkResult.new(
|
|
88
88
|
result: result,
|
|
89
|
-
work: work
|
|
89
|
+
work: work,
|
|
90
90
|
)
|
|
91
91
|
end
|
|
92
92
|
|
|
@@ -101,9 +101,9 @@ module ProducerSubscriber
|
|
|
101
101
|
Fractor::WorkResult.new(
|
|
102
102
|
result: {
|
|
103
103
|
processed_data: processed_data,
|
|
104
|
-
parent_id: work.parent_id
|
|
104
|
+
parent_id: work.parent_id,
|
|
105
105
|
},
|
|
106
|
-
work: work
|
|
106
|
+
work: work,
|
|
107
107
|
)
|
|
108
108
|
end
|
|
109
109
|
end
|
|
@@ -122,8 +122,8 @@ module ProducerSubscriber
|
|
|
122
122
|
# Create the supervisor
|
|
123
123
|
supervisor = Fractor::Supervisor.new(
|
|
124
124
|
worker_pools: [
|
|
125
|
-
{ worker_class: MultiWorker, num_workers: @worker_count }
|
|
126
|
-
]
|
|
125
|
+
{ worker_class: MultiWorker, num_workers: @worker_count },
|
|
126
|
+
],
|
|
127
127
|
)
|
|
128
128
|
|
|
129
129
|
# Create and add initial work items
|
|
@@ -144,12 +144,14 @@ module ProducerSubscriber
|
|
|
144
144
|
# Create a new supervisor for sub-works
|
|
145
145
|
sub_supervisor = Fractor::Supervisor.new(
|
|
146
146
|
worker_pools: [
|
|
147
|
-
{ worker_class: MultiWorker, num_workers: @worker_count }
|
|
148
|
-
]
|
|
147
|
+
{ worker_class: MultiWorker, num_workers: @worker_count },
|
|
148
|
+
],
|
|
149
149
|
)
|
|
150
150
|
|
|
151
151
|
# Create and add the sub-work items
|
|
152
|
-
sub_work_items = sub_works.map
|
|
152
|
+
sub_work_items = sub_works.map do |sw|
|
|
153
|
+
SubWork.new(sw[:data], sw[:parent_id], sw[:depth])
|
|
154
|
+
end
|
|
153
155
|
sub_supervisor.add_work_items(sub_work_items)
|
|
154
156
|
sub_supervisor.run
|
|
155
157
|
|
|
@@ -179,12 +181,14 @@ module ProducerSubscriber
|
|
|
179
181
|
sub_works << {
|
|
180
182
|
data: sub_data,
|
|
181
183
|
parent_id: work.object_id,
|
|
182
|
-
depth: work.depth + 1
|
|
184
|
+
depth: work.depth + 1,
|
|
183
185
|
}
|
|
184
186
|
end
|
|
185
187
|
|
|
186
188
|
# Store the sub-work IDs in the result for reference
|
|
187
|
-
result.result[:sub_works] = sub_works.last(3).map
|
|
189
|
+
result.result[:sub_works] = sub_works.last(3).map do |sw|
|
|
190
|
+
sw[:parent_id]
|
|
191
|
+
end
|
|
188
192
|
end
|
|
189
193
|
|
|
190
194
|
sub_works
|
|
@@ -195,7 +199,7 @@ module ProducerSubscriber
|
|
|
195
199
|
initial_results.results.each do |result|
|
|
196
200
|
@result_tree[result.work.object_id] = {
|
|
197
201
|
data: result.result[:processed_data],
|
|
198
|
-
children: []
|
|
202
|
+
children: [],
|
|
199
203
|
}
|
|
200
204
|
end
|
|
201
205
|
|
|
@@ -236,7 +240,7 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
236
240
|
documents = [
|
|
237
241
|
"Annual Report 2025",
|
|
238
242
|
"Technical Documentation",
|
|
239
|
-
"Research Paper"
|
|
243
|
+
"Research Paper",
|
|
240
244
|
]
|
|
241
245
|
|
|
242
246
|
worker_count = 4
|
|
@@ -42,7 +42,7 @@ module ScatterGather
|
|
|
42
42
|
error = ArgumentError.new("Unknown source: #{work.source}")
|
|
43
43
|
return Fractor::WorkResult.new(
|
|
44
44
|
error: error,
|
|
45
|
-
work: work
|
|
45
|
+
work: work,
|
|
46
46
|
)
|
|
47
47
|
end
|
|
48
48
|
|
|
@@ -53,9 +53,9 @@ module ScatterGather
|
|
|
53
53
|
query: work.query,
|
|
54
54
|
hits: result[:hits],
|
|
55
55
|
metadata: result[:metadata],
|
|
56
|
-
timing: result[:timing]
|
|
56
|
+
timing: result[:timing],
|
|
57
57
|
},
|
|
58
|
-
work: work
|
|
58
|
+
work: work,
|
|
59
59
|
)
|
|
60
60
|
end
|
|
61
61
|
|
|
@@ -72,12 +72,12 @@ module ScatterGather
|
|
|
72
72
|
|
|
73
73
|
# Generate simulated records
|
|
74
74
|
record_count = rand(3..10)
|
|
75
|
-
hits = record_count
|
|
75
|
+
hits = Array.new(record_count) do |i|
|
|
76
76
|
{
|
|
77
77
|
id: "db-#{i + 1}",
|
|
78
78
|
title: "Database Result #{i + 1} for '#{work.query}'",
|
|
79
79
|
content: "This is database content for #{work.query}",
|
|
80
|
-
relevance: rand(0.1..1.0).round(2)
|
|
80
|
+
relevance: rand(0.1..1.0).round(2),
|
|
81
81
|
}
|
|
82
82
|
end
|
|
83
83
|
|
|
@@ -86,9 +86,9 @@ module ScatterGather
|
|
|
86
86
|
metadata: {
|
|
87
87
|
source_type: "PostgreSQL Database",
|
|
88
88
|
total_available: record_count + rand(10..50),
|
|
89
|
-
query_type: "Full-text search"
|
|
89
|
+
query_type: "Full-text search",
|
|
90
90
|
},
|
|
91
|
-
timing: rand(0.01..0.3).round(3)
|
|
91
|
+
timing: rand(0.01..0.3).round(3),
|
|
92
92
|
}
|
|
93
93
|
end
|
|
94
94
|
|
|
@@ -98,12 +98,12 @@ module ScatterGather
|
|
|
98
98
|
|
|
99
99
|
# Generate simulated API results
|
|
100
100
|
record_count = rand(2..8)
|
|
101
|
-
hits = record_count
|
|
101
|
+
hits = Array.new(record_count) do |i|
|
|
102
102
|
{
|
|
103
103
|
id: "api-#{i + 1}",
|
|
104
104
|
title: "API Result #{i + 1} for '#{work.query}'",
|
|
105
105
|
content: "This is API content for #{work.query}",
|
|
106
|
-
relevance: rand(0.1..1.0).round(2)
|
|
106
|
+
relevance: rand(0.1..1.0).round(2),
|
|
107
107
|
}
|
|
108
108
|
end
|
|
109
109
|
|
|
@@ -112,9 +112,9 @@ module ScatterGather
|
|
|
112
112
|
metadata: {
|
|
113
113
|
source_type: "External REST API",
|
|
114
114
|
provider: %w[Google Bing DuckDuckGo].sample,
|
|
115
|
-
response_code: 200
|
|
115
|
+
response_code: 200,
|
|
116
116
|
},
|
|
117
|
-
timing: rand(0.1..0.5).round(3)
|
|
117
|
+
timing: rand(0.1..0.5).round(3),
|
|
118
118
|
}
|
|
119
119
|
end
|
|
120
120
|
|
|
@@ -128,12 +128,12 @@ module ScatterGather
|
|
|
128
128
|
if cache_hit
|
|
129
129
|
# Cache hit - return cached results
|
|
130
130
|
record_count = rand(1..5)
|
|
131
|
-
hits = record_count
|
|
131
|
+
hits = Array.new(record_count) do |i|
|
|
132
132
|
{
|
|
133
133
|
id: "cache-#{i + 1}",
|
|
134
134
|
title: "Cached Result #{i + 1} for '#{work.query}'",
|
|
135
135
|
content: "This is cached content for #{work.query}",
|
|
136
|
-
relevance: rand(0.1..1.0).round(2)
|
|
136
|
+
relevance: rand(0.1..1.0).round(2),
|
|
137
137
|
}
|
|
138
138
|
end
|
|
139
139
|
|
|
@@ -142,9 +142,9 @@ module ScatterGather
|
|
|
142
142
|
metadata: {
|
|
143
143
|
source_type: "In-memory Cache",
|
|
144
144
|
cache_hit: true,
|
|
145
|
-
age: rand(1..3600)
|
|
145
|
+
age: rand(1..3600),
|
|
146
146
|
},
|
|
147
|
-
timing: rand(0.001..0.05).round(3)
|
|
147
|
+
timing: rand(0.001..0.05).round(3),
|
|
148
148
|
}
|
|
149
149
|
else
|
|
150
150
|
# Cache miss
|
|
@@ -152,9 +152,9 @@ module ScatterGather
|
|
|
152
152
|
hits: [],
|
|
153
153
|
metadata: {
|
|
154
154
|
source_type: "In-memory Cache",
|
|
155
|
-
cache_hit: false
|
|
155
|
+
cache_hit: false,
|
|
156
156
|
},
|
|
157
|
-
timing: rand(0.001..0.01).round(3)
|
|
157
|
+
timing: rand(0.001..0.01).round(3),
|
|
158
158
|
}
|
|
159
159
|
end
|
|
160
160
|
end
|
|
@@ -165,13 +165,13 @@ module ScatterGather
|
|
|
165
165
|
|
|
166
166
|
# Generate simulated file results
|
|
167
167
|
record_count = rand(1..12)
|
|
168
|
-
hits = record_count
|
|
168
|
+
hits = Array.new(record_count) do |i|
|
|
169
169
|
{
|
|
170
170
|
id: "file-#{i + 1}",
|
|
171
171
|
title: "File Result #{i + 1} for '#{work.query}'",
|
|
172
172
|
path: "/path/to/file_#{i + 1}.txt",
|
|
173
173
|
content: "This is file content matching #{work.query}",
|
|
174
|
-
relevance: rand(0.1..1.0).round(2)
|
|
174
|
+
relevance: rand(0.1..1.0).round(2),
|
|
175
175
|
}
|
|
176
176
|
end
|
|
177
177
|
|
|
@@ -180,9 +180,9 @@ module ScatterGather
|
|
|
180
180
|
metadata: {
|
|
181
181
|
source_type: "File System",
|
|
182
182
|
directories_searched: rand(5..20),
|
|
183
|
-
files_scanned: rand(50..500)
|
|
183
|
+
files_scanned: rand(50..500),
|
|
184
184
|
},
|
|
185
|
-
timing: rand(0.01..0.2).round(3)
|
|
185
|
+
timing: rand(0.01..0.2).round(3),
|
|
186
186
|
}
|
|
187
187
|
end
|
|
188
188
|
end
|
|
@@ -194,8 +194,8 @@ module ScatterGather
|
|
|
194
194
|
def initialize(worker_count = 4)
|
|
195
195
|
@supervisor = Fractor::Supervisor.new(
|
|
196
196
|
worker_pools: [
|
|
197
|
-
{ worker_class: SearchWorker, num_workers: worker_count }
|
|
198
|
-
]
|
|
197
|
+
{ worker_class: SearchWorker, num_workers: worker_count },
|
|
198
|
+
],
|
|
199
199
|
)
|
|
200
200
|
|
|
201
201
|
@merged_results = nil
|
|
@@ -204,10 +204,11 @@ module ScatterGather
|
|
|
204
204
|
def search(query, sources = nil)
|
|
205
205
|
# Define search sources with their parameters
|
|
206
206
|
sources ||= [
|
|
207
|
-
{ source: :database,
|
|
207
|
+
{ source: :database,
|
|
208
|
+
params: { max_results: 50, include_archived: false } },
|
|
208
209
|
{ source: :api, params: { format: "json", timeout: 5 } },
|
|
209
210
|
{ source: :cache, params: { max_age: 3600 } },
|
|
210
|
-
{ source: :filesystem, params: { extensions: %w[txt md pdf] } }
|
|
211
|
+
{ source: :filesystem, params: { extensions: %w[txt md pdf] } },
|
|
211
212
|
]
|
|
212
213
|
|
|
213
214
|
start_time = Time.now
|
|
@@ -262,7 +263,7 @@ module ScatterGather
|
|
|
262
263
|
content: hit[:content],
|
|
263
264
|
source: source,
|
|
264
265
|
original_relevance: hit[:relevance],
|
|
265
|
-
weighted_relevance: hit[:relevance] * source_weight
|
|
266
|
+
weighted_relevance: hit[:relevance] * source_weight,
|
|
266
267
|
}
|
|
267
268
|
end
|
|
268
269
|
end
|
|
@@ -277,7 +278,7 @@ module ScatterGather
|
|
|
277
278
|
execution_time: total_time,
|
|
278
279
|
sources: results_by_source.keys,
|
|
279
280
|
ranked_results: ranked_hits,
|
|
280
|
-
source_details: results_by_source
|
|
281
|
+
source_details: results_by_source,
|
|
281
282
|
}
|
|
282
283
|
end
|
|
283
284
|
end
|
|
@@ -309,7 +310,7 @@ if __FILE__ == $PROGRAM_NAME
|
|
|
309
310
|
puts "Query: #{results[:query]}"
|
|
310
311
|
puts "Total hits: #{results[:total_hits]}"
|
|
311
312
|
puts "Total execution time: #{results[:execution_time].round(3)} seconds"
|
|
312
|
-
puts "Sources searched: #{results[:sources].join(
|
|
313
|
+
puts "Sources searched: #{results[:sources].join(', ')}"
|
|
313
314
|
puts
|
|
314
315
|
|
|
315
316
|
puts "Top 5 Results (by relevance):"
|