fractor 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +2 -0
- data/.rubocop_todo.yml +82 -0
- data/README.adoc +281 -41
- data/examples/hierarchical_hasher/README.adoc +75 -0
- data/examples/hierarchical_hasher/hierarchical_hasher.rb +150 -0
- data/examples/multi_work_type/README.adoc +45 -0
- data/examples/multi_work_type/multi_work_type.rb +319 -0
- data/examples/pipeline_processing/README.adoc +44 -0
- data/examples/pipeline_processing/pipeline_processing.rb +216 -0
- data/examples/producer_subscriber/README.adoc +92 -0
- data/examples/producer_subscriber/producer_subscriber.rb +256 -0
- data/examples/scatter_gather/README.adoc +43 -0
- data/examples/scatter_gather/scatter_gather.rb +327 -0
- data/examples/simple/sample.rb +101 -0
- data/examples/specialized_workers/README.adoc +45 -0
- data/examples/specialized_workers/specialized_workers.rb +395 -0
- data/lib/fractor/result_aggregator.rb +10 -1
- data/lib/fractor/supervisor.rb +167 -70
- data/lib/fractor/version.rb +1 -1
- data/lib/fractor.rb +7 -9
- metadata +16 -5
- data/examples/hierarchical_hasher.rb +0 -158
- data/examples/producer_subscriber.rb +0 -300
- data/sample.rb +0 -64
@@ -0,0 +1,327 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "../../lib/fractor"
|
4
|
+
|
5
|
+
module ScatterGather
  # A single search request aimed at one backend. All state is carried in
  # the input hash that Fractor::Work stores, so the object stays
  # Ractor-shareable.
  class SearchWork < Fractor::Work
    def initialize(query, source = :default, query_params = {})
      super({ query: query, source: source, query_params: query_params })
    end

    # Free-text query string.
    def query
      input[:query]
    end

    # Backend identifier (:database, :api, :cache or :filesystem).
    def source
      input[:source]
    end

    # Backend-specific options supplied by the caller.
    def query_params
      input[:query_params]
    end

    def to_s
      "SearchWork: source=#{source}, params=#{query_params}, query=#{query}"
    end
  end

  # Worker that knows how to query each simulated backend. Each search_*
  # helper sleeps briefly and fabricates hits with random relevance scores.
  class SearchWorker < Fractor::Worker
    # Dispatches the work item to the matching backend and wraps the
    # outcome in a Fractor::WorkResult tagged with the source, so the
    # gather phase can merge per-source results.
    def process(work)
      # Simulated connection/initialization cost, paid for every source.
      setup_source(work.source)

      handler = { database: :search_database,
                  api: :search_api,
                  cache: :search_cache,
                  filesystem: :search_filesystem }[work.source]

      unless handler
        return Fractor::WorkResult.new(
          error: ArgumentError.new("Unknown source: #{work.source}"),
          work: work
        )
      end

      outcome = send(handler, work)

      # Tag the payload with its source so merge_results can group it.
      Fractor::WorkResult.new(
        result: {
          source: work.source,
          query: work.query,
          hits: outcome[:hits],
          metadata: outcome[:metadata],
          timing: outcome[:timing]
        },
        work: work
      )
    end

    private

    # Simulated connection setup delay.
    def setup_source(_source)
      sleep(rand(0.01..0.05))
    end

    # Simulated SQL full-text search returning 3..10 fabricated rows.
    def search_database(work)
      sleep(rand(0.05..0.2))

      row_count = rand(3..10)
      rows = Array.new(row_count) do |idx|
        {
          id: "db-#{idx + 1}",
          title: "Database Result #{idx + 1} for '#{work.query}'",
          content: "This is database content for #{work.query}",
          relevance: rand(0.1..1.0).round(2)
        }
      end

      {
        hits: rows,
        metadata: {
          source_type: "PostgreSQL Database",
          total_available: row_count + rand(10..50),
          query_type: "Full-text search"
        },
        timing: rand(0.01..0.3).round(3)
      }
    end

    # Simulated external REST call returning 2..8 fabricated results.
    def search_api(work)
      sleep(rand(0.1..0.3))

      item_count = rand(2..8)
      items = Array.new(item_count) do |idx|
        {
          id: "api-#{idx + 1}",
          title: "API Result #{idx + 1} for '#{work.query}'",
          content: "This is API content for #{work.query}",
          relevance: rand(0.1..1.0).round(2)
        }
      end

      {
        hits: items,
        metadata: {
          source_type: "External REST API",
          provider: %w[Google Bing DuckDuckGo].sample,
          response_code: 200
        },
        timing: rand(0.1..0.5).round(3)
      }
    end

    # Simulated cache lookup with a two-in-three chance of a hit.
    def search_cache(work)
      sleep(rand(0.01..0.1))

      # `sample` draws from the same global RNG as rand.
      hit = [true, true, false].sample

      unless hit
        # Cache miss: empty result set, near-instant timing.
        return {
          hits: [],
          metadata: {
            source_type: "In-memory Cache",
            cache_hit: false
          },
          timing: rand(0.001..0.01).round(3)
        }
      end

      entry_count = rand(1..5)
      entries = Array.new(entry_count) do |idx|
        {
          id: "cache-#{idx + 1}",
          title: "Cached Result #{idx + 1} for '#{work.query}'",
          content: "This is cached content for #{work.query}",
          relevance: rand(0.1..1.0).round(2)
        }
      end

      {
        hits: entries,
        metadata: {
          source_type: "In-memory Cache",
          cache_hit: true,
          age: rand(1..3600)
        },
        timing: rand(0.001..0.05).round(3)
      }
    end

    # Simulated directory walk returning 1..12 fabricated file matches.
    def search_filesystem(work)
      sleep(rand(0.05..0.2))

      file_count = rand(1..12)
      files = Array.new(file_count) do |idx|
        {
          id: "file-#{idx + 1}",
          title: "File Result #{idx + 1} for '#{work.query}'",
          path: "/path/to/file_#{idx + 1}.txt",
          content: "This is file content matching #{work.query}",
          relevance: rand(0.1..1.0).round(2)
        }
      end

      {
        hits: files,
        metadata: {
          source_type: "File System",
          directories_searched: rand(5..20),
          files_scanned: rand(50..500)
        },
        timing: rand(0.01..0.2).round(3)
      }
    end
  end

  # Scatter-gather controller: fans one query out to several backends in
  # parallel via a Fractor::Supervisor, then merges the ranked results.
  class MultiSourceSearch
    attr_reader :merged_results

    def initialize(worker_count = 4)
      @supervisor = Fractor::Supervisor.new(
        worker_pools: [
          { worker_class: SearchWorker, num_workers: worker_count }
        ]
      )

      @merged_results = nil
    end

    # Runs the query against every configured source concurrently and
    # returns the merged, relevance-ranked result hash.
    def search(query, sources = nil)
      # Default source list with per-backend parameters.
      sources ||= [
        { source: :database, params: { max_results: 50, include_archived: false } },
        { source: :api, params: { format: "json", timeout: 5 } },
        { source: :cache, params: { max_age: 3600 } },
        { source: :filesystem, params: { extensions: %w[txt md pdf] } }
      ]

      started_at = Time.now

      # Scatter: one SearchWork per backend, processed in parallel.
      work_items = sources.map do |spec|
        SearchWork.new(query, spec[:source], spec[:params])
      end
      @supervisor.add_work_items(work_items)
      @supervisor.run

      elapsed = Time.now - started_at

      # Gather: combine per-source payloads with weighted ranking.
      @merged_results = merge_results(@supervisor.results, elapsed)
    end

    private

    # Groups the aggregated results by source, applies a per-source
    # relevance weight, and produces one combined, ranked summary hash.
    # Plain Ruby is used here (not Ractors) for this simple aggregation.
    def merge_results(results_aggregator, total_time)
      by_source = {}
      hit_total = 0

      results_aggregator.results.each do |work_result|
        payload = work_result.result
        by_source[payload[:source]] = payload
        hit_total += payload[:hits].size
      end

      # Flatten every source's hits into one weighted list.
      combined = by_source.flat_map do |source, payload|
        # Source-specific weight; cache hits are boosted.
        weight = { database: 1.0,
                   api: 0.8,
                   cache: 1.2,
                   filesystem: 0.9 }.fetch(source, 0.5)

        payload[:hits].map do |hit|
          {
            id: hit[:id],
            title: hit[:title],
            content: hit[:content],
            source: source,
            original_relevance: hit[:relevance],
            weighted_relevance: hit[:relevance] * weight
          }
        end
      end

      # Highest weighted relevance first.
      ranked = combined.sort_by { |hit| -hit[:weighted_relevance] }

      {
        query: by_source.values.first&.dig(:query),
        total_hits: hit_total,
        execution_time: total_time,
        sources: by_source.keys,
        ranked_results: ranked,
        source_details: by_source
      }
    end
  end
end
|
285
|
+
|
286
|
+
# Example usage
|
287
|
+
if __FILE__ == $PROGRAM_NAME
  # Banner describing the four simulated backends.
  [
    "Starting Scatter-Gather Search Example",
    "======================================",
    "This example demonstrates searching multiple data sources concurrently:",
    "1. Database - Simulates SQL database searches",
    "2. API - Simulates external REST API calls",
    "3. Cache - Simulates in-memory cache lookups",
    "4. Filesystem - Simulates searching through files"
  ].each { |line| puts line }
  puts

  # Query text and worker count can be supplied on the command line.
  query = ARGV[0] || "ruby concurrency patterns"
  worker_count = (ARGV[1] || 4).to_i

  if ENV["FRACTOR_DEBUG"]
    puts "Searching for: '#{query}' using #{worker_count} workers..."
    puts
  end

  searcher = ScatterGather::MultiSourceSearch.new(worker_count)
  report = searcher.search(query)

  puts "Search Results Summary:"
  puts "----------------------"
  puts "Query: #{report[:query]}"
  puts "Total hits: #{report[:total_hits]}"
  puts "Total execution time: #{report[:execution_time].round(3)} seconds"
  puts "Sources searched: #{report[:sources].join(", ")}"
  puts

  puts "Top 5 Results (by relevance):"
  report[:ranked_results].take(5).each_with_index do |hit, index|
    puts "#{index + 1}. #{hit[:title]} (Source: #{hit[:source]}, Relevance: #{hit[:weighted_relevance].round(2)})"
    puts "   #{hit[:content][0..60]}..."
    puts
  end

  puts "Source Details:"
  report[:source_details].each do |source, details|
    puts "- #{source.to_s.capitalize} (#{details[:hits].size} results, #{details[:timing]} sec)"
    puts "  Metadata: #{details[:metadata]}"
  end
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# frozen_string_literal: true
|
3
|
+
|
4
|
+
require_relative "fractor"
|
5
|
+
|
6
|
+
# Client-specific work item implementation inheriting from Fractor::Work
|
7
|
+
# Client-side work item: wraps a single integer value in the input hash
# that Fractor::Work carries across the Ractor boundary.
class MyWork < Fractor::Work
  def initialize(value)
    super({ value: value })
  end

  # The wrapped payload value.
  def value
    input[:value]
  end

  def to_s
    "MyWork: #{value}"
  end
end
|
21
|
+
|
22
|
+
# Another work type for demonstrating multiple work types
|
23
|
+
# A second work type, used to show one worker handling multiple kinds of
# work. Structurally identical to MyWork apart from its string form.
class OtherWork < Fractor::Work
  def initialize(value)
    super({ value: value })
  end

  # The wrapped payload value.
  def value
    input[:value]
  end

  def to_s
    "OtherWork: #{value}"
  end
end
|
37
|
+
|
38
|
+
# Client-specific worker implementation inheriting from Fractor::Worker
|
39
|
+
# Client-side worker. Fractor calls #process inside a Ractor; the method
# must return a Fractor::WorkResult carrying either a result or an error.
class MyWorker < Fractor::Worker
  # Processes one work item. MyWork values are doubled (value 5 is a
  # deliberate error case), OtherWork values are echoed as strings, and
  # anything else yields a TypeError result.
  def process(work)
    # Debug trace, opt-in via environment variable.
    puts "Working on '#{work.inspect}'" if ENV["FRACTOR_DEBUG"]

    # case/when uses ===, which for classes is an is_a? check.
    case work
    when MyWork
      if work.value == 5
        # Demonstration failure path: carry the error object itself,
        # not just its message.
        return Fractor::WorkResult.new(
          error: StandardError.new("Cannot process value 5"),
          work: work
        )
      end

      Fractor::WorkResult.new(result: work.value * 2, work: work)
    when OtherWork
      Fractor::WorkResult.new(result: "Processed: #{work.value}", work: work)
    else
      # Unknown work types are reported, never raised, so the Ractor
      # keeps running.
      Fractor::WorkResult.new(
        error: TypeError.new("Unsupported work type: #{work.class}"),
        work: work
      )
    end
  end
end
|
68
|
+
|
69
|
+
# --- Main Execution ---
|
70
|
+
# This section demonstrates how to use the Fractor framework with custom
|
71
|
+
# MyWorker and MyWork classes.
|
72
|
+
if __FILE__ == $PROGRAM_NAME
  # Build a supervisor with one pool of two MyWorker Ractors.
  supervisor = Fractor::Supervisor.new(
    worker_pools: [
      { worker_class: MyWorker, num_workers: 2 }
    ]
  )

  # Queue ten work items (value 5 will intentionally fail).
  supervisor.add_work_items((1..10).map { |n| MyWork.new(n) })

  # Block until every item has been processed.
  supervisor.run

  puts "Processing complete."
  puts "Final Aggregated Results:"
  puts supervisor.results.inspect

  # Errors are collected separately on the aggregator.
  failures = supervisor.results.errors
  puts "\nFailed Work Items (#{failures.size}):"

  failures.each do |failure|
    puts "Work: #{failure.work.inspect}"
    puts "Error: #{failure.error.class}: #{failure.error.message}"
  end
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
= Specialized Workers Example
|
2
|
+
|
3
|
+
== Overview
|
4
|
+
|
5
|
+
This example demonstrates how to create specialized worker types in Fractor, each designed to handle specific kinds of work. This pattern is useful when different work items require fundamentally different processing approaches.
|
6
|
+
|
7
|
+
== Key Concepts
|
8
|
+
|
9
|
+
* *Specialized Workers*: Worker classes designed for specific types of tasks
|
10
|
+
* *Work Type Differentiation*: Each worker specializes in processing a particular category of work
|
11
|
+
* *Resource Optimization*: Workers can be tailored to the specific resources needed by each work type
|
12
|
+
* *Domain-Specific Processing*: Separate worker implementations for different processing domains
|
13
|
+
|
14
|
+
== Example Explanation
|
15
|
+
|
16
|
+
This example implements two specialized worker types:
|
17
|
+
|
18
|
+
1. *ComputeWorker*: Handles compute-intensive operations like matrix multiplication, image transformations, and path finding
|
19
|
+
2. *DatabaseWorker*: Handles database operations like queries, insertions, updates, and deletions
|
20
|
+
|
21
|
+
Each worker is optimized for its specific task domain and processes only the work types it is designed to handle.
|
22
|
+
|
23
|
+
== Features Demonstrated
|
24
|
+
|
25
|
+
* Creating specialized worker types to handle different categories of work
|
26
|
+
* Routing work items to the appropriate worker type
|
27
|
+
* Resource optimization for different processing needs
|
28
|
+
* Independent error handling for each worker type
|
29
|
+
* Combining results from different worker types
|
30
|
+
|
31
|
+
== Running the Example
|
32
|
+
|
33
|
+
[source,sh]
|
34
|
+
----
|
35
|
+
ruby examples/specialized_workers/specialized_workers.rb
|
36
|
+
----
|
37
|
+
|
38
|
+
== Expected Output
|
39
|
+
|
40
|
+
The example will show:
|
41
|
+
* Creation of different worker types
|
42
|
+
* Processing of specialized work items by their corresponding workers
|
43
|
+
* Performance metrics for each work type
|
44
|
+
* Separate results from each worker type
|
45
|
+
* Overall processing statistics
|