jruby_parallel_processing 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.rspec +3 -0
- data/CHANGELOG.md +5 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +40 -0
- data/LICENSE.txt +21 -0
- data/README.md +177 -0
- data/Rakefile +8 -0
- data/lib/jruby_parallel_processing/api_request_processor.rb +88 -0
- data/lib/jruby_parallel_processing/application_callable.rb +15 -0
- data/lib/jruby_parallel_processing/chunk_processor.rb +13 -0
- data/lib/jruby_parallel_processing/data_processor.rb +93 -0
- data/lib/jruby_parallel_processing/distributed_worker.rb +84 -0
- data/lib/jruby_parallel_processing/stream_processor_callable.rb +28 -0
- data/lib/jruby_parallel_processing/task_callable.rb +20 -0
- data/lib/jruby_parallel_processing/task_queue.rb +56 -0
- data/lib/jruby_parallel_processing/version.rb +5 -0
- data/lib/jruby_parallel_processing.rb +28 -0
- data/sig/jruby_parallel_processing.rbs +4 -0
- metadata +69 -0
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: fbbe1ea28c91fad4e363c47c0022570c78d0b23704b5d3f64dd9650685a90200
|
4
|
+
data.tar.gz: 996cdc75cb9fa9edc861799938799449d432749674d998c19dc454b352bc7451
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 8ea06ce65c37b74548970bf8f33b2fc959116199ee2358578cd3824a3fb9ced3d1646e313722021469f584cd8cdb9d9a3ff3c42e4b62f699be9a8ab9fa01c847
|
7
|
+
data.tar.gz: de99438a31bab4456c20b8ee3a6b9b88b59f1fc21c1a54163d703298922fcc905c37292e0d0856083cf5bc14a9767633d7bc1fc473f994927b4eb1ed3d794287
|
data/.rspec
ADDED
data/CHANGELOG.md
ADDED
data/Gemfile
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
source "https://rubygems.org"
|
4
|
+
|
5
|
+
# Specify your gem's dependencies in jruby_parallel_processing.gemspec
|
6
|
+
gemspec
|
7
|
+
|
8
|
+
gem "rake", "~> 13.0"
|
9
|
+
|
10
|
+
gem "rspec", "~> 3.0"
|
11
|
+
|
12
|
+
gem "drb", "~> 2.2"
|
13
|
+
|
14
|
+
gem "nokogiri", "~> 1.16"
|
data/Gemfile.lock
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
PATH
|
2
|
+
remote: .
|
3
|
+
specs:
|
4
|
+
jruby_parallel_processing (0.1.0)
|
5
|
+
|
6
|
+
GEM
|
7
|
+
remote: https://rubygems.org/
|
8
|
+
specs:
|
9
|
+
diff-lcs (1.5.1)
|
10
|
+
drb (2.2.1)
|
11
|
+
nokogiri (1.16.7-java)
|
12
|
+
racc (~> 1.4)
|
13
|
+
racc (1.8.1-java)
|
14
|
+
rake (13.2.1)
|
15
|
+
rspec (3.13.0)
|
16
|
+
rspec-core (~> 3.13.0)
|
17
|
+
rspec-expectations (~> 3.13.0)
|
18
|
+
rspec-mocks (~> 3.13.0)
|
19
|
+
rspec-core (3.13.0)
|
20
|
+
rspec-support (~> 3.13.0)
|
21
|
+
rspec-expectations (3.13.2)
|
22
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
23
|
+
rspec-support (~> 3.13.0)
|
24
|
+
rspec-mocks (3.13.1)
|
25
|
+
diff-lcs (>= 1.2.0, < 2.0)
|
26
|
+
rspec-support (~> 3.13.0)
|
27
|
+
rspec-support (3.13.1)
|
28
|
+
|
29
|
+
PLATFORMS
|
30
|
+
universal-java-17
|
31
|
+
|
32
|
+
DEPENDENCIES
|
33
|
+
drb (~> 2.2)
|
34
|
+
jruby_parallel_processing!
|
35
|
+
nokogiri (~> 1.16)
|
36
|
+
rake (~> 13.0)
|
37
|
+
rspec (~> 3.0)
|
38
|
+
|
39
|
+
BUNDLED WITH
|
40
|
+
2.3.26
|
data/LICENSE.txt
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
The MIT License (MIT)
|
2
|
+
|
3
|
+
Copyright (c) 2024 Netsky
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
6
|
+
of this software and associated documentation files (the "Software"), to deal
|
7
|
+
in the Software without restriction, including without limitation the rights
|
8
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
9
|
+
copies of the Software, and to permit persons to whom the Software is
|
10
|
+
furnished to do so, subject to the following conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be included in
|
13
|
+
all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
16
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
17
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
18
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
19
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
20
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
|
21
|
+
THE SOFTWARE.
|
data/README.md
ADDED
@@ -0,0 +1,177 @@
|
|
1
|
+
# JRubyParallelProcessing
|
2
|
+
## Table of Contents
|
3
|
+
- [Overview](#overview)
|
4
|
+
- [Installation](#installation)
|
5
|
+
- [Usage](#usage)
|
6
|
+
- [DataProcessor](#dataprocessor)
|
7
|
+
- [Basic Example](#basic-example)
|
8
|
+
- [Features](#features)
|
9
|
+
- [Configuration](#configuration)
|
10
|
+
- [TaskQueue](#taskqueue)
|
11
|
+
- [Basic Example](#basic-example-1)
|
12
|
+
- [Features](#features-1)
|
13
|
+
- [Configuration](#configuration-1)
|
14
|
+
- [ApiRequestProcessor](#apirequestprocessor)
|
15
|
+
- [Basic Example](#basic-example-2)
|
16
|
+
- [Features](#features-2)
|
17
|
+
- [Configuration](#configuration-2)
|
18
|
+
- [DistributedWorker](#distributedworker)
|
19
|
+
- [Basic Example](#basic-example-3)
|
20
|
+
- [Features](#features-3)
|
21
|
+
- [Configuration Options](#configuration-options)
|
22
|
+
- [License](#license)
|
23
|
+
## Overview
|
24
|
+
|
25
|
+
`JRubyParallelProcessing` is a gem designed for parallel processing tasks in JRuby. It offers efficient data processing and API request handling using multiple threads, along with a task queue system for managing and executing tasks with priority and retries.
|
26
|
+
|
27
|
+
## Installation
|
28
|
+
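Add this gem to your Gemfile. It must run on JRuby, and it requires `nokogiri` at load time without declaring it as a runtime dependency, so add `nokogiri` to your Gemfile as well (and `drb` on Ruby versions where it is no longer a default gem):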
|
29
|
+
``` ruby
|
30
|
+
gem 'jruby_parallel_processing'
|
31
|
+
```
|
32
|
+
## Usage
|
33
|
+
|
34
|
+
### DataProcessor
|
35
|
+
`DataProcessor` is a class designed for parallel data processing. It allows you to split data into chunks and process them across multiple threads, which significantly speeds up task execution. Additionally, it supports middleware hooks for customizing the behavior before and after processing.
|
36
|
+
|
37
|
+
#### Basic Example
|
38
|
+
``` ruby
|
39
|
+
require 'jruby_parallel_processing'
|
40
|
+
data = (1..100).to_a
|
41
|
+
processor = JRubyParallelProcessing::DataProcessor.new(data_array: data, in_threads: 4)
|
42
|
+
processor.process do |item|
|
43
|
+
# Your processing logic here
|
44
|
+
puts item
|
45
|
+
end
|
46
|
+
```
|
47
|
+
#### Features
|
48
|
+
- Efficient Parallel Processing: Process data using multiple threads.
|
49
|
+
- Chunk Management: Automatically splits data into manageable chunks.
|
50
|
+
- Stream Support: Handles data processing from streams (e.g., IO, StringIO).
|
51
|
+
- Middleware: Supports before and after processing middleware for custom logic injection.
|
52
|
+
|
53
|
+
#### Configuration
|
54
|
+
- data_array: Array of data to be processed.
|
55
|
+
- stream: Stream object (e.g., IO, StringIO) for processing data line-by-line.
|
56
|
+
- in_threads: Number of threads for parallel processing (default is 4).
|
57
|
+
- chunk_size: Size of chunks for processing.
|
58
|
+
- logger: Logger object for custom logging.
|
59
|
+
- queue_size: Size of the queue for stream processing (default is 100).
|
60
|
+
- timeout: Timeout for stream processing (default is 10 seconds).
|
61
|
+
|
62
|
+
### Example Usage
|
63
|
+
#### Middleware Example
|
64
|
+
``` ruby
|
65
|
+
processor = JRubyParallelProcessing::DataProcessor.new(data_array: data, in_threads: 4)
|
66
|
+
# Add before processing middleware
|
67
|
+
processor.add_middleware(:before_process) do
|
68
|
+
puts "Starting processing..."
|
69
|
+
end
|
70
|
+
# Add after processing middleware
|
71
|
+
processor.add_middleware(:after_process) do
|
72
|
+
puts "Finished processing."
|
73
|
+
end
|
74
|
+
processor.process do |item|
|
75
|
+
puts item
|
76
|
+
end
|
77
|
+
```
|
78
|
+
#### Data Processing from Stream:
|
79
|
+
```ruby
|
80
|
+
require 'stringio'
|
81
|
+
require 'jruby_parallel_processing'
|
82
|
+
stream = StringIO.new("line 1\nline 2\nline 3\n")
|
83
|
+
processor = JRubyParallelProcessing::DataProcessor.new(stream: stream, in_threads: 2)
|
84
|
+
processor.process do |line|
|
85
|
+
puts line
|
86
|
+
end
|
87
|
+
```
|
88
|
+
|
89
|
+
### TaskQueue
|
90
|
+
`TaskQueue` is a class for managing and executing tasks with priority, retries, and custom configurations. It allows for efficient task scheduling and error handling.
|
91
|
+
|
92
|
+
#### Basic Example
|
93
|
+
```ruby
|
94
|
+
require 'jruby_parallel_processing'
|
95
|
+
task_queue = JRubyParallelProcessing::TaskQueue.new(max_retries: 3, retry_delay: 0.1, max_queue_size: 10)
|
96
|
+
task_queue.add_task(1) do
|
97
|
+
# Your task logic here
|
98
|
+
puts "Task executed"
|
99
|
+
end
|
100
|
+
task_queue.process_tasks
|
101
|
+
```
|
102
|
+
#### Features
|
103
|
+
|
104
|
+
- Adds tasks to the queue with a priority; when the queue is processed, tasks run in ascending order of that value (lower numbers first).
|
105
|
+
- Retries failed tasks with configurable retry count and delay.
|
106
|
+
- Handles task execution using a fixed thread pool.
|
107
|
+
|
108
|
+
#### Configuration
|
109
|
+
- max_retries: Maximum number of retries for failed tasks (default is
|
110
|
+
3).
|
111
|
+
- retry_delay: Delay between retries in seconds (default is 0.1).
|
112
|
+
- max_queue_size: Maximum number of tasks in the queue (default is 10).
|
113
|
+
- logger: Logger object for custom logging.
|
114
|
+
|
115
|
+
### ApiRequestProcessor
|
116
|
+
`ApiRequestProcessor` handles parallel API requests with built-in error handling and retries.
|
117
|
+
|
118
|
+
#### Basic Example
|
119
|
+
```ruby
|
120
|
+
urls = ["https://api.example.com/data", "https://api.example.com/other"]
|
121
|
+
processor = JRubyParallelProcessing::ApiRequestProcessor.new(urls, in_threads: 4)
|
122
|
+
results = processor.process(http_method: :get) do |response|
|
123
|
+
puts "Received response: #{response.body}"
|
124
|
+
end
|
125
|
+
```
|
126
|
+
|
127
|
+
|
128
|
+
#### Features
|
129
|
+
|
130
|
+
- Parallel API requests with configurable HTTP methods, timeouts, and
|
131
|
+
retries.
|
132
|
+
- Supports custom headers and parameters for requests.
|
133
|
+
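- Automatically parses responses based on the `Content-Type` header: JSON is parsed with `JSON.parse`, XML and HTML with Nokogiri; any other type (or a body that fails to parse) is returned as the raw string.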
|
134
|
+
|
135
|
+
#### Configuration
|
136
|
+
|
137
|
+
- urls: Array of URLs for API requests.
|
138
|
+
- in_threads: Number of threads for parallel API requests (default is
|
139
|
+
4).
|
140
|
+
- timeout: Timeout for API requests (default is 5 seconds).
|
141
|
+
- retries: Number of retries for failed requests (default is 3).
|
142
|
+
- logger: Logger object for custom logging.
|
143
|
+
- http_method: HTTP method to use (default is GET).
|
144
|
+
- headers: Custom headers for requests.
|
145
|
+
- params: Parameters for the request. They are sent as form data for POST/PUT requests and encoded into the query string for GET requests.
|
146
|
+
|
147
|
+
|
148
|
+
### DistributedWorker
|
149
|
+
`DistributedWorker` is a class for managing distributed tasks across multiple worker nodes. It enables task distribution, prioritization, and ensures that workers remain active through a heartbeat mechanism.
|
150
|
+
|
151
|
+
#### Basic Example
|
152
|
+
```ruby
|
153
|
+
require 'jruby_parallel_processing'
|
154
|
+
# Initialize a DistributedWorker instance
|
155
|
+
worker = JRubyParallelProcessing::DistributedWorker.new("localhost", 8787)
|
156
|
+
# Queue a task with a given priority
|
157
|
+
result = worker.execute_task(-> { puts "Task executed" }, priority: 5)
|
158
|
+
puts result[:status] # :queued
|
159
|
+
# To connect to an existing worker
|
160
|
+
worker_url = "druby://localhost:8787"
|
161
|
+
remote_worker = JRubyParallelProcessing::DistributedWorker.connect_to_worker(worker_url)
|
162
|
+
# Send a heartbeat to the worker
|
163
|
+
remote_worker.send_heartbeat
|
164
|
+
```
|
165
|
+
#### Features
|
166
|
+
- Distributed Task Execution: Distribute tasks to different worker nodes.
|
167
|
+
- Task Prioritization: Tasks are queued with a priority; tasks with a numerically higher priority are taken from the queue first.
|
168
|
+
- Heartbeat Mechanism: Ensures worker nodes remain active and can report their status.
|
169
|
+
- Fault-Tolerant Task Queue: Handles errors during task queueing and processing.
|
170
|
+
|
171
|
+
#### Configuration Options
|
172
|
+
- host: Host address for the DistributedWorker service (default is localhost).
|
173
|
+
- port: Port for the DistributedWorker service (default is 8787).
|
174
|
+
- priority: Priority of the task being queued (default is 0).
|
175
|
+
|
176
|
+
## License
|
177
|
+
This gem is licensed under the MIT License.
|
data/Rakefile
ADDED
@@ -0,0 +1,88 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Sends one HTTP request per URL, spreading the URLs over a DataProcessor
  # thread pool, and collects one result hash per URL.
  class ApiRequestProcessor
    # @param urls [Array<String>] URLs to request
    # @param in_threads [Integer] number of worker threads handed to DataProcessor
    # @param timeout [Numeric] seconds allowed for opening the connection and
    #   for each read
    # @param retries [Integer] how many times a failed request is retried
    # @param logger [Logger, nil] logger to use; defaults to a STDOUT logger
    def initialize(urls, in_threads: 4, timeout: 5, retries: 3, logger: nil)
      @urls = urls
      @in_threads = in_threads
      @timeout = timeout
      @retries = retries
      @logger = logger || Logger.new(STDOUT)
    end

    # Requests every URL and returns the collected results.
    #
    # @param http_method [Symbol, String] :get, :post, :put or :delete
    #   (case-insensitive strings are accepted as well)
    # @param headers [Hash] request headers
    # @param params [Hash] query parameters for GET, form data for POST/PUT
    # @yield [response] called with the raw response of each successful request
    # @return [Array<Hash>] `{url:, response:, status:}` for successes and
    #   `{url:, error:}` for requests that failed after all retries; the order
    #   follows completion, not the order of the input URLs
    def process(http_method: :get, headers: {}, params: {}, &block)
      results = []
      # Nothing to request: skip creating a thread pool altogether.
      return results if @urls.nil? || @urls.empty?

      # The block below runs on several executor threads at once, so appends
      # to the shared results array are serialized through this lock.
      results_lock = Mutex.new
      data_processor = DataProcessor.new(data_array: @urls, in_threads: @in_threads, logger: @logger)

      data_processor.process do |url|
        response = request_with_retries(url, http_method, headers, params, results, results_lock)
        block.call(response) if block && response
      end

      results
    end

    private

    # Performs the request for one URL, retrying up to @retries times, and
    # records the outcome in +results+.
    #
    # @return [Net::HTTPResponse, nil] the response, or nil when every attempt failed
    def request_with_retries(url, http_method, headers, params, results, results_lock)
      attempts = 0

      begin
        # The URI is re-parsed on every attempt so that query parameters
        # merged in by build_request never accumulate across retries.
        uri = URI.parse(url)
        http = Net::HTTP.new(uri.host, uri.port)
        http.use_ssl = (uri.scheme == "https")
        http.open_timeout = @timeout
        http.read_timeout = @timeout

        response = http.request(build_request(http_method, uri, headers, params))
        entry = { url: url, response: parse_response(response), status: response.code }
        results_lock.synchronize { results << entry }
        @logger.info("Request to #{url} succeeded with status #{response.code}")
        response
      rescue => e
        attempts += 1
        @logger.error("Request to #{url} failed: #{e.message}, attempt #{attempts}")
        retry if attempts <= @retries

        results_lock.synchronize { results << { url: url, error: e.message } }
        @logger.error("Request to #{url} failed after #{@retries} retries.")
        nil
      end
    end

    # Builds the Net::HTTP request object for the given method.
    #
    # @raise [ArgumentError] when the HTTP method is not supported
    def build_request(http_method, uri, headers, params)
      case http_method.to_s.downcase.to_sym
      when :get
        unless params.empty?
          # Keep any query string already present in the URL and append ours.
          pieces = [uri.query, URI.encode_www_form(params)].compact.reject(&:empty?)
          uri.query = pieces.join("&")
        end
        Net::HTTP::Get.new(uri.request_uri, headers)
      when :post
        form_request(Net::HTTP::Post, uri, headers, params)
      when :put
        form_request(Net::HTTP::Put, uri, headers, params)
      when :delete
        Net::HTTP::Delete.new(uri.request_uri, headers)
      else
        raise ArgumentError, "Unsupported HTTP method: #{http_method}"
      end
    end

    # Creates a request of +request_class+ carrying +params+ as form data.
    def form_request(request_class, uri, headers, params)
      request = request_class.new(uri.request_uri, headers)
      request.set_form_data(params)
      request
    end

    # Parses the body according to the Content-Type header; falls back to the
    # raw body for unknown types or when parsing raises.
    def parse_response(response)
      case response["content-type"]
      when /json/ then JSON.parse(response.body)
      when /xml/ then Nokogiri::XML(response.body)
      when /html/ then Nokogiri::HTML(response.body)
      else response.body
      end
    rescue StandardError
      response.body
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Mixin that runs a block over every element of a chunk, reporting (rather
  # than propagating) errors raised for individual elements.
  module ChunkProcessor
    # Calls +block+ once per element of +chunk+. An error raised for one
    # element is reported and processing continues with the next element.
    #
    # Errors go to the including object's @logger when it is set; otherwise
    # they are written with Kernel#warn, so a missing logger can no longer
    # raise from inside the error handler.
    #
    # @param chunk [#each] elements to process
    # @yield [data] each element of the chunk
    # @return the value of +chunk.each+
    def process_chunk(chunk, &block)
      chunk.each do |data|
        block.call(data)
      rescue => e
        message = "Error processing data: #{e.message}"
        @logger ? @logger.error(message) : warn(message)
      end
    end
  end
end
|
@@ -0,0 +1,93 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Processes either an array (split into chunks) or a line-oriented stream on
  # a fixed-size Java thread pool, with optional before/after middleware.
  class DataProcessor
    include ChunkProcessor

    # @param data_array [Array, nil] data to process in chunks
    # @param stream [#eof?, #gets, nil] stream read line by line when no array is given
    # @param in_threads [Integer] size of the thread pool
    # @param chunk_size [Integer, nil] elements per chunk; defaults to the
    #   array size divided by in_threads, rounded up
    # @param logger [Logger, nil] logger to use; defaults to a STDOUT logger
    # @param queue_size [Integer] capacity of the queue used for stream lines
    # @param timeout [Numeric] seconds to wait for the stream reader to finish
    def initialize(data_array: nil, stream: nil, in_threads: 4, chunk_size: nil, logger: nil, queue_size: 100, timeout: 10)
      @data_array = data_array
      @stream = stream
      @in_threads = in_threads
      @chunk_size = chunk_size || default_chunk_size if @data_array
      @logger = logger || Logger.new(STDOUT)
      @queue_size = queue_size
      @timeout = timeout
      @middlewares = { before_process: [], after_process: [] }
    end

    # Runs the before middleware, processes the data with +block+, waits for
    # all submitted work and runs the after middleware.
    #
    # @yield [item] each array element or stream line
    # @raise [ArgumentError] when neither an array nor a stream was provided
    # @raise [RuntimeError] when the stream reader exceeds the timeout
    def process(&block)
      raise ArgumentError, "Data array or stream must be provided" unless @data_array || @stream

      execute_middleware(:before_process)

      @logger.info("Processing started with #{@in_threads} threads")
      executor = Executors.new_fixed_thread_pool(@in_threads)

      begin
        futures =
          if @data_array
            submit_chunks(executor, @data_array.each_slice(@chunk_size).to_a, &block)
          else
            submit_stream(executor, @stream, &block)
          end

        await_completion(futures, executor)
      ensure
        # Release the pool threads even when submission or the stream timeout
        # raised; shutting down an already shut-down executor has no effect.
        executor.shutdown
      end

      execute_middleware(:after_process)
    end

    # Registers a middleware block.
    #
    # @param position [Symbol] :before_process or :after_process
    # @raise [ArgumentError] for any other position
    def add_middleware(position, &middleware_block)
      raise ArgumentError, "Invalid middleware position" unless @middlewares.key?(position)

      @middlewares[position] << middleware_block
    end

    private

    # Chunk size used when none was given. Never below 1, because
    # each_slice(0) raises for an empty data array.
    def default_chunk_size
      [(@data_array.size.to_f / @in_threads).ceil, 1].max
    end

    # Submits one pool task per chunk and returns the futures.
    def submit_chunks(executor, chunks, &block)
      chunks.map do |chunk|
        executor.submit do
          process_chunk(chunk, &block)
        end
      end
    end

    # Reads the stream on a separate thread into a bounded queue while a
    # StreamProcessorCallable consumes it on the pool. Returns the futures.
    def submit_stream(executor, stream, &block)
      queue = LinkedBlockingQueue.new(@queue_size)
      futures = []

      stream_reader_thread = Thread.new do
        until stream.eof?
          line = stream.gets
          queue.put(line) if line
        end
      rescue => e
        @logger.error("Error reading from stream: #{e.message}")
      end

      futures << executor.submit(StreamProcessorCallable.new(queue, @logger, block))

      stream_reader_thread.join(@timeout)
      if stream_reader_thread.alive?
        # Do not leave the reader blocked on a queue nobody will drain.
        stream_reader_thread.kill
        raise "Stream reading timeout"
      end

      # A consumer stops as soon as it finds the queue empty, which can happen
      # before the reader has finished. Hand anything that is still queued to
      # another consumer so those lines are not silently dropped.
      futures << executor.submit(StreamProcessorCallable.new(queue, @logger, block)) unless queue.empty?

      futures
    end

    # Waits for every future, logging failures, then shuts the executor down.
    def await_completion(futures, executor)
      futures.each do |future|
        future.get
      rescue => e
        @logger.error("Task failed: #{e.message}")
      end
      executor.shutdown
      @logger.info("Processing completed")
    end

    # Calls every middleware registered for +position+, in registration order.
    def execute_middleware(position)
      return unless @middlewares[position]

      @middlewares[position].each(&:call)
    end
  end
end
|
@@ -0,0 +1,84 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Exposes itself as a DRb service, queues submitted tasks by priority and
  # runs them on a fixed pool of four threads. A background thread refreshes
  # the heartbeat timestamp every HEARTBEAT_INTERVAL seconds.
  #
  # NOTE(review): every public method is callable by any client that can reach
  # the DRb endpoint, including execute_task; confirm the endpoint is only
  # reachable from trusted hosts.
  class DistributedWorker
    # Seconds between automatic heartbeat updates.
    HEARTBEAT_INTERVAL = 10

    # @return [Time] time of the most recent heartbeat
    attr_reader :last_heartbeat

    # @param host [String] host the DRb service binds to
    # @param port [Integer] port the DRb service binds to
    # @param logger [Logger, nil] logger for shutdown diagnostics; a STDOUT
    #   logger is created on first use when omitted
    def initialize(host = "localhost", port = 8787, logger: nil)
      @logger = logger
      @server = DRb.start_service("druby://#{host}:#{port}", self)
      @last_heartbeat = Time.now
      @task_queue = java.util.concurrent.PriorityBlockingQueue.new
      @executor = java.util.concurrent.Executors.newFixedThreadPool(4)
      @heartbeat_thread = start_heartbeat
      @task_processing_thread = start_task_processing
    end

    # Queues a task for execution.
    #
    # @param task [#call] the work to run
    # @param priority [Integer] higher values are taken from the queue first
    # @return [Hash] `{status: :queued}` or `{status: :failed, error: message}`
    def execute_task(task, priority: 0)
      @task_queue.put(PrioritizedTask.new(task, priority))
      { status: :queued }
    rescue => e
      { status: :failed, error: e.message }
    end

    # Returns a DRb proxy for the worker listening at +worker_url+.
    def self.connect_to_worker(worker_url)
      DRbObject.new_with_uri(worker_url)
    end

    # Records the current time as the latest heartbeat.
    def send_heartbeat
      @last_heartbeat = Time.now
    end

    # Stops the background threads, shuts the executor down (forcing it after
    # ten seconds) and stops the DRb service.
    def shutdown
      # Stop the feeder first so it cannot submit to an executor that has
      # already been shut down.
      @task_processing_thread&.interrupt
      @heartbeat_thread&.interrupt

      @executor.shutdown
      unless @executor.await_termination(10, java.util.concurrent.TimeUnit::SECONDS)
        logger.warn("Executor did not terminate in the expected time. Initiating forced shutdown.")
        @executor.shutdown_now
        @executor.await_termination(10, java.util.concurrent.TimeUnit::SECONDS)
      end

      @server.stop_service
    end

    private

    # Logger used for diagnostics; created lazily so it is never nil.
    def logger
      @logger ||= Logger.new($stdout)
    end

    # Starts the thread that refreshes the heartbeat until it is interrupted.
    def start_heartbeat
      java.lang.Thread.new do
        begin
          loop do
            java.lang.Thread.sleep(HEARTBEAT_INTERVAL * 1000)
            send_heartbeat
          end
        rescue java.lang.InterruptedException
          # Interrupted by #shutdown: leave the loop quietly.
        end
      end.tap(&:start)
    end

    # Starts the thread that moves queued tasks onto the executor until it is
    # interrupted or the executor refuses further work.
    def start_task_processing
      java.lang.Thread.new do
        begin
          loop do
            task = @task_queue.take
            @executor.submit(TaskCallable.new(task))
          end
        rescue java.lang.InterruptedException
          # Interrupted by #shutdown: leave the loop quietly.
        rescue java.util.concurrent.RejectedExecutionException => e
          logger.warn("Task rejected by executor: #{e.message}")
        end
      end.tap(&:start)
    end

    # Queue entry pairing a task with its priority. The comparison is
    # reversed so that a larger priority sorts first.
    class PrioritizedTask
      include Comparable
      attr_reader :task, :priority

      def initialize(task, priority)
        @task = task
        @priority = priority
      end

      def <=>(other)
        other.priority <=> @priority
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Callable that drains lines from a blocking queue and passes each one to
  # the processing block via ChunkProcessor#process_chunk.
  class StreamProcessorCallable < ApplicationCallable
    include ChunkProcessor

    # How long (in milliseconds) to wait for another line before concluding
    # that the queue is finished.
    DEFAULT_POLL_TIMEOUT_MS = 100

    # @param queue [java.util.concurrent.BlockingQueue] queue of lines to consume
    # @param logger [Logger] logger for progress and error messages
    # @param process_chunk_block [Proc] block called with each line
    # @param poll_timeout_ms [Integer] idle time after which the consumer stops
    def initialize(queue, logger, process_chunk_block, poll_timeout_ms: DEFAULT_POLL_TIMEOUT_MS)
      super(&method(:process))
      @queue = queue
      @logger = logger
      @process_chunk_block = process_chunk_block
      @poll_timeout_ms = poll_timeout_ms
    end

    private

    # Consumes lines until the queue stays empty for the poll timeout.
    #
    # A non-blocking poll would let the consumer exit before the producer has
    # queued its first line; waiting briefly avoids that start-up race.
    def process
      loop do
        line = @queue.poll(@poll_timeout_ms, java.util.concurrent.TimeUnit::MILLISECONDS)
        break if line.nil?

        @logger.info("Processing line: #{line.inspect}")
        process_chunk([line], &@process_chunk_block)
      rescue => e
        @logger.error("Error during processing: #{e.message}")
      end
    end
  end
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Callable that runs the task carried by a prioritized queue entry.
  # The superclass receives #call_task as its block (presumably invoking it
  # when the executor calls the callable; ApplicationCallable is defined in
  # another file).
  class TaskCallable < ApplicationCallable
    # @param prioritized_task [#task] queue entry whose task will be run
    def initialize(prioritized_task)
      @task = prioritized_task.task
      super(&method(:call_task))
    end

    private

    # Runs the task; a StandardError is reported on standard output instead
    # of being propagated.
    def call_task
      @task.call
    rescue => e
      puts "Task failed with error: #{e.message}"
    end
  end
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module JRubyParallelProcessing
  # Bounded queue of prioritized tasks that are executed, with retries, on a
  # fixed Java thread pool.
  class TaskQueue
    # @param max_retries [Integer] retries allowed per failing task
    # @param retry_delay [Numeric] seconds to sleep between retries
    # @param max_queue_size [Integer] capacity of the queue
    # @param logger [Logger, nil] logger to use; when omitted a STDOUT logger
    #   at INFO level is created (a supplied logger keeps its own level)
    def initialize(max_retries: 3, retry_delay: 0.1, max_queue_size: 10, logger: nil)
      @max_retries = max_retries
      @retry_delay = retry_delay
      @queue = LinkedBlockingQueue.new(max_queue_size)
      @executor = Executors.new_fixed_thread_pool(10)
      @logger = logger || Logger.new(STDOUT).tap { |default| default.level = Logger::INFO }
    end

    # Adds a task to the queue.
    #
    # @param priority [Comparable] lower values run first
    # @yield the work to perform
    # @raise [ArgumentError] when no block is given
    # @raise [RuntimeError] 'Queue is full' when the queue is at capacity
    def add_task(priority, &task)
      raise ArgumentError, "A task block is required" unless task

      # offer is atomic and never blocks, unlike checking the remaining
      # capacity and then calling put.
      raise 'Queue is full' unless @queue.offer([priority, task])
    end

    # Drains the queue and runs the tasks one after another in ascending
    # priority order; tasks with equal priority keep their insertion order.
    #
    # @return [Array] the [priority, task] pairs in execution order
    def process_tasks
      ordered = drain_queue
                .each_with_index
                .sort_by { |(priority, _task), position| [priority, position] }
                .map(&:first)

      ordered.each { |(_priority, task)| run_with_retries(task) }
    end

    # Shuts the executor down, forcing termination after ten seconds.
    def shutdown
      @logger.info("Shutting down executor.")
      @executor.shutdown
      unless @executor.await_termination(10, java.util.concurrent.TimeUnit::SECONDS)
        @logger.warn("Executor did not terminate in the expected time. Initiating forced shutdown.")
        @executor.shutdown_now
        @executor.await_termination(10, java.util.concurrent.TimeUnit::SECONDS)
      end
      @logger.info("Executor shutdown completed.")
    end

    private

    # Removes and returns everything currently in the queue.
    def drain_queue
      entries = []
      while (entry = @queue.poll)
        entries << entry
      end
      entries
    end

    # Submits the task and waits for it, resubmitting after @retry_delay
    # seconds on failure until @max_retries retries have been used.
    def run_with_retries(task)
      retry_count = 0
      future = nil

      begin
        future = @executor.submit(ApplicationCallable.new(&task))
        future.get
      rescue => e
        retry_count += 1
        if retry_count <= @max_retries
          @logger.info("Task failed with error: #{e.message}. Retrying (attempt #{retry_count})")
          sleep @retry_delay
          retry
        end

        @logger.error("Task failed with error: #{e.message}. Max retries reached.")
        # future is nil when the submission itself failed.
        future&.cancel(true)
      end
    end
  end
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "java"
|
4
|
+
require "logger"
|
5
|
+
java_import "java.util.concurrent.Executors"
|
6
|
+
java_import "java.util.concurrent.Callable"
|
7
|
+
java_import "java.util.concurrent.Future"
|
8
|
+
java_import "java.util.concurrent.LinkedBlockingQueue"
|
9
|
+
java_import "java.util.concurrent.locks.ReentrantLock"
|
10
|
+
java_import "java.util.concurrent.TimeUnit"
|
11
|
+
java_import 'java.util.concurrent.CountDownLatch'
|
12
|
+
require_relative "jruby_parallel_processing/version"
|
13
|
+
require "drb/drb"
|
14
|
+
require "net/http"
|
15
|
+
require "nokogiri"
|
16
|
+
require "json"
|
17
|
+
require_relative "jruby_parallel_processing/chunk_processor"
|
18
|
+
require_relative "jruby_parallel_processing/application_callable"
|
19
|
+
require_relative "jruby_parallel_processing/stream_processor_callable"
|
20
|
+
require_relative "jruby_parallel_processing/task_callable"
|
21
|
+
require_relative "jruby_parallel_processing/data_processor"
|
22
|
+
require_relative "jruby_parallel_processing/api_request_processor"
|
23
|
+
require_relative "jruby_parallel_processing/task_queue"
|
24
|
+
require_relative "jruby_parallel_processing/distributed_worker"
|
25
|
+
|
26
|
+
# Top-level namespace of the gem; the files required above reopen it.
module JRubyParallelProcessing
|
27
|
+
# Gem-specific error class deriving from StandardError.
class Error < StandardError; end
|
28
|
+
end
|
metadata
ADDED
@@ -0,0 +1,69 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jruby_parallel_processing
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Netsky
|
8
|
+
autorequire:
|
9
|
+
bindir: exe
|
10
|
+
cert_chain: []
|
11
|
+
date: 2024-09-06 00:00:00.000000000 Z
|
12
|
+
dependencies: []
|
13
|
+
description: JRubyParallelProcessing is a gem for parallel processing tasks in JRuby.
|
14
|
+
It provides classes for efficient data processing, task management with priority
|
15
|
+
and retries, and parallel API request handling. It includes features such as middleware
|
16
|
+
support, stream processing, and distributed task execution.
|
17
|
+
email:
|
18
|
+
- netsky_prod@proton.me
|
19
|
+
executables: []
|
20
|
+
extensions: []
|
21
|
+
extra_rdoc_files: []
|
22
|
+
files:
|
23
|
+
- ".rspec"
|
24
|
+
- CHANGELOG.md
|
25
|
+
- Gemfile
|
26
|
+
- Gemfile.lock
|
27
|
+
- LICENSE.txt
|
28
|
+
- README.md
|
29
|
+
- Rakefile
|
30
|
+
- lib/jruby_parallel_processing.rb
|
31
|
+
- lib/jruby_parallel_processing/api_request_processor.rb
|
32
|
+
- lib/jruby_parallel_processing/application_callable.rb
|
33
|
+
- lib/jruby_parallel_processing/chunk_processor.rb
|
34
|
+
- lib/jruby_parallel_processing/data_processor.rb
|
35
|
+
- lib/jruby_parallel_processing/distributed_worker.rb
|
36
|
+
- lib/jruby_parallel_processing/stream_processor_callable.rb
|
37
|
+
- lib/jruby_parallel_processing/task_callable.rb
|
38
|
+
- lib/jruby_parallel_processing/task_queue.rb
|
39
|
+
- lib/jruby_parallel_processing/version.rb
|
40
|
+
- sig/jruby_parallel_processing.rbs
|
41
|
+
homepage: https://gitlab.com/netsky_prod/jruby-parallel-processing
|
42
|
+
licenses:
|
43
|
+
- MIT
|
44
|
+
metadata:
|
45
|
+
allowed_push_host: https://rubygems.org
|
46
|
+
homepage_uri: https://gitlab.com/netsky_prod/jruby-parallel-processing
|
47
|
+
source_code_uri: https://gitlab.com/netsky_prod/jruby-parallel-processing
|
48
|
+
changelog_uri: https://gitlab.com/netsky_prod/jruby-parallel-processing/-/releases
|
49
|
+
post_install_message:
|
50
|
+
rdoc_options: []
|
51
|
+
require_paths:
|
52
|
+
- lib
|
53
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
54
|
+
requirements:
|
55
|
+
- - ">="
|
56
|
+
- !ruby/object:Gem::Version
|
57
|
+
version: 2.6.0
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: '0'
|
63
|
+
requirements: []
|
64
|
+
rubygems_version: 3.3.26
|
65
|
+
signing_key:
|
66
|
+
specification_version: 4
|
67
|
+
summary: Efficient parallel processing in JRuby with support for data processing,
|
68
|
+
task management, and API requests.
|
69
|
+
test_files: []
|