parallel-forkmanager 1.0.1 → 2.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +17 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +59 -0
- data/EXAMPLES.yard +40 -0
- data/Gemfile +3 -0
- data/README.md +136 -0
- data/Rakefile +18 -0
- data/examples/callbacks.rb +37 -0
- data/examples/data_structures_advanced.rb +67 -0
- data/examples/data_structures_string.rb +44 -0
- data/examples/parallel_http_get.rb +64 -0
- data/examples/use_pfm.rb +30 -0
- data/lib/parallel/forkmanager.rb +693 -411
- data/lib/parallel/forkmanager/dummy_process_status.rb +30 -0
- data/lib/parallel/forkmanager/error.rb +20 -0
- data/lib/parallel/forkmanager/process_interface.rb +51 -0
- data/lib/parallel/forkmanager/serializer.rb +59 -0
- data/lib/parallel/forkmanager/version.rb +8 -0
- data/parallel-forkmanager.gemspec +32 -0
- metadata +115 -36
- data/parallel_http_get.rb +0 -53
- data/use_pfm.rb +0 -40
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7f3f673b359f8e36f9477332915c1ba239b6a40549e1077340d18139684e69a3
|
4
|
+
data.tar.gz: 33cbc848bd7faa60e297e8d064db80251e1422aae9d13fd3b081bab7169b2ede
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 04a22d48287995e4e7759c5310becad54130d7c1624c2b096ef512dc955cca90a1b75e95fef78298e0269bc008fd31ea34a84a3480922e21059fe030bf057b10
|
7
|
+
data.tar.gz: 948fccecafe9d36a62386a29b01665e92638c956b001b39b9ac142e401f239673a10c52e7bd86e1ab7536b23ad84a629d8f4fc86d3f5a995ebe8286bda11a48b
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
---
|
2
|
+
Style/StringLiterals:
|
3
|
+
EnforcedStyle: double_quotes
|
4
|
+
Style/Documentation:
|
5
|
+
# Lots of files under lib/parallel/forkmanager/ start with
|
6
|
+
#
|
7
|
+
# module Parallel
|
8
|
+
# class ForkManager
|
9
|
+
# ...
|
10
|
+
#
|
11
|
+
# which wants a comment before the class ForkManager to keep rubocop
|
12
|
+
# happy.
|
13
|
+
#
|
14
|
+
# That should have been documented in lib/parallel/forkmanager.rb so this
|
15
|
+
# exclude stops those warnings without littering the code with directives.
|
16
|
+
Exclude:
|
17
|
+
- "lib/parallel/forkmanager/**/*.rb"
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Parallel::ForkManager Changelog
|
2
|
+
|
3
|
+
## 2.0.5 (2015-05-23)
|
4
|
+
|
5
|
+
- Adds reap_finished_children, is_child and is_parent to match Perl PFM 1.14.
|
6
|
+
|
7
|
+
## 2.0.3 (2015-05-10)
|
8
|
+
|
9
|
+
- Start adding tests
|
10
|
+
- Switch to Rubygems packaging
|
11
|
+
- Allow use of Rubocop
|
12
|
+
|
13
|
+
## 2.0.2 (2015-05-08)
|
14
|
+
|
15
|
+
- Fixes bug in garbage collection.
|
16
|
+
|
17
|
+
## 2.0.1 (2015-05-07)
|
18
|
+
|
19
|
+
- Minor doc fixes.
|
20
|
+
- Fixes garbage collection.
|
21
|
+
|
22
|
+
## 2.0.0 (2015-05-04)
|
23
|
+
|
24
|
+
- Refresh to match changes to Perl PFM 1.12.
|
25
|
+
- May the 4th be with you.
|
26
|
+
|
27
|
+
## 1.5.1 (2011-03-04)
|
28
|
+
|
29
|
+
- Resolves bug #29043 wait_one_child failed to retrieve object.
|
30
|
+
- Adds conversion of Object to Hash before serialization to avoid 'singleton can't be dumped' error.
|
31
|
+
- Minor documentation changes for initialize().
|
32
|
+
|
33
|
+
## 1.5.0 (2011-02-25)
|
34
|
+
|
35
|
+
- Implements data structure retrieval as had appeared in Perl Parallel::ForkManager 0.7.6.
|
36
|
+
- Removes support for passing Proc to run_on_* methods; now supports blocks instead.
|
37
|
+
- Documentation updates and code cleanup.
|
38
|
+
|
39
|
+
## 1.2.0 (2010-02-01)
|
40
|
+
|
41
|
+
- Resolves bug #27748 finish throws an error when used with start(ident).
|
42
|
+
- Adds block support to run_on_start(), run_on_wait(), run_on_finish().
|
43
|
+
|
44
|
+
## 1.1.1 (2010-01-05)
|
45
|
+
|
46
|
+
- Resolves bug with Errno::ECHILD.
|
47
|
+
|
48
|
+
## 1.1.0 (2010-01-01)
|
49
|
+
|
50
|
+
- Resolves bug [#27661] forkmanager doesn't fork!.
|
51
|
+
- Adds block support to start() w/doc changes for same.
|
52
|
+
|
53
|
+
## 1.0.1 (2009-10-24)
|
54
|
+
|
55
|
+
- Resolves bug #27328 dies with max procs 1.
|
56
|
+
|
57
|
+
## 1.0.0 (2008-11-03)
|
58
|
+
|
59
|
+
- Initial release
|
data/EXAMPLES.yard
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
= Parallel::ForkManager Examples
|
2
|
+
|
3
|
+
The programs in the examples directory show the use of the Parallel::ForkManager
|
4
|
+
gem.
|
5
|
+
|
6
|
+
To run them from this directory use a command like:
|
7
|
+
|
8
|
+
ruby -Ilib examples/data_structures_advanced.rb
|
9
|
+
|
10
|
+
The programs are discussed below.
|
11
|
+
|
12
|
+
== examples/callbacks.rb
|
13
|
+
|
14
|
+
Example of a program using callbacks to get child exit codes.
|
15
|
+
|
16
|
+
{include:file:examples/callbacks.rb}
|
17
|
+
|
18
|
+
== data_structures_string.rb
|
19
|
+
|
20
|
+
In this simple example, each child sends back a string.
|
21
|
+
|
22
|
+
{include:file:examples/data_structures_string.rb}
|
23
|
+
|
24
|
+
== data_structures_advanced.rb
|
25
|
+
|
26
|
+
A second data structure retrieval example demonstrates how children
|
27
|
+
decide whether or not to send anything back, what to send and how the
|
28
|
+
parent should process whatever is retrieved.
|
29
|
+
|
30
|
+
{include:file:examples/data_structures_advanced.rb}
|
31
|
+
|
32
|
+
== parallel_http_get.rb
|
33
|
+
|
34
|
+
Use multiple workers to fetch data from URLs.
|
35
|
+
|
36
|
+
{include:file:examples/parallel_http_get.rb}
|
37
|
+
|
38
|
+
== use_pfm.rb
|
39
|
+
|
40
|
+
{include:file:examples/use_pfm.rb}
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
# Parallel::ForkManager - A simple parallel processing fork manager.
|
2
|
+
|
3
|
+
[![Build Status](https://travis-ci.org/npatwardhan/ruby-parallel-forkmanager.svg?branch=master)](https://travis-ci.org/npatwardhan/ruby-parallel-forkmanager)
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
Parallel::ForkManager is used for operations that you would like to do
|
8
|
+
in parallel. Typical use is a downloader which could be retrieving
|
9
|
+
hundreds and/or thousands of files.
|
10
|
+
|
11
|
+
Parallel::ForkManager, as its name suggests, uses `fork` to handle parallel
|
12
|
+
processing instead of threads. If you've used `fork` before, you're aware
|
13
|
+
that you need to be responsible for managing (i.e. cleaning up) the
|
14
|
+
processes that were created as a result of the `fork`.
|
15
|
+
|
16
|
+
Parallel::ForkManager handles this for you such that you `start` and
|
17
|
+
`finish` without having to worry about child processes along
|
18
|
+
the way. Further, Parallel::ForkManager provides useful callbacks
|
19
|
+
that you can use when a child starts and/or finishes, or while you're
|
20
|
+
waiting for a child to complete.
|
21
|
+
|
22
|
+
The code for a downloader that uses Net::HTTP would look like this:
|
23
|
+
|
24
|
+
require "rubygems"
|
25
|
+
require "net/http"
|
26
|
+
require "parallel/forkmanager"
|
27
|
+
|
28
|
+
my_urls = %w(url1 url2 urlN)
|
29
|
+
|
30
|
+
max_proc = 30
|
31
|
+
my_timeout = 5
|
32
|
+
|
33
|
+
pm = Parallel::ForkManager.new(max_proc)
|
34
|
+
|
35
|
+
my_urls.each do |my_url|
|
36
|
+
pm.start(my_url) && next # blocks until new fork slot is available
|
37
|
+
# doing stuff here with my_url will be in a child
|
38
|
+
url = URI.parse(my_url)
|
39
|
+
|
40
|
+
begin
|
41
|
+
http = Net::HTTP.new(url.host, url.port)
|
42
|
+
http.open_timeout = http.read_timeout = my_timeout
|
43
|
+
res = http.get(url.path)
|
44
|
+
|
45
|
+
status = res.code
|
46
|
+
if status.to_i != 200
|
47
|
+
print "Cannot get #{url.path} from #{url.host}!\n"
|
48
|
+
pm.finish(255)
|
49
|
+
else
|
50
|
+
pm.finish(0)
|
51
|
+
end
|
52
|
+
rescue Timeout::Error, Errno::ECONNREFUSED => e
|
53
|
+
print "*** ERROR: #{my_url}: #{e.message}!\n"
|
54
|
+
pm.finish(255)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
pm.wait_all_children
|
59
|
+
|
60
|
+
First you need to instantiate the ForkManager with the `new` constructor.
|
61
|
+
You must specify the maximum number of processes to be created. If you
|
62
|
+
specify 0, then *no* `fork` will be done; this is good for debugging purposes.
|
63
|
+
|
64
|
+
Next, use `pm.start` to do the `fork`. `pm.start` returns `nil` in the child
|
65
|
+
process, and the child's pid in the parent process. The `&& next` skips the internal
|
66
|
+
loop in the parent process. *Note:* `pm.start` dies if the `fork` fails.
|
67
|
+
|
68
|
+
`pm.finish` terminates the child process (assuming a fork was done in the
|
69
|
+
`start`).
|
70
|
+
|
71
|
+
*Note:* You cannot use `pm.start` if you are already in the child process.
|
72
|
+
If you want to manage another set of subprocesses in the child process,
|
73
|
+
you must instantiate another Parallel::ForkManager object!
|
74
|
+
|
75
|
+
## Bugs and Limitations
|
76
|
+
|
77
|
+
Parallel::ForkManager is a Ruby port of Perl Parallel::ForkManager
|
78
|
+
1.14. It was originally ported from Perl Parallel::ForkManager 0.7.5
|
79
|
+
but was recently updated to integrate features implemented in Perl
|
80
|
+
Parallel::ForkManager versions 0.7.6 - 1.14. Bug reports and feature
|
81
|
+
requests are always welcome.
|
82
|
+
|
83
|
+
Do not use Parallel::ForkManager in an environment where other child
|
84
|
+
processes can affect the run of the main program, so using this module
|
85
|
+
is not recommended in an environment where `fork` / `wait` is already used.
|
86
|
+
|
87
|
+
If you want to use more than one copy of the Parallel::ForkManager then
|
88
|
+
you have to make sure that all child processes are terminated before you
|
89
|
+
use the second object in the main program.
|
90
|
+
|
91
|
+
You are free to use a new copy of Parallel::ForkManager in the child
|
92
|
+
processes, although I don't think it makes sense.
|
93
|
+
|
94
|
+
## Copyright and License
|
95
|
+
|
96
|
+
### Author
|
97
|
+
|
98
|
+
Nathan Patwardhan <noopy.org@gmail.com>
|
99
|
+
|
100
|
+
### Copyright
|
101
|
+
|
102
|
+
Copyright (c) 2008 - 2020 Nathan Patwardhan
|
103
|
+
|
104
|
+
### License
|
105
|
+
|
106
|
+
Distributed under the same terms as Ruby
|
107
|
+
|
108
|
+
## Credits
|
109
|
+
|
110
|
+
### Documentation
|
111
|
+
|
112
|
+
Nathan Patwardhan <noopy.org@gmail.com>, based on Perl
|
113
|
+
Parallel::ForkManager documentation by Noah Robin
|
114
|
+
<sitz@onastick.net> and dLux <dlux@dlux.hu>.
|
115
|
+
|
116
|
+
### Credits (Perl):
|
117
|
+
|
118
|
+
- dLux <dlux@dlux.hu> (author, original Perl module)
|
119
|
+
- Gábor Szabó <szabgab@cpan.org> (co-maintainer)
|
120
|
+
- Michael Gang (bug report)
|
121
|
+
- Noah Robin <sitz@onastick.net> (documentation tweaks)
|
122
|
+
- Chuck Hirstius <chirstius@megapathdsl.net>
|
123
|
+
(callback exit status, original Perl example)
|
124
|
+
- Grant Hopwood <hopwoodg@valero.com> (win32 port)
|
125
|
+
- Mark Southern <mark_southern@merck.com> (bugfix)
|
126
|
+
- Ken Clarke [www.perlprogrammer.net](http://www.perlprogrammer.net)
|
127
|
+
(data structure retrieval)
|
128
|
+
|
129
|
+
### Credits (Ruby):
|
130
|
+
|
131
|
+
- Robert Klemme <shortcutter@googlemail.com>,
|
132
|
+
David A. Black <dblack@rubypal.com> (general awesomeness)
|
133
|
+
- Roger Pack <rogerdpack@gmail.com> (bugfix, fork semantics in start,
|
134
|
+
doc changes)
|
135
|
+
- Mike Stok <mike@stok.ca> (test cases, percussion, backing vocals)
|
136
|
+
- Akinori MUSHA <email@redacted>
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "yard"
|
3
|
+
|
4
|
+
task default: "test"
|
5
|
+
|
6
|
+
task :test do
|
7
|
+
$LOAD_PATH.unshift "lib", "test"
|
8
|
+
Dir.glob("./test/test_*/**/*.rb") { |f| require f }
|
9
|
+
end
|
10
|
+
|
11
|
+
OTHER_DOC_FILES = %w(README.md EXAMPLES.yard CHANGELOG.md)
|
12
|
+
YARD::Rake::YardocTask.new do |t|
|
13
|
+
# The reason for .md and .yard is that Github won't show the included
|
14
|
+
# files if it's markdown, so this attempts to put the "useful" files in
|
15
|
+
# markdown.
|
16
|
+
t.files = %w(lib/**/*.rb - README.md CHANGELOG.md EXAMPLES.yard)
|
17
|
+
t.stats_options = ["--list-undoc"] # optional
|
18
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 5
|
7
|
+
names = %w(Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara)
|
8
|
+
|
9
|
+
pm = Parallel::ForkManager.new(max_procs)
|
10
|
+
|
11
|
+
# Setup a callback for when a child finishes up so we can get its exit code
|
12
|
+
pm.run_on_finish do |pid, exit_code, ident|
|
13
|
+
puts "** #{ident} just got out of the pool with PID #{pid} and exit code: #{exit_code}"
|
14
|
+
end
|
15
|
+
|
16
|
+
pm.run_on_start do |pid, ident|
|
17
|
+
puts "** #{ident} started, pid: #{pid}"
|
18
|
+
end
|
19
|
+
|
20
|
+
pm.run_on_wait(0.5) do
|
21
|
+
puts "** Have to wait for one children ..."
|
22
|
+
end
|
23
|
+
|
24
|
+
names.each_index do |child|
|
25
|
+
pm.start(names[child]) && next
|
26
|
+
|
27
|
+
# This code is the child process
|
28
|
+
puts "This is #{names[child]}, Child number #{child}"
|
29
|
+
sleep(2 * child)
|
30
|
+
puts "#{names[child]}, Child #{child} is about to get out..."
|
31
|
+
sleep 1
|
32
|
+
pm.finish(child) # pass an exit code to finish
|
33
|
+
end
|
34
|
+
|
35
|
+
puts "Waiting for Children..."
|
36
|
+
pm.wait_all_children
|
37
|
+
puts "Everybody is out of the pool!"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 20
|
7
|
+
|
8
|
+
pm = Parallel::ForkManager.new(max_procs, "tempdir" => "/tmp")
|
9
|
+
|
10
|
+
# data structure retrieval and handling
|
11
|
+
retrieved_responses = {} # for collecting responses
|
12
|
+
|
13
|
+
# data structure retrieval and handling
|
14
|
+
pm.run_on_finish do |_pid, _exit_code, ident, _exit_signal, _core_dump, data|
|
15
|
+
if data # test rather than assume child sent anything
|
16
|
+
puts "#{ident} returned #{data.inspect}."
|
17
|
+
|
18
|
+
retrieved_responses[ident] = data
|
19
|
+
else
|
20
|
+
puts "#{ident} did not send anything."
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# generate a list of instructions
|
25
|
+
instructions = [ # a unique identifier and what the child process should send
|
26
|
+
{ "name" => "ENV keys as a string", "send" => "keys" },
|
27
|
+
{ "name" => "Send Nothing" },
|
28
|
+
{ "name" => "Childs ENV", "send" => "all" },
|
29
|
+
{ "name" => "Child chooses randomly", "send" => "random" },
|
30
|
+
{ "name" => "Invalid send instructions", "send" => "Na Na Nana Na" },
|
31
|
+
{ "name" => "ENV values in an array", "send" => "values" }
|
32
|
+
]
|
33
|
+
|
34
|
+
# run the parallel processes
|
35
|
+
instructions.each do |instruction|
|
36
|
+
# this time we are using an explicit, unique child process identifier
|
37
|
+
pm.start(instruction["name"]) && next
|
38
|
+
|
39
|
+
unless instruction.key?("send")
|
40
|
+
puts "MT name #{instruction['name']}"
|
41
|
+
pm.finish(0)
|
42
|
+
end
|
43
|
+
|
44
|
+
data = case instruction["send"]
|
45
|
+
when "keys" then ENV.keys
|
46
|
+
when "values" then ENV.values
|
47
|
+
when "all" then ENV.to_h
|
48
|
+
when "random"
|
49
|
+
["I'm just a string.",
|
50
|
+
%w(I am an array),
|
51
|
+
{ "type" => "associative array",
|
52
|
+
"synonym" => "hash",
|
53
|
+
"cool" => "very :)" }
|
54
|
+
].sample
|
55
|
+
else
|
56
|
+
"Invalid instructions: #{instruction['send']}"
|
57
|
+
end
|
58
|
+
|
59
|
+
pm.finish(0, data)
|
60
|
+
end
|
61
|
+
|
62
|
+
pm.wait_all_children
|
63
|
+
|
64
|
+
# post fork processing of returned data structures
|
65
|
+
retrieved_responses.keys.sort.each do |response|
|
66
|
+
puts "Post processing \"#{response}\"..."
|
67
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 2
|
7
|
+
persons = %w(Fred Wilma Ernie Bert Lucy Ethel Curly Moe Larry)
|
8
|
+
|
9
|
+
pm = Parallel::ForkManager.new(max_procs, "tempdir" => "/tmp")
|
10
|
+
|
11
|
+
# data structure retrieval and handling
|
12
|
+
pm.run_on_finish do |pid, _exit_code, _ident, _exit_signal, _core_dump, data|
|
13
|
+
if data # children are not forced to send anything
|
14
|
+
puts data
|
15
|
+
else # problems occurring during storage or retrieval will throw a warning
|
16
|
+
puts "No message received from child process #{pid}!"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# prep random statement components
|
21
|
+
foods = [
|
22
|
+
"chocolate", "ice cream", "peanut butter", "pickles", "pizza", "bacon",
|
23
|
+
"pancakes", "spaghetti", "cookies"
|
24
|
+
]
|
25
|
+
opinions = [
|
26
|
+
"loves", "can't stand", "always wants more", "will walk 100 miles for",
|
27
|
+
"only eats", "would starve rather than eat"
|
28
|
+
]
|
29
|
+
|
30
|
+
# run the parallel processes
|
31
|
+
persons.each do |person|
|
32
|
+
pm.start && next
|
33
|
+
|
34
|
+
# generate a random statement about food preferences
|
35
|
+
statement = "#{person} #{opinions.sample} #{foods.sample}"
|
36
|
+
|
37
|
+
if rand(5) > 0
|
38
|
+
pm.finish(0, statement)
|
39
|
+
else
|
40
|
+
pm.finish(0)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
pm.wait_all_children
|