parallel-forkmanager 1.0.1 → 2.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +6 -0
- data/.rubocop.yml +17 -0
- data/.travis.yml +5 -0
- data/CHANGELOG.md +59 -0
- data/EXAMPLES.yard +40 -0
- data/Gemfile +3 -0
- data/README.md +136 -0
- data/Rakefile +18 -0
- data/examples/callbacks.rb +37 -0
- data/examples/data_structures_advanced.rb +67 -0
- data/examples/data_structures_string.rb +44 -0
- data/examples/parallel_http_get.rb +64 -0
- data/examples/use_pfm.rb +30 -0
- data/lib/parallel/forkmanager.rb +693 -411
- data/lib/parallel/forkmanager/dummy_process_status.rb +30 -0
- data/lib/parallel/forkmanager/error.rb +20 -0
- data/lib/parallel/forkmanager/process_interface.rb +51 -0
- data/lib/parallel/forkmanager/serializer.rb +59 -0
- data/lib/parallel/forkmanager/version.rb +8 -0
- data/parallel-forkmanager.gemspec +32 -0
- metadata +115 -36
- data/parallel_http_get.rb +0 -53
- data/use_pfm.rb +0 -40
checksums.yaml
ADDED
@@ -0,0 +1,7 @@
|
|
1
|
+
---
|
2
|
+
SHA256:
|
3
|
+
metadata.gz: 7f3f673b359f8e36f9477332915c1ba239b6a40549e1077340d18139684e69a3
|
4
|
+
data.tar.gz: 33cbc848bd7faa60e297e8d064db80251e1422aae9d13fd3b081bab7169b2ede
|
5
|
+
SHA512:
|
6
|
+
metadata.gz: 04a22d48287995e4e7759c5310becad54130d7c1624c2b096ef512dc955cca90a1b75e95fef78298e0269bc008fd31ea34a84a3480922e21059fe030bf057b10
|
7
|
+
data.tar.gz: 948fccecafe9d36a62386a29b01665e92638c956b001b39b9ac142e401f239673a10c52e7bd86e1ab7536b23ad84a629d8f4fc86d3f5a995ebe8286bda11a48b
|
data/.gitignore
ADDED
data/.rubocop.yml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
---
|
2
|
+
Style/StringLiterals:
|
3
|
+
EnforcedStyle: double_quotes
|
4
|
+
Style/Documentation:
|
5
|
+
# Lots of files under lib/parallel/forkmanager/ start with
|
6
|
+
#
|
7
|
+
# module Parallel
|
8
|
+
# class ForkManager
|
9
|
+
# ...
|
10
|
+
#
|
11
|
+
# which wants a comment before the class ForkManager to keep rubocop
|
12
|
+
# happy.
|
13
|
+
#
|
14
|
+
# That should have been documented in lib/parallel/forkmanager.rb so this
|
15
|
+
# exclude stops those warnings without littering the code with directives.
|
16
|
+
Exclude:
|
17
|
+
- "lib/parallel/forkmanager/**/*.rb"
|
data/.travis.yml
ADDED
data/CHANGELOG.md
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Parallel::ForkManager Changelog
|
2
|
+
|
3
|
+
## 2.0.5 (2015-05-23)
|
4
|
+
|
5
|
+
- Adds reap_finished_children, is_child and is_parent to match Perl PFM 1.14.
|
6
|
+
|
7
|
+
## 2.0.3 (2015-05-10)
|
8
|
+
|
9
|
+
- Start adding tests
|
10
|
+
- Switch to Rubygems packaging
|
11
|
+
- Allow use of Rubocop
|
12
|
+
|
13
|
+
## 2.0.2 (2015-05-08)
|
14
|
+
|
15
|
+
- Fixes bug in garbage collection.
|
16
|
+
|
17
|
+
## 2.0.1 (2015-05-07)
|
18
|
+
|
19
|
+
- Minor doc fixes.
|
20
|
+
- Fixes garbage collection.
|
21
|
+
|
22
|
+
## 2.0.0 (2015-05-04)
|
23
|
+
|
24
|
+
- Refresh to match changes to Perl PFM 1.12.
|
25
|
+
- May the 4th be with you.
|
26
|
+
|
27
|
+
## 1.5.1 (2011-03-04)
|
28
|
+
|
29
|
+
- Resolves bug #29043 wait_one_child failed to retrieve object.
|
30
|
+
- Adds conversion of Object to Hash before serialization to avoid 'singleton can't be dumped' error.
|
31
|
+
- Minor documentation changes for initialize().
|
32
|
+
|
33
|
+
## 1.5.0 (2011-02-25)
|
34
|
+
|
35
|
+
- Implements data structure retrieval as had appeared in Perl Parallel::ForkManager 0.7.6.
|
36
|
+
- Removes support for passing Proc to run_on_* methods; now supports blocks instead.
|
37
|
+
- Documentation updates and code cleanup.
|
38
|
+
|
39
|
+
## 1.2.0 (2010-02-01)
|
40
|
+
|
41
|
+
- Resolves bug #27748 finish throws an error when used with start(ident).
|
42
|
+
- Adds block support to run_on_start(), run_on_wait(), run_on_finish().
|
43
|
+
|
44
|
+
## 1.1.1 (2010-01-05)
|
45
|
+
|
46
|
+
- Resolves bug with Errno::ECHILD.
|
47
|
+
|
48
|
+
## 1.1.0 (2010-01-01)
|
49
|
+
|
50
|
+
- Resolves bug [#27661] forkmanager doesn't fork!.
|
51
|
+
- Adds block support to start() w/doc changes for same.
|
52
|
+
|
53
|
+
## 1.0.1 (2009-10-24)
|
54
|
+
|
55
|
+
- Resolves bug #27328 dies with max procs 1.
|
56
|
+
|
57
|
+
## 1.0.0 (2008-11-03)
|
58
|
+
|
59
|
+
- Initial release
|
data/EXAMPLES.yard
ADDED
@@ -0,0 +1,40 @@
|
|
1
|
+
= Parallel::ForkManager Examples
|
2
|
+
|
3
|
+
The programs in the examples directory show the use of the Parallel::ForkManager
|
4
|
+
gem.
|
5
|
+
|
6
|
+
To run them from this directory use a command like:
|
7
|
+
|
8
|
+
ruby -Ilib examples/data_structures_advanced.rb
|
9
|
+
|
10
|
+
The programs are discussed below.
|
11
|
+
|
12
|
+
== examples/callbacks.rb
|
13
|
+
|
14
|
+
Example of a program using callbacks to get child exit codes.
|
15
|
+
|
16
|
+
{include:file:examples/callbacks.rb}
|
17
|
+
|
18
|
+
== data_structures_string.rb
|
19
|
+
|
20
|
+
In this simple example, each child sends back a string.
|
21
|
+
|
22
|
+
{include:file:examples/data_structures_string.rb}
|
23
|
+
|
24
|
+
== data_structures_advanced.rb
|
25
|
+
|
26
|
+
A second data structure retrieval example demonstrates how children
|
27
|
+
decide whether or not to send anything back, what to send and how the
|
28
|
+
parent should process whatever is retrieved.
|
29
|
+
|
30
|
+
{include:file:examples/data_structures_advanced.rb}
|
31
|
+
|
32
|
+
== parallel_http_get.rb
|
33
|
+
|
34
|
+
Use multiple workers to fetch data from URLs.
|
35
|
+
|
36
|
+
{include:file:examples/parallel_http_get.rb}
|
37
|
+
|
38
|
+
== use_pfm.rb
|
39
|
+
|
40
|
+
{include:file:examples/use_pfm.rb}
|
data/Gemfile
ADDED
data/README.md
ADDED
@@ -0,0 +1,136 @@
|
|
1
|
+
# Parallel::ForkManager - A simple parallel processing fork manager.
|
2
|
+
|
3
|
+
[Build Status](https://travis-ci.org/npatwardhan/ruby-parallel-forkmanager)
|
4
|
+
|
5
|
+
## Overview
|
6
|
+
|
7
|
+
Parallel::ForkManager is used for operations that you would like to do
|
8
|
+
in parallel. Typical use is a downloader which could be retrieving
|
9
|
+
hundreds and/or thousands of files.
|
10
|
+
|
11
|
+
Parallel::ForkManager, as its name suggests, uses `fork` to handle parallel
|
12
|
+
processing instead of threads. If you've used `fork` before, you're aware
|
13
|
+
that you need to be responsible for managing (i.e. cleaning up) the
|
14
|
+
processes that were created as a result of the `fork`.
|
15
|
+
|
16
|
+
Parallel::ForkManager handles this for you such that you `start` and
|
17
|
+
`finish` without having to worry about child processes along
|
18
|
+
the way. Further, Parallel::ForkManager provides useful callbacks
|
19
|
+
that you can use when a child starts and/or finishes, or while you're
|
20
|
+
waiting for a child to complete.
|
21
|
+
|
22
|
+
The code for a downloader that uses Net::HTTP would look like this:
|
23
|
+
|
24
|
+
require "rubygems"
|
25
|
+
require "net/http"
|
26
|
+
require "parallel/forkmanager"
|
27
|
+
|
28
|
+
my_urls = %w(url1 url2 urlN)
|
29
|
+
|
30
|
+
max_proc = 30
|
31
|
+
my_timeout = 5
|
32
|
+
|
33
|
+
pm = Parallel::ForkManager.new(max_proc)
|
34
|
+
|
35
|
+
my_urls.each do |my_url|
|
36
|
+
pm.start(my_url) && next # blocks until new fork slot is available
|
37
|
+
# doing stuff here with my_url will be in a child
|
38
|
+
url = URI.parse(my_url)
|
39
|
+
|
40
|
+
begin
|
41
|
+
http = Net::HTTP.new(url.host, url.port)
|
42
|
+
http.open_timeout = http.read_timeout = my_timeout
|
43
|
+
res = http.get(url.path)
|
44
|
+
|
45
|
+
status = res.code
|
46
|
+
if status.to_i != 200
|
47
|
+
print "Cannot get #{url.path} from #{url.host}!\n"
|
48
|
+
pm.finish(255)
|
49
|
+
else
|
50
|
+
pm.finish(0)
|
51
|
+
end
|
52
|
+
rescue Timeout::Error, Errno::ECONNREFUSED => e
|
53
|
+
print "*** ERROR: #{my_url}: #{e.message}!\n"
|
54
|
+
pm.finish(255)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
pm.wait_all_children
|
59
|
+
|
60
|
+
First you need to instantiate the ForkManager with the `new` constructor.
|
61
|
+
You must specify the maximum number of processes to be created. If you
|
62
|
+
specify 0, then *no* `fork` will be done; this is good for debugging purposes.
|
63
|
+
|
64
|
+
Next, use `pm.start` to do the `fork`. `pm.start` returns `nil` in the child
|
65
|
+
process, and the child's PID in the parent process. The `&& next` skips the internal
|
66
|
+
loop in the parent process. *Note:* `pm.start` dies if the `fork` fails.
|
67
|
+
|
68
|
+
`pm.finish` terminates the child process (assuming a fork was done in the
|
69
|
+
`start`).
|
70
|
+
|
71
|
+
*Note:* You cannot use `pm.start` if you are already in the child process.
|
72
|
+
If you want to manage another set of subprocesses in the child process,
|
73
|
+
you must instantiate another Parallel::ForkManager object!
|
74
|
+
|
75
|
+
## Bugs and Limitations
|
76
|
+
|
77
|
+
Parallel::ForkManager is a Ruby port of Perl Parallel::ForkManager
|
78
|
+
1.14. It was originally ported from Perl Parallel::ForkManager 0.7.5
|
79
|
+
but was recently updated to integrate features implemented in Perl
|
80
|
+
Parallel::ForkManager versions 0.7.6 - 1.14. Bug reports and feature
|
81
|
+
requests are always welcome.
|
82
|
+
|
83
|
+
Do not use Parallel::ForkManager in an environment where other child
|
84
|
+
processes can affect the run of the main program, so using this module
|
85
|
+
is not recommended in an environment where `fork` / `wait` is already used.
|
86
|
+
|
87
|
+
If you want to use more than one copy of the Parallel::ForkManager then
|
88
|
+
you have to make sure that all child processes are terminated before you
|
89
|
+
use the second object in the main program.
|
90
|
+
|
91
|
+
You are free to use a new copy of Parallel::ForkManager in the child
|
92
|
+
processes, although I don't think it makes sense.
|
93
|
+
|
94
|
+
## Copyright and License
|
95
|
+
|
96
|
+
### Author
|
97
|
+
|
98
|
+
Nathan Patwardhan <noopy.org@gmail.com>
|
99
|
+
|
100
|
+
### Copyright
|
101
|
+
|
102
|
+
Copyright (c) 2008 - 2020 Nathan Patwardhan
|
103
|
+
|
104
|
+
### License
|
105
|
+
|
106
|
+
Distributed under the same terms as Ruby
|
107
|
+
|
108
|
+
## Credits
|
109
|
+
|
110
|
+
### Documentation
|
111
|
+
|
112
|
+
Nathan Patwardhan <noopy.org@gmail.com>, based on Perl
|
113
|
+
Parallel::ForkManager documentation by Noah Robin
|
114
|
+
<sitz@onastick.net> and dLux <dlux@dlux.hu>.
|
115
|
+
|
116
|
+
### Credits (Perl):
|
117
|
+
|
118
|
+
- dLux <dlux@dlux.hu> (author, original Perl module)
|
119
|
+
- Gábor Szabó <szabgab@cpan.org> (co-maintainer)
|
120
|
+
- Michael Gang (bug report)
|
121
|
+
- Noah Robin <sitz@onastick.net> (documentation tweaks)
|
122
|
+
- Chuck Hirstius <chirstius@megapathdsl.net>
|
123
|
+
(callback exit status, original Perl example)
|
124
|
+
- Grant Hopwood <hopwoodg@valero.com> (win32 port)
|
125
|
+
- Mark Southern <mark_southern@merck.com> (bugfix)
|
126
|
+
- Ken Clarke [www.perlprogrammer.net](http://www.perlprogrammer.net)
|
127
|
+
(data structure retrieval)
|
128
|
+
|
129
|
+
### Credits (Ruby):
|
130
|
+
|
131
|
+
- Robert Klemme <shortcutter@googlemail.com>,
|
132
|
+
David A. Black <dblack@rubypal.com> (general awesomeness)
|
133
|
+
- Roger Pack <rogerdpack@gmail.com> (bugfix, fork semantics in start,
|
134
|
+
doc changes)
|
135
|
+
- Mike Stok <mike@stok.ca> (test cases, percussion, backing vocals)
|
136
|
+
- Akinori MUSHA <email@redacted>
|
data/Rakefile
ADDED
@@ -0,0 +1,18 @@
|
|
1
|
+
require "bundler/gem_tasks"
|
2
|
+
require "yard"
|
3
|
+
|
4
|
+
task default: "test"
|
5
|
+
|
6
|
+
task :test do
|
7
|
+
$LOAD_PATH.unshift "lib", "test"
|
8
|
+
Dir.glob("./test/test_*/**/*.rb") { |f| require f }
|
9
|
+
end
|
10
|
+
|
11
|
+
OTHER_DOC_FILES = %w(README.md EXAMPLES.yard CHANGELOG.md)
|
12
|
+
YARD::Rake::YardocTask.new do |t|
|
13
|
+
# The reason for .md and .yard is that Github won't show the included
|
14
|
+
# files if it's markdown, so this attempts to put the "useful" files in
|
15
|
+
# markdown.
|
16
|
+
t.files = %w(lib/**/*.rb - README.md CHANGELOG.md EXAMPLES.yard)
|
17
|
+
t.stats_options = ["--list-undoc"] # optional
|
18
|
+
end
|
@@ -0,0 +1,37 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 5
|
7
|
+
names = %w(Fred Jim Lily Steve Jessica Bob Dave Christine Rico Sara)
|
8
|
+
|
9
|
+
pm = Parallel::ForkManager.new(max_procs)
|
10
|
+
|
11
|
+
# Setup a callback for when a child finishes up so we can get its exit code
|
12
|
+
pm.run_on_finish do |pid, exit_code, ident|
|
13
|
+
puts "** #{ident} just got out of the pool with PID #{pid} and exit code: #{exit_code}"
|
14
|
+
end
|
15
|
+
|
16
|
+
pm.run_on_start do |pid, ident|
|
17
|
+
puts "** #{ident} started, pid: #{pid}"
|
18
|
+
end
|
19
|
+
|
20
|
+
pm.run_on_wait(0.5) do
|
21
|
+
puts "** Have to wait for one children ..."
|
22
|
+
end
|
23
|
+
|
24
|
+
names.each_index do |child|
|
25
|
+
pm.start(names[child]) && next
|
26
|
+
|
27
|
+
# This code is the child process
|
28
|
+
puts "This is #{names[child]}, Child number #{child}"
|
29
|
+
sleep(2 * child)
|
30
|
+
puts "#{names[child]}, Child #{child} is about to get out..."
|
31
|
+
sleep 1
|
32
|
+
pm.finish(child) # pass an exit code to finish
|
33
|
+
end
|
34
|
+
|
35
|
+
puts "Waiting for Children..."
|
36
|
+
pm.wait_all_children
|
37
|
+
puts "Everybody is out of the pool!"
|
@@ -0,0 +1,67 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 20
|
7
|
+
|
8
|
+
pm = Parallel::ForkManager.new(max_procs, "tempdir" => "/tmp")
|
9
|
+
|
10
|
+
# data structure retrieval and handling
|
11
|
+
retrieved_responses = {} # for collecting responses
|
12
|
+
|
13
|
+
# data structure retrieval and handling
|
14
|
+
pm.run_on_finish do |_pid, _exit_code, ident, _exit_signal, _core_dump, data|
|
15
|
+
if data # test rather than assume child sent anything
|
16
|
+
puts "#{ident} returned #{data.inspect}."
|
17
|
+
|
18
|
+
retrieved_responses[ident] = data
|
19
|
+
else
|
20
|
+
puts "#{ident} did not send anything."
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# generate a list of instructions
|
25
|
+
instructions = [ # a unique identifier and what the child process should send
|
26
|
+
{ "name" => "ENV keys as a string", "send" => "keys" },
|
27
|
+
{ "name" => "Send Nothing" },
|
28
|
+
{ "name" => "Childs ENV", "send" => "all" },
|
29
|
+
{ "name" => "Child chooses randomly", "send" => "random" },
|
30
|
+
{ "name" => "Invalid send instructions", "send" => "Na Na Nana Na" },
|
31
|
+
{ "name" => "ENV values in an array", "send" => "values" }
|
32
|
+
]
|
33
|
+
|
34
|
+
# run the parallel processes
|
35
|
+
instructions.each do |instruction|
|
36
|
+
# this time we are using an explicit, unique child process identifier
|
37
|
+
pm.start(instruction["name"]) && next
|
38
|
+
|
39
|
+
unless instruction.key?("send")
|
40
|
+
puts "MT name #{instruction['name']}"
|
41
|
+
pm.finish(0)
|
42
|
+
end
|
43
|
+
|
44
|
+
data = case instruction["send"]
|
45
|
+
when "keys" then ENV.keys
|
46
|
+
when "values" then ENV.values
|
47
|
+
when "all" then ENV.to_h
|
48
|
+
when "random"
|
49
|
+
["I'm just a string.",
|
50
|
+
%w(I am an array),
|
51
|
+
{ "type" => "associative array",
|
52
|
+
"synonym" => "hash",
|
53
|
+
"cool" => "very :)" }
|
54
|
+
].sample
|
55
|
+
else
|
56
|
+
"Invalid instructions: #{instruction['send']}"
|
57
|
+
end
|
58
|
+
|
59
|
+
pm.finish(0, data)
|
60
|
+
end
|
61
|
+
|
62
|
+
pm.wait_all_children
|
63
|
+
|
64
|
+
# post fork processing of returned data structures
|
65
|
+
retrieved_responses.keys.sort.each do |response|
|
66
|
+
puts "Post processing \"#{response}\"..."
|
67
|
+
end
|
@@ -0,0 +1,44 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require "rubygems"
|
4
|
+
require "parallel/forkmanager"
|
5
|
+
|
6
|
+
max_procs = 2
|
7
|
+
persons = %w(Fred Wilma Ernie Bert Lucy Ethel Curly Moe Larry)
|
8
|
+
|
9
|
+
pm = Parallel::ForkManager.new(max_procs, "tempdir" => "/tmp")
|
10
|
+
|
11
|
+
# data structure retrieval and handling
|
12
|
+
pm.run_on_finish do |pid, _exit_code, _ident, _exit_signal, _core_dump, data|
|
13
|
+
if data # children are not forced to send anything
|
14
|
+
puts data
|
15
|
+
else # problems occurring during storage or retrieval will throw a warning
|
16
|
+
puts "No message received from child process #{pid}!"
|
17
|
+
end
|
18
|
+
end
|
19
|
+
|
20
|
+
# prep random statement components
|
21
|
+
foods = [
|
22
|
+
"chocolate", "ice cream", "peanut butter", "pickles", "pizza", "bacon",
|
23
|
+
"pancakes", "spaghetti", "cookies"
|
24
|
+
]
|
25
|
+
opinions = [
|
26
|
+
"loves", "can't stand", "always wants more", "will walk 100 miles for",
|
27
|
+
"only eats", "would starve rather than eat"
|
28
|
+
]
|
29
|
+
|
30
|
+
# run the parallel processes
|
31
|
+
persons.each do |person|
|
32
|
+
pm.start && next
|
33
|
+
|
34
|
+
# generate a random statement about food preferences
|
35
|
+
statement = "#{person} #{opinions.sample} #{foods.sample}"
|
36
|
+
|
37
|
+
if rand(5) > 0
|
38
|
+
pm.finish(0, statement)
|
39
|
+
else
|
40
|
+
pm.finish(0)
|
41
|
+
end
|
42
|
+
end
|
43
|
+
|
44
|
+
pm.wait_all_children
|