taskbag 1.0.2 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/README.md +53 -2
- data/lib/taskbag/bag.rb +19 -19
- data/lib/taskbag/version.rb +1 -1
- data/lib/taskbag/worker.rb +7 -5
- data/spec/bag_spec.rb +79 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/worker_spec.rb +27 -0
- data/taskbag.gemspec +1 -0
- metadata +38 -10
- checksums.yaml +0 -7
data/README.md
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# TaskBag
|
2
2
|
|
3
|
-
|
3
|
+
A super simple implementation of the Bag-of-Tasks Paradigm [1].
|
4
|
+
|
5
|
+
This gem was originally created to scrape websites and download content in parallel.
|
4
6
|
|
5
7
|
## Installation
|
6
8
|
|
@@ -18,7 +20,51 @@ Or install it yourself as:
|
|
18
20
|
|
19
21
|
## Usage
|
20
22
|
|
21
|
-
|
23
|
+
### When to use it
|
24
|
+
|
25
|
+
You have [2]:
|
26
|
+
* A bunch of tasks to do
|
27
|
+
* They are all independent
|
28
|
+
* Similar, maybe just different input files
|
29
|
+
|
30
|
+
In TaskBag you can actually have different tasks in the same bag.
|
31
|
+
|
32
|
+
### When not to use it
|
33
|
+
|
34
|
+
If your tasks don't fit the description above.
|
35
|
+
|
36
|
+
### How to use it
|
37
|
+
|
38
|
+
Define your job class:
|
39
|
+
|
40
|
+
```
|
41
|
+
class DownloadVideoJob < Struct.new(:video_url)
|
42
|
+
def run
|
43
|
+
`wget -c #{self.video_url}`
|
44
|
+
end
|
45
|
+
end
|
46
|
+
```
|
47
|
+
|
48
|
+
Create your bag and open it with a number of workers:
|
49
|
+
|
50
|
+
```
|
51
|
+
bag = TaskBag::Bag.new
|
52
|
+
bag.open 4
|
53
|
+
```
|
54
|
+
|
55
|
+
Add your tasks however you want (maybe from another worker):
|
56
|
+
|
57
|
+
```
|
58
|
+
bag.add DownloadVideoJob.new('http://www.sometube.com/video.mp4')
|
59
|
+
```
|
60
|
+
|
61
|
+
Don't *ever* forget to close the bag, or your application might finish before all the jobs are run:
|
62
|
+
|
63
|
+
```
|
64
|
+
bag.close!
|
65
|
+
```
|
66
|
+
|
67
|
+
As said before, you can add more tasks as the result of a job. For example, if you're writing a page scraper, you can start your bag with a single job that scrapes the index page; that job then adds further jobs to scrape each of the pages and find more jobs. The number of workers will never change unless you close and open your bag again.
|
22
68
|
|
23
69
|
## Contributing
|
24
70
|
|
@@ -27,3 +73,8 @@ TODO: Write usage instructions here
|
|
27
73
|
3. Commit your changes (`git commit -am 'Add some feature'`)
|
28
74
|
4. Push to the branch (`git push origin my-new-feature`)
|
29
75
|
5. Create new Pull Request
|
76
|
+
|
77
|
+
## References
|
78
|
+
|
79
|
+
[1] http://www.cs.arizona.edu/~greg/mpdbook/glossary.html
|
80
|
+
[2] http://www.eead.csic.es/compbio/material/programacion_rocks/pics/paral_tareas.pdf
|
data/lib/taskbag/bag.rb
CHANGED
@@ -1,39 +1,39 @@
|
|
1
|
+
require 'thread'
|
2
|
+
|
1
3
|
module TaskBag
|
2
4
|
class Bag
|
3
|
-
def initialize(
|
4
|
-
@
|
5
|
-
@
|
6
|
-
@
|
5
|
+
def initialize(jobs=Queue.new)
|
6
|
+
@closed = true
|
7
|
+
@jobs = jobs
|
8
|
+
@threads = []
|
7
9
|
end
|
8
10
|
|
9
|
-
def open(
|
11
|
+
def open(nworkers)
|
12
|
+
raise "Bag is already opened!" unless closed?
|
10
13
|
@closed = false
|
11
|
-
|
12
|
-
@threads =
|
13
|
-
Thread.new {
|
14
|
+
_self = self
|
15
|
+
@threads = nworkers.times.map do
|
16
|
+
Thread.new { Worker.start(_self) }
|
14
17
|
end
|
15
18
|
end
|
16
19
|
|
17
|
-
def
|
18
|
-
|
19
|
-
|
20
|
-
}
|
21
|
-
end
|
22
|
-
|
23
|
-
def close
|
24
|
-
loop { break unless @tasks.any? }
|
20
|
+
def close!
|
21
|
+
raise 'Bag is already closed!' if closed?
|
22
|
+
loop { break if @jobs.empty? }
|
25
23
|
@closed = true
|
26
24
|
@threads.each{|t| t.join}
|
27
25
|
end
|
28
26
|
|
27
|
+
def add(object)
|
28
|
+
@jobs.push object
|
29
|
+
end
|
30
|
+
|
29
31
|
def closed?
|
30
32
|
!!@closed
|
31
33
|
end
|
32
34
|
|
33
35
|
def next
|
34
|
-
@
|
35
|
-
@tasks.pop
|
36
|
-
}
|
36
|
+
@jobs.pop unless @jobs.empty?
|
37
37
|
end
|
38
38
|
end
|
39
39
|
end
|
data/lib/taskbag/version.rb
CHANGED
data/lib/taskbag/worker.rb
CHANGED
@@ -4,15 +4,17 @@ module TaskBag
|
|
4
4
|
@bag = bag
|
5
5
|
end
|
6
6
|
|
7
|
+
def self.start(bag)
|
8
|
+
Worker.new(bag).start
|
9
|
+
end
|
10
|
+
|
7
11
|
def start
|
8
12
|
until @bag.closed?
|
9
|
-
|
10
|
-
(sleep(1) and next) if
|
13
|
+
job = @bag.next
|
14
|
+
(sleep(1) and next) if job.nil?
|
11
15
|
|
12
|
-
|
16
|
+
job.run
|
13
17
|
end
|
14
18
|
end
|
15
|
-
|
16
|
-
def run(task); end
|
17
19
|
end
|
18
20
|
end
|
data/spec/bag_spec.rb
ADDED
@@ -0,0 +1,79 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe TaskBag::Bag do
|
4
|
+
let(:mocked_queue) { double('mocked queue', empty?: true) }
|
5
|
+
subject { TaskBag::Bag.new }
|
6
|
+
|
7
|
+
it { should be_closed }
|
8
|
+
|
9
|
+
describe 'opening a bag' do
|
10
|
+
it 'makes the bag open' do
|
11
|
+
subject.open 0
|
12
|
+
subject.should_not be_closed
|
13
|
+
subject.close!
|
14
|
+
end
|
15
|
+
|
16
|
+
it 'raises an exception if already open' do
|
17
|
+
subject.open 0
|
18
|
+
expect { subject.open(0) }.to raise_error
|
19
|
+
end
|
20
|
+
|
21
|
+
it 'creates the given number of workers in different threads' do
|
22
|
+
TaskBag::Worker.should_receive(:start)
|
23
|
+
.with(subject)
|
24
|
+
.exactly(3).times
|
25
|
+
.and_return 1, 2, 3
|
26
|
+
|
27
|
+
subject.open(3).should have(3).items
|
28
|
+
subject.close!
|
29
|
+
end
|
30
|
+
end
|
31
|
+
|
32
|
+
describe 'closing a bag' do
|
33
|
+
subject { TaskBag::Bag.new(mocked_queue) }
|
34
|
+
|
35
|
+
it 'should be closed after' do
|
36
|
+
mocked_queue.should_receive(:empty?).and_return true
|
37
|
+
subject.open 0
|
38
|
+
subject.close!
|
39
|
+
subject.should be_closed
|
40
|
+
end
|
41
|
+
|
42
|
+
it 'raises an exception if already closed' do
|
43
|
+
expect { subject.close! }.to raise_error
|
44
|
+
end
|
45
|
+
|
46
|
+
it 'waits for all jobs to be passed to workers before closing' do
|
47
|
+
mocked_queue.should_receive(:empty?).and_return false, false, true
|
48
|
+
subject.open 0
|
49
|
+
subject.close!
|
50
|
+
end
|
51
|
+
|
52
|
+
context 'a full bag' do
|
53
|
+
subject { TaskBag::Bag.new }
|
54
|
+
it 'waits for every worker to finish before it closes' do
|
55
|
+
finished = 0
|
56
|
+
job = double('job')
|
57
|
+
job.stub :run do
|
58
|
+
sleep 0.5
|
59
|
+
finished += 1
|
60
|
+
end
|
61
|
+
subject.open 2
|
62
|
+
subject.add job
|
63
|
+
subject.add job
|
64
|
+
subject.add job
|
65
|
+
subject.close!
|
66
|
+
finished.should == 3
|
67
|
+
end
|
68
|
+
end
|
69
|
+
end
|
70
|
+
|
71
|
+
it 'execute the jobs in a FIFO fashion' do
|
72
|
+
subject.add 1
|
73
|
+
subject.add 2
|
74
|
+
|
75
|
+
subject.next.should be 1
|
76
|
+
subject.next.should be 2
|
77
|
+
subject.next.should be_nil
|
78
|
+
end
|
79
|
+
end
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
require_relative '../lib/taskbag'
|
data/spec/worker_spec.rb
ADDED
@@ -0,0 +1,27 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe TaskBag::Worker do
|
4
|
+
let(:bag) { double('Bag')}
|
5
|
+
subject { TaskBag::Worker.new(bag) }
|
6
|
+
|
7
|
+
it 'runs tasks until bag is closed' do
|
8
|
+
job1, job2 = double('job 1'), double('job 2')
|
9
|
+
job1.should_receive(:run).with no_args
|
10
|
+
job2.should_receive(:run).with no_args
|
11
|
+
bag.should_receive(:closed?).and_return false, false, true
|
12
|
+
bag.should_receive(:next).and_return job1, job2
|
13
|
+
|
14
|
+
subject.start
|
15
|
+
end
|
16
|
+
|
17
|
+
it 'worker keep asking for jobs every second' do
|
18
|
+
job = double('job')
|
19
|
+
job.should_receive(:run).with no_args
|
20
|
+
bag.should_receive(:closed?).and_return false, false, true
|
21
|
+
bag.should_receive(:next).and_return nil, job
|
22
|
+
|
23
|
+
subject.should_receive(:sleep).with(1).and_return true
|
24
|
+
|
25
|
+
subject.start
|
26
|
+
end
|
27
|
+
end
|
data/taskbag.gemspec
CHANGED
metadata
CHANGED
@@ -1,18 +1,20 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: taskbag
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 2.0.0
|
5
|
+
prerelease:
|
5
6
|
platform: ruby
|
6
7
|
authors:
|
7
8
|
- Carlos Palhares
|
8
9
|
autorequire:
|
9
10
|
bindir: bin
|
10
11
|
cert_chain: []
|
11
|
-
date: 2013-
|
12
|
+
date: 2013-09-06 00:00:00.000000000 Z
|
12
13
|
dependencies:
|
13
14
|
- !ruby/object:Gem::Dependency
|
14
15
|
name: bundler
|
15
16
|
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
16
18
|
requirements:
|
17
19
|
- - ~>
|
18
20
|
- !ruby/object:Gem::Version
|
@@ -20,6 +22,7 @@ dependencies:
|
|
20
22
|
type: :development
|
21
23
|
prerelease: false
|
22
24
|
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
23
26
|
requirements:
|
24
27
|
- - ~>
|
25
28
|
- !ruby/object:Gem::Version
|
@@ -27,15 +30,33 @@ dependencies:
|
|
27
30
|
- !ruby/object:Gem::Dependency
|
28
31
|
name: rake
|
29
32
|
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
30
34
|
requirements:
|
31
|
-
- - '>='
|
35
|
+
- - ! '>='
|
32
36
|
- !ruby/object:Gem::Version
|
33
37
|
version: '0'
|
34
38
|
type: :development
|
35
39
|
prerelease: false
|
36
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
37
42
|
requirements:
|
38
|
-
- - '>='
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rspec
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: '0'
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
39
60
|
- !ruby/object:Gem::Version
|
40
61
|
version: '0'
|
41
62
|
description: A simplistic task of bags implementation for multithreaded scripts
|
@@ -53,29 +74,36 @@ files:
|
|
53
74
|
- lib/taskbag/bag.rb
|
54
75
|
- lib/taskbag/version.rb
|
55
76
|
- lib/taskbag/worker.rb
|
77
|
+
- spec/bag_spec.rb
|
78
|
+
- spec/spec_helper.rb
|
79
|
+
- spec/worker_spec.rb
|
56
80
|
- taskbag.gemspec
|
57
81
|
homepage: https://github.com/xjunior/taskbag
|
58
82
|
licenses:
|
59
83
|
- MIT
|
60
|
-
metadata: {}
|
61
84
|
post_install_message:
|
62
85
|
rdoc_options: []
|
63
86
|
require_paths:
|
64
87
|
- lib
|
65
88
|
required_ruby_version: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
66
90
|
requirements:
|
67
|
-
- - '>='
|
91
|
+
- - ! '>='
|
68
92
|
- !ruby/object:Gem::Version
|
69
93
|
version: '0'
|
70
94
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
95
|
+
none: false
|
71
96
|
requirements:
|
72
|
-
- - '>='
|
97
|
+
- - ! '>='
|
73
98
|
- !ruby/object:Gem::Version
|
74
99
|
version: '0'
|
75
100
|
requirements: []
|
76
101
|
rubyforge_project:
|
77
|
-
rubygems_version:
|
102
|
+
rubygems_version: 1.8.23
|
78
103
|
signing_key:
|
79
|
-
specification_version:
|
104
|
+
specification_version: 3
|
80
105
|
summary: A simplistic task of bags implementation for multithreaded scripts
|
81
|
-
test_files:
|
106
|
+
test_files:
|
107
|
+
- spec/bag_spec.rb
|
108
|
+
- spec/spec_helper.rb
|
109
|
+
- spec/worker_spec.rb
|
checksums.yaml
DELETED
@@ -1,7 +0,0 @@
|
|
1
|
-
---
|
2
|
-
SHA1:
|
3
|
-
metadata.gz: 6104422a4067a937d2139e05fa644e676d7e0933
|
4
|
-
data.tar.gz: a354e49b9fa3deec00738d544e343d63fff1fb40
|
5
|
-
SHA512:
|
6
|
-
metadata.gz: 4f0e5e4b172a6eb3a863658227bc5b044307270e0b423f75c3cd7f17b96adde322048edd50adaea96d81fb5b52dfe2fb29d914056afb1c984b90108874ea2126
|
7
|
-
data.tar.gz: bef99d3b30f7fe53e48844b18a5d77110e8c4643a0d897e45a25c2b827b00f9428a750164b0f3424f04ae44cd56b0ba8ef26701cfee9cf926bd37c3852c9e510
|