taskbag 1.0.2 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,6 +1,8 @@
1
1
  # TaskBag
2
2
 
3
- TODO: Write a gem description
3
+ A super simple implementation of the Bag-of-Tasks Paradigm [1].
4
+
5
+ This gem was originally created to scrape websites and download content in parallel.
4
6
 
5
7
  ## Installation
6
8
 
@@ -18,7 +20,51 @@ Or install it yourself as:
18
20
 
19
21
  ## Usage
20
22
 
21
- TODO: Write usage instructions here
23
+ ### When to use it
24
+
25
+ You have [2]:
26
+ * A bunch of tasks to do
27
+ * They are all independent
28
+ * Similar, maybe just different input files
29
+
30
+ In TaskBag you can actually have different tasks in the same bag.
31
+
32
+ ### When not to use it
33
+
34
+ If your workload doesn't fit the description above.
35
+
36
+ ### How to use it
37
+
38
+ Define your job class:
39
+
40
+ ```
41
+ class DownloadVideoJob < Struct.new(:video_url)
42
+ def run
43
+ `wget -c #{self.video_url}`
44
+ end
45
+ end
46
+ ```
47
+
48
+ Create your bag and open it with a number of workers:
49
+
50
+ ```
51
+ bag = TaskBag::Bag.new
52
+ bag.open 4
53
+ ```
54
+
55
+ Add your tasks however you want (maybe from another worker):
56
+
57
+ ```
58
+ bag.add DownloadVideoJob.new('http://www.sometube.com/video.mp4')
59
+ ```
60
+
61
+ Don't *ever* forget to close the bag, or your application might finish before all the jobs have run:
62
+
63
+ ```
64
+ bag.close!
65
+ ```
66
+
67
+ As said before, you can add more tasks as the result of a job. For example, if you're writing a page scraper, you can start your taskbag with only one job that scrapes the index page; that job then creates other jobs to scrape each of the pages and find more jobs. The number of workers will never change unless you close and open your bag again.
22
68
 
23
69
  ## Contributing
24
70
 
@@ -27,3 +73,8 @@ TODO: Write usage instructions here
27
73
  3. Commit your changes (`git commit -am 'Add some feature'`)
28
74
  4. Push to the branch (`git push origin my-new-feature`)
29
75
  5. Create new Pull Request
76
+
77
+ ## References
78
+
79
+ [1] http://www.cs.arizona.edu/~greg/mpdbook/glossary.html
80
+ [2] http://www.eead.csic.es/compbio/material/programacion_rocks/pics/paral_tareas.pdf
data/lib/taskbag/bag.rb CHANGED
@@ -1,39 +1,39 @@
1
+ require 'thread'
2
+
1
3
  module TaskBag
2
4
  class Bag
3
- def initialize(task_class)
4
- @tasks = []
5
- @task_class = task_class
6
- @semaphore = Mutex.new
5
+ def initialize(jobs=Queue.new)
6
+ @closed = true
7
+ @jobs = jobs
8
+ @threads = []
7
9
  end
8
10
 
9
- def open(nthreads)
11
+ def open(nworkers)
12
+ raise "Bag is already opened!" unless closed?
10
13
  @closed = false
11
- bag = self
12
- @threads = nthreads.times.map do |w|
13
- Thread.new { @task_class.new(bag).start }
14
+ _self = self
15
+ @threads = nworkers.times.map do
16
+ Thread.new { Worker.start(_self) }
14
17
  end
15
18
  end
16
19
 
17
- def add(object)
18
- @semaphore.synchronize {
19
- @tasks << object
20
- }
21
- end
22
-
23
- def close
24
- loop { break unless @tasks.any? }
20
+ def close!
21
+ raise 'Bag is already closed!' if closed?
22
+ loop { break if @jobs.empty? }
25
23
  @closed = true
26
24
  @threads.each{|t| t.join}
27
25
  end
28
26
 
27
+ def add(object)
28
+ @jobs.push object
29
+ end
30
+
29
31
  def closed?
30
32
  !!@closed
31
33
  end
32
34
 
33
35
  def next
34
- @semaphore.synchronize {
35
- @tasks.pop
36
- }
36
+ @jobs.pop unless @jobs.empty?
37
37
  end
38
38
  end
39
39
  end
@@ -1,3 +1,3 @@
1
1
  module TaskBag
2
- VERSION = '1.0.2'
2
+ VERSION = '2.0.0'
3
3
  end
@@ -4,15 +4,17 @@ module TaskBag
4
4
  @bag = bag
5
5
  end
6
6
 
7
+ def self.start(bag)
8
+ Worker.new(bag).start
9
+ end
10
+
7
11
  def start
8
12
  until @bag.closed?
9
- task = @bag.next
10
- (sleep(1) and next) if task.nil?
13
+ job = @bag.next
14
+ (sleep(1) and next) if job.nil?
11
15
 
12
- self.run(task)
16
+ job.run
13
17
  end
14
18
  end
15
-
16
- def run(task); end
17
19
  end
18
20
  end
data/spec/bag_spec.rb ADDED
@@ -0,0 +1,79 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaskBag::Bag do
4
+ let(:mocked_queue) { double('mocked queue', empty?: true) }
5
+ subject { TaskBag::Bag.new }
6
+
7
+ it { should be_closed }
8
+
9
+ describe 'opening a bag' do
10
+ it 'makes the bag open' do
11
+ subject.open 0
12
+ subject.should_not be_closed
13
+ subject.close!
14
+ end
15
+
16
+ it 'raises an exception if already open' do
17
+ subject.open 0
18
+ expect { subject.open(0) }.to raise_error
19
+ end
20
+
21
+ it 'creates the given number of workers in different threads' do
22
+ TaskBag::Worker.should_receive(:start)
23
+ .with(subject)
24
+ .exactly(3).times
25
+ .and_return 1, 2, 3
26
+
27
+ subject.open(3).should have(3).items
28
+ subject.close!
29
+ end
30
+ end
31
+
32
+ describe 'closing a bag' do
33
+ subject { TaskBag::Bag.new(mocked_queue) }
34
+
35
+ it 'should be closed after' do
36
+ mocked_queue.should_receive(:empty?).and_return true
37
+ subject.open 0
38
+ subject.close!
39
+ subject.should be_closed
40
+ end
41
+
42
+ it 'raises an exception if already closed' do
43
+ expect { subject.close! }.to raise_error
44
+ end
45
+
46
+ it 'waits for all jobs to be passed to workers before closing' do
47
+ mocked_queue.should_receive(:empty?).and_return false, false, true
48
+ subject.open 0
49
+ subject.close!
50
+ end
51
+
52
+ context 'a full bag' do
53
+ subject { TaskBag::Bag.new }
54
+ it 'waits for every worker to finish before it closes' do
55
+ finished = 0
56
+ job = double('job')
57
+ job.stub :run do
58
+ sleep 0.5
59
+ finished += 1
60
+ end
61
+ subject.open 2
62
+ subject.add job
63
+ subject.add job
64
+ subject.add job
65
+ subject.close!
66
+ finished.should == 3
67
+ end
68
+ end
69
+ end
70
+
71
+ it 'execute the jobs in a FIFO fashion' do
72
+ subject.add 1
73
+ subject.add 2
74
+
75
+ subject.next.should be 1
76
+ subject.next.should be 2
77
+ subject.next.should be_nil
78
+ end
79
+ end
@@ -0,0 +1 @@
1
+ require_relative '../lib/taskbag'
@@ -0,0 +1,27 @@
1
+ require 'spec_helper'
2
+
3
+ describe TaskBag::Worker do
4
+ let(:bag) { double('Bag')}
5
+ subject { TaskBag::Worker.new(bag) }
6
+
7
+ it 'runs tasks until bag is closed' do
8
+ job1, job2 = double('job 1'), double('job 2')
9
+ job1.should_receive(:run).with no_args
10
+ job2.should_receive(:run).with no_args
11
+ bag.should_receive(:closed?).and_return false, false, true
12
+ bag.should_receive(:next).and_return job1, job2
13
+
14
+ subject.start
15
+ end
16
+
17
+ it 'worker keep asking for jobs every second' do
18
+ job = double('job')
19
+ job.should_receive(:run).with no_args
20
+ bag.should_receive(:closed?).and_return false, false, true
21
+ bag.should_receive(:next).and_return nil, job
22
+
23
+ subject.should_receive(:sleep).with(1).and_return true
24
+
25
+ subject.start
26
+ end
27
+ end
data/taskbag.gemspec CHANGED
@@ -21,4 +21,5 @@ Gem::Specification.new do |spec|
21
21
 
22
22
  spec.add_development_dependency "bundler", "~> 1.3"
23
23
  spec.add_development_dependency "rake"
24
+ spec.add_development_dependency "rspec"
24
25
  end
metadata CHANGED
@@ -1,18 +1,20 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: taskbag
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.2
4
+ version: 2.0.0
5
+ prerelease:
5
6
  platform: ruby
6
7
  authors:
7
8
  - Carlos Palhares
8
9
  autorequire:
9
10
  bindir: bin
10
11
  cert_chain: []
11
- date: 2013-07-23 00:00:00.000000000 Z
12
+ date: 2013-09-06 00:00:00.000000000 Z
12
13
  dependencies:
13
14
  - !ruby/object:Gem::Dependency
14
15
  name: bundler
15
16
  requirement: !ruby/object:Gem::Requirement
17
+ none: false
16
18
  requirements:
17
19
  - - ~>
18
20
  - !ruby/object:Gem::Version
@@ -20,6 +22,7 @@ dependencies:
20
22
  type: :development
21
23
  prerelease: false
22
24
  version_requirements: !ruby/object:Gem::Requirement
25
+ none: false
23
26
  requirements:
24
27
  - - ~>
25
28
  - !ruby/object:Gem::Version
@@ -27,15 +30,33 @@ dependencies:
27
30
  - !ruby/object:Gem::Dependency
28
31
  name: rake
29
32
  requirement: !ruby/object:Gem::Requirement
33
+ none: false
30
34
  requirements:
31
- - - '>='
35
+ - - ! '>='
32
36
  - !ruby/object:Gem::Version
33
37
  version: '0'
34
38
  type: :development
35
39
  prerelease: false
36
40
  version_requirements: !ruby/object:Gem::Requirement
41
+ none: false
37
42
  requirements:
38
- - - '>='
43
+ - - ! '>='
44
+ - !ruby/object:Gem::Version
45
+ version: '0'
46
+ - !ruby/object:Gem::Dependency
47
+ name: rspec
48
+ requirement: !ruby/object:Gem::Requirement
49
+ none: false
50
+ requirements:
51
+ - - ! '>='
52
+ - !ruby/object:Gem::Version
53
+ version: '0'
54
+ type: :development
55
+ prerelease: false
56
+ version_requirements: !ruby/object:Gem::Requirement
57
+ none: false
58
+ requirements:
59
+ - - ! '>='
39
60
  - !ruby/object:Gem::Version
40
61
  version: '0'
41
62
  description: A simplistic task of bags implementation for multithreaded scripts
@@ -53,29 +74,36 @@ files:
53
74
  - lib/taskbag/bag.rb
54
75
  - lib/taskbag/version.rb
55
76
  - lib/taskbag/worker.rb
77
+ - spec/bag_spec.rb
78
+ - spec/spec_helper.rb
79
+ - spec/worker_spec.rb
56
80
  - taskbag.gemspec
57
81
  homepage: https://github.com/xjunior/taskbag
58
82
  licenses:
59
83
  - MIT
60
- metadata: {}
61
84
  post_install_message:
62
85
  rdoc_options: []
63
86
  require_paths:
64
87
  - lib
65
88
  required_ruby_version: !ruby/object:Gem::Requirement
89
+ none: false
66
90
  requirements:
67
- - - '>='
91
+ - - ! '>='
68
92
  - !ruby/object:Gem::Version
69
93
  version: '0'
70
94
  required_rubygems_version: !ruby/object:Gem::Requirement
95
+ none: false
71
96
  requirements:
72
- - - '>='
97
+ - - ! '>='
73
98
  - !ruby/object:Gem::Version
74
99
  version: '0'
75
100
  requirements: []
76
101
  rubyforge_project:
77
- rubygems_version: 2.0.2
102
+ rubygems_version: 1.8.23
78
103
  signing_key:
79
- specification_version: 4
104
+ specification_version: 3
80
105
  summary: A simplistic task of bags implementation for multithreaded scripts
81
- test_files: []
106
+ test_files:
107
+ - spec/bag_spec.rb
108
+ - spec/spec_helper.rb
109
+ - spec/worker_spec.rb
checksums.yaml DELETED
@@ -1,7 +0,0 @@
1
- ---
2
- SHA1:
3
- metadata.gz: 6104422a4067a937d2139e05fa644e676d7e0933
4
- data.tar.gz: a354e49b9fa3deec00738d544e343d63fff1fb40
5
- SHA512:
6
- metadata.gz: 4f0e5e4b172a6eb3a863658227bc5b044307270e0b423f75c3cd7f17b96adde322048edd50adaea96d81fb5b52dfe2fb29d914056afb1c984b90108874ea2126
7
- data.tar.gz: bef99d3b30f7fe53e48844b18a5d77110e8c4643a0d897e45a25c2b827b00f9428a750164b0f3424f04ae44cd56b0ba8ef26701cfee9cf926bd37c3852c9e510