wire 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +4 -0
- data/.rspec +5 -0
- data/Gemfile +4 -0
- data/README.markdown +98 -0
- data/Rakefile +2 -0
- data/lib/wire.rb +65 -0
- data/spec/spec_helper.rb +6 -0
- data/spec/wire_spec.rb +103 -0
- data/wire.gemspec +23 -0
- metadata +76 -0
data/.gitignore
ADDED
data/.rspec
ADDED
data/Gemfile
ADDED
data/README.markdown
ADDED
@@ -0,0 +1,98 @@
|
|
1
|
+
# Wire
|
2
|
+
|
3
|
+
Run a strict amount of threads during a time interval, primarily used for [web scraping](http://en.wikipedia.org/wiki/Web_scraping).
|
4
|
+
|
5
|
+
## How to use
|
6
|
+
|
7
|
+
### Example 1 - Basic
|
8
|
+
|
9
|
+
Start 100 threads, only run 10 at the same time, with a 3 second delay between each new thread, except the first 10.
|
10
|
+
|
11
|
+
100.times do
|
12
|
+
Wire.new(max: 10, wait: 3) do
|
13
|
+
# Do stuff
|
14
|
+
end
|
15
|
+
end
|
16
|
+
|
17
|
+
### Example 2 - Timer
|
18
|
+
|
19
|
+
11.times do
|
20
|
+
Wire.new(max: 10, wait: 1) do
|
21
|
+
sleep 0.1
|
22
|
+
end
|
23
|
+
end
|
24
|
+
|
25
|
+
Time to run: ~ *1.2 seconds*.
|
26
|
+
|
27
|
+
This is how it works.
|
28
|
+
|
29
|
+
- 11 threads are created, done at time 0
|
30
|
+
- Running 10 threads, done at time 0.1
|
31
|
+
- Wait 1 second, done at time 1.1
|
32
|
+
- Start the 11th thread, done at time 1.2
|
33
|
+
|
34
|
+
### Example 3 - Pass arguments
|
35
|
+
|
36
|
+
Wire.new(max: 10, wait: 1, vars: ["A", "B"]) do |first, last|
|
37
|
+
puts first # => "A"
|
38
|
+
puts last # => "B"
|
39
|
+
end
|
40
|
+
|
41
|
+
100.times do |n|
|
42
|
+
Wire.new(max: 10, wait: 1, vars: [n]) do |counter|
|
43
|
+
puts counter
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
# => 0 1 2 3 4 ...
|
48
|
+
|
49
|
+
### Example 4 - Scraping
|
50
|
+
|
51
|
+
This project was originally built to solve the request limit problem when using [Spotify's Meta API](http://developer.spotify.com/en/metadata-api/overview/).
|
52
|
+
|
53
|
+
> In order to make the Metadata API snappy and open for everyone to use, rate limiting rules apply. If you make too many requests too fast, you’ll start getting 403 Forbidden responses. When rate limiting has kicked in, you’ll have to wait 10 seconds before making more requests. The rate limit is currently 10 request per second per ip. This may change.
|
54
|
+
|
55
|
+
We wanted to make as many requests as possible without being banned due to the rate limit.
|
56
|
+
|
57
|
+
require "rest-client"
|
58
|
+
require "wire"
|
59
|
+
require "uri"
|
60
|
+
|
61
|
+
a_very_large_list_of_songs = ["Sweet Home Alabama", ...]
|
62
|
+
|
63
|
+
a_very_large_list_of_songs.each do |s|
|
64
|
+
Wire.new(max: 10, wait: 1, vars: [s]) do |song|
|
65
|
+
data = RestClient.get "http://ws.spotify.com/search/1/track.json?q=#{URI.encode(song)}"
|
66
|
+
# Do something with the data
|
67
|
+
end
|
68
|
+
end
|
69
|
+
|
70
|
+
### Tip
|
71
|
+
|
72
|
+
Don't forget to join your threads using `Thread#join`.
|
73
|
+
|
74
|
+
list = []
|
75
|
+
10.times do |n|
|
76
|
+
list << Thread.new do
|
77
|
+
# Do stuff
|
78
|
+
end
|
79
|
+
end
|
80
|
+
list.map(&:join)
|
81
|
+
|
82
|
+
Read more about [#join](http://corelib.rubyonrails.org/classes/Thread.html#M001145) here.
|
83
|
+
|
84
|
+
## Arguments to pass
|
85
|
+
|
86
|
+
Ingoing arguments to `new`.
|
87
|
+
|
88
|
+
- **max** (Integer) The maximum number of threads to run at the same time. The value 10 will be used if `max` is nil or zero.
|
89
|
+
- **wait** (Integer) The time, in seconds, to wait before starting a new thread.
|
90
|
+
- **vars** (Array) A list of arguments to the block.
|
91
|
+
|
92
|
+
## How to install
|
93
|
+
|
94
|
+
[sudo] gem install wire
|
95
|
+
|
96
|
+
## Requirements
|
97
|
+
|
98
|
+
Wire is tested on OS X 10.6.7 using Ruby 1.9.2.
|
data/Rakefile
ADDED
data/lib/wire.rb
ADDED
@@ -0,0 +1,65 @@
|
|
1
|
+
require "thread"
|
2
|
+
require "monitor"
|
3
|
+
|
4
|
+
# A Thread that only starts once the shared pool has a free slot.
#
# All instances share the Counter returned by Wire.counter. The constructor
# blocks the *calling* thread until fewer than `max` wires are running and,
# once `Counter#last` has been set, until `wait` seconds have passed since a
# wire last finished.
class Wire < Thread
  # Returns the lazily created Counter shared by every Wire instance.
  def self.counter
    @counter ||= Counter.new
  end

  # Reserves a slot, honours the delay and then starts the thread.
  #
  # @param args [Hash] options; every key is stored as an instance variable
  #   of the same name. The keys read by this class are:
  #   :max  - maximum number of wires running at once (10 if nil or <= 0)
  #   :wait - seconds to wait after a wire finished (1 if nil)
  #   :vars - Array of arguments handed to the block
  # @yield [*vars] the work to run inside the new thread.
  def initialize(args = {}, &block)
    args.each_pair { |name, value| instance_variable_set("@#{name}", value) }

    if @max.to_i <= 0 || @wait.nil?
      warn "Both max and wait needs to be passed, where max > 0. Using default values"
      @max = 10 if @max.to_i <= 0
      @wait ||= 1
    end

    @block = block
    @counter = Wire.counter

    delay = @counter.synchronize do
      @counter.cond.wait_until { @counter.i < @max }
      @counter.inc
      # `last` is written by finishing wires under this same monitor, so it
      # is read here, while the monitor is still held.
      @counter.last ? @wait - (Time.now.to_f - @counter.last) : 0
    end

    started = false
    begin
      # Sleep outside the monitor so finishing wires are never blocked by it.
      sleep(delay) if delay > 0
      super { runner }
      started = true
    ensure
      # If sleeping or thread creation raised, `runner` will never run, so
      # the slot reserved above has to be handed back here.
      release_slot unless started
    end
  end

  # Runs the user block with `vars` and always frees the slot afterwards.
  # Exceptions raised by the block propagate and surface on Thread#join.
  #
  # @return [Object] the value returned by the block.
  def runner
    @block.call(*@vars)
  ensure
    @counter.synchronize do
      # Start recording finish times once the pool has been full; after that
      # every finishing wire refreshes the timestamp.
      @counter.last = Time.now.to_f if @max == @counter.i || @counter.last
      @counter.dec
      @counter.cond.signal
    end
  end

  private

  # Returns a reserved slot to the pool and wakes one waiting constructor.
  def release_slot
    @counter.synchronize do
      @counter.dec
      @counter.cond.signal
    end
  end
end
|
47
|
+
|
48
|
+
# Monitor-protected bookkeeping shared by Wire instances.
#
# `i`    - number of slots currently taken.
# `cond` - condition variable bound to this object's monitor.
# `last` - value stored by callers; Wire stores `Time.now.to_f` of the most
#          recently finished wire. Starts out as nil.
#
# `inc` and `dec` do not lock by themselves; callers are expected to wrap
# them in `synchronize`, as Wire does.
class Counter
  include MonitorMixin

  attr_reader :i, :cond
  attr_accessor :last

  def initialize
    super() # sets up the monitor provided by MonitorMixin
    @i = 0
    @last = nil
    @cond = new_cond
  end

  # Takes one slot and returns the new count.
  def inc
    @i += 1
  end

  # Frees one slot and returns the new count.
  def dec
    @i -= 1
  end
end
|
data/spec/spec_helper.rb
ADDED
data/spec/wire_spec.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require "spec_helper"
|
2
|
+
|
3
|
+
# Starts `times` wires with the given options, each running the caller's
# block, and waits for all of them to finish.
#
# @param times [Integer] number of wires to create.
# @param options [Hash] options handed to Wire.new unchanged.
# @return [Array<Wire>] the joined wires.
def runner(times, options)
  wires = Array.new(times) do
    Wire.new(options) { yield }
  end
  wires.map(&:join)
end
|
12
|
+
|
13
|
+
# Current wall-clock time as a Float number of seconds since the Unix epoch.
def time
  Time.now.to_f
end
|
16
|
+
|
17
|
+
describe Wire do
  # Give every example its own Counter so that slots and timestamps left
  # behind by one example cannot delay the next one. This is a stub, not an
  # expectation: examples may call Wire.counter any number of times.
  before(:each) do
    counter = Counter.new
    Wire.stub(:counter).and_return(counter)
  end

  context "should be able to do run within the time limit" do
    it "< max threads" do
      start = time
      runner(10, {max: 10, wait: 1}) do
        sleep 0.1
      end

      (time - start).should < 1.15
    end

    # The 11th wire has to wait for a free slot and then for `wait` seconds.
    it "> max threads" do
      start = time
      runner(11, {max: 10, wait: 1}) do
        sleep 0.1
      end

      (time - start).should > 1.2
    end
  end

  it "should run using one thread" do
    start = time
    runner(1, {max: 1, wait: 1}) do
      sleep 0.1
    end

    (time - start).should < 0.2
  end

  it "should run using one thread, using a high max value" do
    start = time
    runner(1, {max: 100, wait: 1}) do
      sleep 0.1
    end

    (time - start).should < 0.2
  end

  it "it should not wait" do
    start = time
    runner(11, {max: 10, wait: 0}) do
      sleep 0.1
    end

    (time - start).should < 0.25
  end

  context "error" do
    it "should use the default values if wrong arguments is being passed" do
      w = Wire.new(max: 0) {}.join
      w.instance_eval do
        @max.should == 10
        @wait.should == 1
      end
    end

    it "should use the default values if nothing is being passed" do
      w = Wire.new({}) {}.join
      w.instance_eval do
        @max.should == 10
        @wait.should == 1
      end
    end

    # Previously shared its description with the first example in this
    # context; it covers the case where only `wait` is supplied.
    it "should use the default max value if only wait is being passed" do
      w = Wire.new(wait: 5) {}.join
      w.instance_eval do
        @max.should == 10
        @wait.should == 5
      end
    end

    # An exception raised inside the wire surfaces when the thread is joined.
    it "should be possible to raise an error" do
      lambda do
        Wire.new(wait: 5, max: 1) do
          raise StandardError.new
        end.join
      end.should raise_error(StandardError)
    end
  end
end
|
data/wire.gemspec
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
$:.push File.expand_path("../lib", __FILE__)
|
3
|
+
|
4
|
+
# Gem specification for wire 0.1.0.
Gem::Specification.new do |s|
  s.name        = "wire"
  s.version     = "0.1.0"
  s.platform    = Gem::Platform::RUBY
  s.authors     = ["Linus Oleander"]
  s.email       = ["linus@oleander.nu"]
  s.homepage    = "https://github.com/Oleander/Wire"
  s.summary     = %q{Run a strict amount of threads during a time interval}
  s.description = %q{Run a strict amount of threads during a time interval, primarily used for web scraping.}

  s.rubyforge_project = "wire"

  # The file lists are read from the git index, so the gem has to be built
  # from inside a git checkout.
  s.files         = `git ls-files`.split("\n")
  s.test_files    = `git ls-files -- {test,spec,features}/*`.split("\n")
  s.executables   = `git ls-files -- bin/*`.split("\n").map{ |f| File.basename(f) }
  s.require_paths = ["lib"]

  # "~> 1.9.0" accepts any 1.9.x release (>= 1.9.0 and < 1.10).
  s.required_ruby_version = "~> 1.9.0"
  s.add_development_dependency("rspec", "~> 2.5.0")
end
|
metadata
ADDED
@@ -0,0 +1,76 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: wire
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
prerelease:
|
5
|
+
version: 0.1.0
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Linus Oleander
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
|
13
|
+
date: 2011-03-26 23:00:00 +01:00
|
14
|
+
default_executable:
|
15
|
+
dependencies:
|
16
|
+
- !ruby/object:Gem::Dependency
|
17
|
+
name: rspec
|
18
|
+
prerelease: false
|
19
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
20
|
+
none: false
|
21
|
+
requirements:
|
22
|
+
- - ~>
|
23
|
+
- !ruby/object:Gem::Version
|
24
|
+
version: 2.5.0
|
25
|
+
type: :development
|
26
|
+
version_requirements: *id001
|
27
|
+
description: Run a strict amount of threads during a time interval, primarily used for web scraping.
|
28
|
+
email:
|
29
|
+
- linus@oleander.nu
|
30
|
+
executables: []
|
31
|
+
|
32
|
+
extensions: []
|
33
|
+
|
34
|
+
extra_rdoc_files: []
|
35
|
+
|
36
|
+
files:
|
37
|
+
- .gitignore
|
38
|
+
- .rspec
|
39
|
+
- Gemfile
|
40
|
+
- README.markdown
|
41
|
+
- Rakefile
|
42
|
+
- lib/wire.rb
|
43
|
+
- spec/spec_helper.rb
|
44
|
+
- spec/wire_spec.rb
|
45
|
+
- wire.gemspec
|
46
|
+
has_rdoc: true
|
47
|
+
homepage: https://github.com/Oleander/Wire
|
48
|
+
licenses: []
|
49
|
+
|
50
|
+
post_install_message:
|
51
|
+
rdoc_options: []
|
52
|
+
|
53
|
+
require_paths:
|
54
|
+
- lib
|
55
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
56
|
+
none: false
|
57
|
+
requirements:
|
58
|
+
- - ~>
|
59
|
+
- !ruby/object:Gem::Version
|
60
|
+
version: 1.9.0
|
61
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
62
|
+
none: false
|
63
|
+
requirements:
|
64
|
+
- - ">="
|
65
|
+
- !ruby/object:Gem::Version
|
66
|
+
version: "0"
|
67
|
+
requirements: []
|
68
|
+
|
69
|
+
rubyforge_project: wire
|
70
|
+
rubygems_version: 1.5.0
|
71
|
+
signing_key:
|
72
|
+
specification_version: 3
|
73
|
+
summary: Run a strict amount of threads during a time interval
|
74
|
+
test_files:
|
75
|
+
- spec/spec_helper.rb
|
76
|
+
- spec/wire_spec.rb
|