fetch 0.0.0 → 0.0.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/README.md +21 -8
- data/Rakefile +1 -1
- data/fetch.gemspec +17 -14
- data/lib/fetch.rb +49 -1
- data/lib/fetch/async.rb +21 -0
- data/lib/fetch/backend.rb +2 -0
- data/lib/fetch/backend/base.rb +15 -0
- data/lib/fetch/backend/typhoeus.rb +43 -0
- data/lib/fetch/base.rb +64 -0
- data/lib/fetch/callbacks.rb +62 -0
- data/lib/fetch/configuration.rb +30 -0
- data/lib/fetch/module.rb +17 -0
- data/lib/fetch/request.rb +156 -0
- data/lib/fetch/simple.rb +21 -0
- data/lib/fetch/version.rb +1 -1
- data/test/callbacks_test.rb +97 -0
- data/test/fetch_test.rb +689 -2
- data/test/simple_test.rb +17 -0
- data/test/test_helper.rb +14 -2
- metadata +64 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 0596dd2fcaa4c9e3be34f29b5346794cd98cfb1e
|
4
|
+
data.tar.gz: cf9479fe7e9c26b350efa01d64b5e880bdac6aa4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 34308fe5ee23a51d4974823f61020f2db9bb6b77966acbd9c72e41170523b27be4bb559b556e71c15899b5cd6b215d905c2c1489f0f110ed78a93b988c643f3e
|
7
|
+
data.tar.gz: f2004793493fd8bdefab68bcc50178a6519a205e768f3e2e7bdddc6757dd85408b0ab94f1e961208a574c7cccbaee4bf21ce62c0fa26e00e8c5f01c9b0c26560
|
data/.gitignore
CHANGED
data/README.md
CHANGED
@@ -1,28 +1,41 @@
|
|
1
|
-
[![Build Status](https://secure.travis-ci.org/
|
1
|
+
[![Build Status](https://secure.travis-ci.org/bogrobotten/fetch.png)](http://travis-ci.org/bogrobotten/fetch)
|
2
2
|
|
3
|
-
# Fetch
|
3
|
+
# Fetch
|
4
4
|
|
5
5
|
![Fetch](http://i.imgur.com/B8TXlri.png)
|
6
6
|
|
7
|
+
Fetch enables easy fetching of data from multiple web sources.
|
8
|
+
It was extracted from [Bogrobotten](http://www.bogrobotten.dk) where we use it
|
9
|
+
to fetch prices and other stuff from multiple merchants.
|
10
|
+
We use it for price comparison, but you can use it for anything that involves
|
11
|
+
fetching data from external sources.
|
12
|
+
|
13
|
+
Fetch uses the [Typhoeus](https://github.com/typhoeus/typhoeus) gem for fast
|
14
|
+
and reliable asynchronous fetches from multiple URLs.
|
15
|
+
|
7
16
|
## Installation
|
8
17
|
|
9
18
|
Add this line to your application's *Gemfile*:
|
10
19
|
|
11
|
-
|
20
|
+
```ruby
|
21
|
+
gem "fetch"
|
22
|
+
```
|
12
23
|
|
13
24
|
Then run:
|
14
25
|
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
$ gem install fetch
|
26
|
+
```bash
|
27
|
+
$ bundle
|
28
|
+
```
|
20
29
|
|
21
30
|
## Contributing
|
22
31
|
|
32
|
+
Contributions are much appreciated. To contribute:
|
33
|
+
|
23
34
|
1. Fork the project
|
24
35
|
2. Create a feature branch (`git checkout -b my-new-feature`)
|
25
36
|
3. Make your changes, including tests so it doesn't break in the future
|
26
37
|
4. Commit your changes (`git commit -am 'Add feature'`)
|
27
38
|
5. Push to the branch (`git push origin my-new-feature`)
|
28
39
|
6. Create a new pull request
|
40
|
+
|
41
|
+
Please do not touch the version, as this will be updated by the owners when the gem is ready for a new release.
|
data/Rakefile
CHANGED
data/fetch.gemspec
CHANGED
@@ -3,20 +3,23 @@ lib = File.expand_path('../lib', __FILE__)
|
|
3
3
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
4
|
require 'fetch/version'
|
5
5
|
|
6
|
-
Gem::Specification.new do |
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
6
|
+
# Gem specification for fetch: identity, packaged files and dependencies.
Gem::Specification.new do |spec|
  # Identity and authorship.
  spec.name        = "fetch"
  spec.version     = Fetch::VERSION
  spec.authors     = ["Lasse Bunk"]
  spec.email       = ["lassebunk@gmail.com"]
  spec.summary     = "Coming"
  spec.description = "Coming"
  spec.homepage    = "https://github.com/lassebunk/fetch"
  spec.license     = "MIT"

  # Package everything git tracks; anything under test/ is also a test file.
  tracked_files      = `git ls-files`.split($/)
  spec.files         = tracked_files
  spec.test_files    = spec.files.grep(%r{^test/})
  spec.require_paths = ["lib"]

  # Runtime dependency.
  spec.add_dependency "typhoeus", ">= 0.6.0"

  # Development-only dependencies.
  spec.add_development_dependency "json"
  spec.add_development_dependency "minitest", ">= 5.4"
  spec.add_development_dependency "webmock", ">= 1.20"
  spec.add_development_dependency "rake"
end
|
data/lib/fetch.rb
CHANGED
@@ -1 +1,49 @@
|
|
1
|
-
require "
|
1
|
+
require "typhoeus"
|
2
|
+
|
3
|
+
%w{
|
4
|
+
version
|
5
|
+
callbacks
|
6
|
+
base
|
7
|
+
request
|
8
|
+
async
|
9
|
+
simple
|
10
|
+
module
|
11
|
+
backend
|
12
|
+
configuration
|
13
|
+
}.each do |file|
|
14
|
+
require "fetch/#{file}"
|
15
|
+
end
|
16
|
+
|
17
|
+
module Fetch
  # Error describing an HTTP failure. Carries the status +code+ and the +url+
  # it relates to.
  class HttpError < StandardError
    attr_reader :code, :url

    # code - the HTTP status code.
    # url  - the URL the code was received for.
    def initialize(code, url)
      @code, @url = code, url
      # Hand the text to StandardError so that #message, #to_s and #inspect
      # all report it; overriding only #message left #to_s returning the bare
      # class name.
      super("HTTP Error #{code}: #{url}")
    end
  end

  class << self
    # Returns the memoized configuration object (+Fetch::Configuration+).
    def config
      @config ||= Configuration.new
    end

    # Yields the configuration object (+Fetch::Configuration+) to the block.
    #
    #   Fetch.configure do |config|
    #     config.user_agent = "Custom User Agent"
    #   end
    def configure(&block)
      yield config
    end

    # Returns the memoized module cache.
    # NOTE(review): +ModuleCache+ is not defined in the visible source, so
    # calling this may raise NameError -- confirm where it is meant to live.
    def module_cache
      @module_cache ||= ModuleCache.new
    end
  end
end
|
data/lib/fetch/async.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module Fetch
  # Mixin that turns the +request+ callbacks registered on a class into
  # +Request+ objects.
  module Async
    # Defines the +request+, +before_process+ and +after_process+ callbacks on
    # the including class.
    def self.included(base)
      base.define_callback(:request, :before_process, :after_process)
    end

    # Builds one +Request+ per registered +request+ callback.
    #
    # Each request is first wired to this object's +before_process+,
    # +after_process+, +failure+ and +error+ callbacks (only those that have
    # been registered), then handed to the +request+ callback, which is run in
    # the context of this object. Requests that end up without a URL are
    # dropped.
    def requests
      built = self.class.callbacks[:request].map do |setup|
        req = Request.new

        if callback?(:before_process)
          req.before_process { before_process }
        end

        if callback?(:after_process)
          req.after_process { after_process }
        end

        if callback?(:failure)
          req.failure { |code, url| failure(code, url) }
        end

        if callback?(:error)
          req.error { |e| error(e) }
        end

        instance_exec(req, &setup)
        req
      end

      built.select(&:url)
    end
  end
end
|
@@ -0,0 +1,43 @@
|
|
1
|
+
module Fetch
  module Backend
    # Backend that performs the requests through a Typhoeus hydra.
    class Typhoeus < Base
      # Queues every request on a new hydra and runs it.
      #
      # The optional block is called once for each request that finishes,
      # whether it succeeded or failed.
      def run(&progress)
        hydra = ::Typhoeus::Hydra.new

        build_requests(&progress).each do |request|
          hydra.queue(request)
        end

        hydra.run
      end

      private

      # Maps each fetch request to a Typhoeus request with success and failure
      # handlers attached.
      # NOTE(review): +requests+ is presumably provided by +Base+, which is
      # not visible here -- confirm.
      def build_requests(&progress)
        requests.map do |req|
          request = ::Typhoeus::Request.new(
            req.url,
            method: req.method,
            body: req.body_string,
            followlocation: req.follow_redirects,
            timeout: req.timeout,
            forbid_reuse: true,
            headers: req.headers
          )

          request.on_success do |res|
            req.process!(res.body, req.url, res.effective_url)
            # The progress block is optional; calling nil would raise.
            progress.call if progress
          end

          request.on_failure do |res|
            req.failed!(res.code, req.url)
            progress.call if progress
          end

          request
        end
      end
    end
  end
end
|
data/lib/fetch/base.rb
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
# Base class for fetch handlers, e.g. +ProductFetch+, +UserFetch+, etc.
module Fetch
  class Base
    include Callbacks

    # Callbacks that can be set on the class and are run while fetching.
    #
    #   before_fetch do
    #     # do something before fetching
    #   end
    #
    #   after_fetch do
    #     # do something after fetching
    #   end
    #
    #   progress do |progress|
    #     # update progress in percent
    #   end
    define_callback :modules, :init, :before_fetch, :after_fetch, :progress

    # Begin fetching.
    #
    # Collects the requests of every module that answers true to +fetch?+,
    # reports the initial progress, runs +before_fetch+, hands the requests to
    # the configured backend (reporting progress each time the backend calls
    # back), runs +after_fetch+ and returns +true+.
    def fetch
      active_modules = instantiate_modules.select(&:fetch?)
      requests = active_modules.map(&:requests).flatten

      total = requests.size
      done = 0
      update_progress(total, done)

      before_fetch

      backend.new(requests).run do
        done += 1
        update_progress(total, done)
      end

      after_fetch

      true
    end

    private

    # Array of instantiated fetch modules. Each class is passed to the +init+
    # callback; when that yields nothing truthy the class is instantiated
    # without arguments.
    def instantiate_modules
      Array(modules).map { |klass| init(klass) || klass.new }
    end

    # Runs the +progress+ callback with a whole-number percentage calculated
    # from +total+ and +done+. With nothing to do the percentage is 100.
    def update_progress(total, done)
      percentage =
        if total.zero?
          100
        else
          ((done.to_f / total) * 100).to_i
        end

      progress(percentage)
    end

    # The backend class taken from the global configuration.
    def backend
      Fetch.config.backend
    end
  end
end
|
@@ -0,0 +1,62 @@
|
|
1
|
+
module Fetch
  # Mixin that adds class-level callback registration and instance-level
  # callback execution to the including class.
  module Callbacks
    def self.included(base)
      base.extend ClassMethods
    end

    private

    # Check if a callback has been used.
    def callback?(name)
      self.class.callbacks[name].any?
    end

    # Run specific callbacks in the context of this instance and return the
    # array of their results.
    #
    #   run_callbacks_for(:before_fetch)
    #   run_callbacks_for(:progress, 12) # 12 percent done
    def run_callbacks_for(name, *args)
      self.class.callbacks[name].map do |block|
        instance_exec(*args, &block)
      end
    end

    module ClassMethods
      # Hash of callback blocks to be called, keyed by callback name.
      # Unknown names start out with an empty array.
      def callbacks
        @callbacks ||= Hash.new { |h, k| h[k] = [] }
      end

      # Defines callback methods on the class level.
      #
      # For each name this defines a class method that registers a callback
      # (given as values and/or a block) and an instance method that runs all
      # callbacks registered under that name and returns the last result.
      def define_callback(*names)
        names.each do |name|
          define_singleton_method name do |*values, &block|
            create_callback_for(name, *values, &block)
          end

          define_method name do |*args|
            run_callbacks_for(name, *args).last
          end
        end
      end

      # Gives each subclass its own copy of the callbacks registered so far,
      # so later additions on the subclass do not leak into the parent.
      def inherited(base)
        super
        callbacks.each do |name, blocks|
          base.callbacks[name] = blocks.dup
        end
      end

      private

      # Registers a callback returning +values+ (when any were passed) and/or
      # the given block.
      def create_callback_for(name, *values, &block)
        # Check for emptiness rather than truthiness: `values.any?` would
        # silently drop a call such as `flag false` or `flag nil`.
        add_callback(name) { values } unless values.empty?
        add_callback(name, &block) if block
      end

      def add_callback(name, &block)
        callbacks[name] << block
      end
    end
  end
end
|
@@ -0,0 +1,30 @@
|
|
1
|
+
module Fetch
  # Holds the configurable options, each with a reader, a writer and a
  # default taken from +DEFAULTS+.
  class Configuration
    # Default value per option. A Proc default is called to produce the value
    # the first time the option is read on an instance.
    DEFAULTS = {
      user_agent: "Mozilla/5.0",
      timeout: 10,
      namespaces: ["fetch_sources"],
      # True when +Rails.env+ is defined and is "development" or "test".
      raise_on_error: -> { defined?(Rails.env) && %w{development test}.include?(Rails.env) },
      backend: Backend::Typhoeus
    }

    DEFAULTS.each do |option, default|
      ivar = "@#{option}"

      # Reader: returns the value set on this instance, otherwise resolves
      # the default and stores it on the instance.
      define_method(option) do
        return instance_variable_get(ivar) if instance_variable_defined?(ivar)

        # Resolve into a method-local variable. Reassigning the captured
        # +default+ would overwrite the Proc for every instance, so it would
        # only ever be evaluated once per process.
        resolved = default.is_a?(Proc) ? default.call : default
        instance_variable_set(ivar, resolved)
      end

      # Writer: stores an explicit value on this instance.
      define_method("#{option}=") do |value|
        instance_variable_set(ivar, value)
      end
    end

    # Convenience method for defining a single namespace that contains fetch modules.
    def namespace=(value)
      self.namespaces = [value]
    end
  end
end
|
data/lib/fetch/module.rb
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
module Fetch
  # A fetch module: combines the callback machinery with request building and
  # adds the +fetch_if+, +failure+ and +error+ callbacks.
  class Module
    include Callbacks
    include Async

    define_callback :fetch_if, :failure, :error

    # Whether or not the module should be used when fetching.
    # Set with `fetch_if do ... end`; without a +fetch_if+ callback the module
    # is always used.
    def fetch?
      callback?(:fetch_if) ? !!fetch_if : true
    end
  end
end
|
@@ -0,0 +1,156 @@
|
|
1
|
+
require "cgi"
|
2
|
+
|
3
|
+
module Fetch
  # A request to be completed with Typhoeus.
  class Request
    # Initializes the request and sets properties to the values defined in
    # +options+. Option keys must name public setters of the request.
    #
    #   request = Fetch::Request.new("http://www.google.com", timeout: 5)
    #   request.url     # => "http://www.google.com"
    #   request.timeout # => 5
    #
    #   request = Fetch::Request.new(timeout: 5)
    #   request.url     # => nil
    #   request.timeout # => 5
    def initialize(*args)
      options = args.pop if args.last.is_a?(Hash)

      self.url = args.first if args.any?

      if options
        # public_send keeps option keys from reaching non-public setters.
        options.each { |key, value| public_send("#{key}=", value) }
      end
    end

    # The URL to be requested.
    attr_accessor :url

    # Whether to follow redirects. Default: +true+
    def follow_redirects
      return @follow_redirects if defined?(@follow_redirects)
      @follow_redirects = true
    end

    # Sets whether to follow redirects.
    attr_writer :follow_redirects

    # The method to be used for the request. Default: +:get+
    # Note that this replaces Object#method on request instances.
    def method
      @method || :get
    end

    # Sets the method to be used for the request.
    attr_writer :method

    # The post body to be sent with the request. Default: an empty hash.
    def body
      @body ||= {}
    end

    # Sets the post body to be sent with the request.
    attr_writer :body

    # The post body represented as a string.
    #
    # A String body is returned unchanged; any other body is treated as
    # key/value pairs and form-encoded as "key=value" joined by "&".
    def body_string
      return body if body.is_a?(String)

      body.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
    end

    # The timeout for the request.
    # Default: Taken from +Fetch.config.timeout+
    def timeout
      return @timeout if defined?(@timeout)
      Fetch.config.timeout
    end

    # Sets the timeout for the request.
    attr_writer :timeout

    # The headers to be sent with the request.
    # Default: a "User-Agent" header taken from +Fetch.config.user_agent+
    def headers
      @headers ||= {
        "User-Agent" => Fetch.config.user_agent
      }
    end

    # Sets the headers to be sent with the request.
    attr_writer :headers

    # The user agent being sent with the request.
    def user_agent
      headers["User-Agent"]
    end

    # Sets the user agent to be sent with the request.
    def user_agent=(value)
      headers.merge! "User-Agent" => value
    end

    # Sets a callback to be run before each process.
    # Raises RuntimeError when no block is given.
    def before_process(&block)
      raise "You must supply a block to #{self.class.name}#before_process" unless block
      @before_process_callback = block
    end

    # Runs the before process callback, if one is set.
    def before_process!
      @before_process_callback.call if @before_process_callback
    end

    # Sets the callback to be run when the request completes.
    # Raises RuntimeError when no block is given.
    def process(&block)
      raise "You must supply a block to #{self.class.name}#process" unless block
      @process_callback = block
    end

    # Runs the before process, process and after process callbacks in that
    # order. If any of them fails with an exception, the exception is sent to
    # the error callback and the remaining callbacks are skipped.
    def process!(body, url, effective_url)
      before_process!
      @process_callback.call(body, url, effective_url) if @process_callback
      after_process!
    rescue => e
      error!(e)
    end

    # Sets a callback to be run after each process.
    # Raises RuntimeError when no block is given.
    def after_process(&block)
      raise "You must supply a block to #{self.class.name}#after_process" unless block
      @after_process_callback = block
    end

    # Runs the after process callback, if one is set.
    def after_process!
      @after_process_callback.call if @after_process_callback
    end

    # Sets the callback to be run if a request fails.
    # Raises RuntimeError when no block is given.
    def failure(&block)
      raise "You must supply a block to #{self.class.name}#failure" unless block
      @failure_callback = block
    end

    # Runs the failure callback, if one is set, with +code+ and +url+.
    def failed!(code, url)
      @failure_callback.call(code, url) if @failure_callback
    end

    # Sets the callback to be run if the processing fails due to an exception.
    # Raises RuntimeError when no block is given.
    def error(&block)
      raise "You must supply a block to #{self.class.name}#error" unless block
      @error_callback = block
    end

    # Runs the error callback. Raises the exception given in +exception+ if an
    # error callback isn't defined.
    def error!(exception)
      if @error_callback
        @error_callback.call(exception)
      else
        raise exception
      end
    end
  end
end
|