fetch 0.0.0 → 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9b9c344461a2663701702dd9fda6ce2429671106
4
- data.tar.gz: aa57e886b43146a90c724251e3b96e76d792cc52
3
+ metadata.gz: 0596dd2fcaa4c9e3be34f29b5346794cd98cfb1e
4
+ data.tar.gz: cf9479fe7e9c26b350efa01d64b5e880bdac6aa4
5
5
  SHA512:
6
- metadata.gz: 40d701ecdcd26c12c0ed34a8a9c50cd93cc8aa4fd4907e6bb87187fa5595a9b7777b411c71bc5cf8ba9344b10e2b348cec4051586cf7a288c6a5e4c43ce2b4b7
7
- data.tar.gz: af7e724a8df6864e55bf46c098c8a7b7415faa74e128d0962b0e7f70eb28be8438011d794b3fd65578643ae970c225b552223d63c85e9ecf03e51ecd6b297fc3
6
+ metadata.gz: 34308fe5ee23a51d4974823f61020f2db9bb6b77966acbd9c72e41170523b27be4bb559b556e71c15899b5cd6b215d905c2c1489f0f110ed78a93b988c643f3e
7
+ data.tar.gz: f2004793493fd8bdefab68bcc50178a6519a205e768f3e2e7bdddc6757dd85408b0ab94f1e961208a574c7cccbaee4bf21ce62c0fa26e00e8c5f01c9b0c26560
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ *.log
data/README.md CHANGED
@@ -1,28 +1,41 @@
1
- [![Build Status](https://secure.travis-ci.org/lassebunk/fetch.png)](http://travis-ci.org/lassebunk/fetch)
1
+ [![Build Status](https://secure.travis-ci.org/bogrobotten/fetch.png)](http://travis-ci.org/bogrobotten/fetch)
2
2
 
3
- # Fetch!
3
+ # Fetch
4
4
 
5
5
  ![Fetch](http://i.imgur.com/B8TXlri.png)
6
6
 
7
+ Fetch enables easy fetching of data from multiple web sources.
8
+ It was extracted from [Bogrobotten](http://www.bogrobotten.dk) where we use it
9
+ to fetch prices and other stuff from multiple merchants.
10
+ We use it for price comparison, but you can use it for anything that involves
11
+ fetching data from external sources.
12
+
13
+ Fetch uses the [Typhoeus](https://github.com/typhoeus/typhoeus) gem for fast
14
+ and reliable asynchronous fetches from multiple URLs.
15
+
7
16
  ## Installation
8
17
 
9
18
  Add this line to your application's *Gemfile*:
10
19
 
11
- gem "fetch"
20
+ ```ruby
21
+ gem "fetch"
22
+ ```
12
23
 
13
24
  Then run:
14
25
 
15
- $ bundle
16
-
17
- Or install it yourself:
18
-
19
- $ gem install fetch
26
+ ```bash
27
+ $ bundle
28
+ ```
20
29
 
21
30
  ## Contributing
22
31
 
32
+ Contributions are much appreciated. To contribute:
33
+
23
34
  1. Fork the project
24
35
  2. Create a feature branch (`git checkout -b my-new-feature`)
25
36
  3. Make your changes, including tests so it doesn't break in the future
26
37
  4. Commit your changes (`git commit -am 'Add feature'`)
27
38
  5. Push to the branch (`git push origin my-new-feature`)
28
39
  6. Create new pull request
40
+
41
+ Please do not touch the version, as this will be updated by the owners when the gem is ready for a new release.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require "rake/testtask"
3
3
 
4
4
  Rake::TestTask.new do |t|
5
5
  t.libs << "test"
6
- t.test_files = FileList["test/*_test.rb"]
6
+ t.test_files = FileList["test/**/*_test.rb"]
7
7
  t.verbose = true
8
8
  end
9
9
 
@@ -3,20 +3,23 @@ lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'fetch/version'
5
5
 
6
- Gem::Specification.new do |spec|
7
- spec.name = "fetch"
8
- spec.version = Fetch::VERSION
9
- spec.authors = ["Lasse Bunk"]
10
- spec.email = ["lassebunk@gmail.com"]
11
- spec.summary = %q{Coming}
12
- spec.description = %q{Coming}
13
- spec.homepage = "https://github.com/lassebunk/fetch"
14
- spec.license = "MIT"
6
+ Gem::Specification.new do |s|
7
+ s.name = "fetch"
8
+ s.version = Fetch::VERSION
9
+ s.authors = ["Lasse Bunk"]
10
+ s.email = ["lassebunk@gmail.com"]
11
+ s.summary = %q{Coming}
12
+ s.description = %q{Coming}
13
+ s.homepage = "https://github.com/lassebunk/fetch"
14
+ s.license = "MIT"
15
15
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.test_files = spec.files.grep(%r{^test/})
18
- spec.require_paths = ["lib"]
16
+ s.files = `git ls-files`.split($/)
17
+ s.test_files = s.files.grep(%r{^test/})
18
+ s.require_paths = ["lib"]
19
19
 
20
- spec.add_development_dependency "bundler", "~> 1.5"
21
- spec.add_development_dependency "rake"
20
+ s.add_dependency "typhoeus", ">= 0.6.0"
21
+ s.add_development_dependency "json"
22
+ s.add_development_dependency "minitest", ">= 5.4"
23
+ s.add_development_dependency "webmock", ">= 1.20"
24
+ s.add_development_dependency "rake"
22
25
  end
@@ -1 +1,49 @@
1
- require "fetch/version"
1
+ require "typhoeus"
2
+
3
+ %w{
4
+ version
5
+ callbacks
6
+ base
7
+ request
8
+ async
9
+ simple
10
+ module
11
+ backend
12
+ configuration
13
+ }.each do |file|
14
+ require "fetch/#{file}"
15
+ end
16
+
17
module Fetch
  # Error describing an HTTP response that came back with a failing status.
  #
  # The human-readable text is handed to StandardError so that +message+,
  # +to_s+ and +inspect+ all agree.
  class HttpError < StandardError
    # The HTTP status code and the URL that was requested.
    attr_reader :code, :url

    # code - The status code of the failed response.
    # url  - The URL that was requested.
    def initialize(code, url)
      @code, @url = code, url
      super("HTTP Error #{code}: #{url}")
    end
  end

  class << self
    # Returns the configuration object (+Fetch::Configuration+), creating it
    # on first use.
    def config
      @config ||= Configuration.new
    end

    # Yields the configuration object (+Fetch::Configuration+).
    #
    #   Fetch.configure do |config|
    #     config.user_agent = "Custom User Agent"
    #   end
    def configure
      yield config
    end

    # Returns the memoized +ModuleCache+ instance.
    # NOTE(review): +ModuleCache+ is not defined in the visible sources;
    # presumably it is provided by one of the required fetch files.
    def module_cache
      @module_cache ||= ModuleCache.new
    end
  end
end
@@ -0,0 +1,21 @@
1
module Fetch
  # Mixin that turns the +request+ callbacks declared on a class into
  # +Request+ objects.
  module Async
    # Declares the callbacks this mixin relies on on the including class.
    def self.included(base)
      base.define_callback(:request, :before_process, :after_process)
    end

    # Builds one +Request+ per +request+ callback declared on the class.
    #
    # The instance's own callbacks are wired onto each request before the
    # +request+ block is evaluated (in the context of this instance) with the
    # request as its argument. Requests that end up without a URL are dropped.
    def requests
      built = self.class.callbacks[:request].map do |callback|
        req = Request.new

        if callback?(:before_process)
          req.before_process { before_process }
        end

        if callback?(:after_process)
          req.after_process { after_process }
        end

        if callback?(:failure)
          req.failure { |code, url| failure(code, url) }
        end

        if callback?(:error)
          req.error { |e| error(e) }
        end

        instance_exec(req, &callback)
        req
      end

      built.select { |req| req.url }
    end
  end
end
@@ -0,0 +1,2 @@
1
+ require "fetch/backend/base"
2
+ require "fetch/backend/typhoeus"
@@ -0,0 +1,15 @@
1
module Fetch
  module Backend
    # Abstract backend. Holds the requests to be performed; concrete backends
    # subclass it and implement +run+.
    class Base
      # The requests handed to the backend on construction.
      attr_reader :requests

      # requests - The collection of requests this backend should perform.
      def initialize(requests)
        @requests = requests
      end

      # Performs the requests. Must be overridden by subclasses.
      #
      # progress - Optional block; what it receives is up to the subclass.
      #
      # Raises NotImplementedError naming the class that lacks an
      # implementation.
      def run(&progress)
        raise NotImplementedError, "#{self.class} must implement #run"
      end
    end
  end
end
@@ -0,0 +1,43 @@
1
module Fetch
  module Backend
    # Backend that performs the requests through a Typhoeus hydra.
    class Typhoeus < Base
      # Queues every request on a new hydra and runs it.
      #
      # progress - Optional block, called without arguments once for each
      #            request whose success or failure handler has run.
      def run(&progress)
        hydra = ::Typhoeus::Hydra.new

        build_requests(&progress).each do |request|
          hydra.queue(request)
        end

        hydra.run
      end

      private

      # Maps each Fetch request onto a Typhoeus request and attaches the
      # success/failure handlers that forward the outcome to the Fetch request.
      def build_requests(&progress)
        requests.map do |req|
          request = ::Typhoeus::Request.new(
            req.url,
            method:         req.method,
            body:           req.body_string,
            followlocation: req.follow_redirects,
            timeout:        req.timeout,
            forbid_reuse:   true,
            headers:        req.headers
          )

          request.on_success do |res|
            req.process!(res.body, req.url, res.effective_url)
            # The progress block is optional; calling +run+ without one must
            # not blow up inside the response handler.
            progress.call if progress
          end

          request.on_failure do |res|
            req.failed!(res.code, req.url)
            progress.call if progress
          end

          request
        end
      end
    end
  end
end
@@ -0,0 +1,64 @@
1
module Fetch
  # Base class for fetch handlers, e.g. +ProductFetch+, +UserFetch+, etc.
  class Base
    include Callbacks

    # Class-level callbacks available to fetch handlers.
    #
    #   before_fetch do
    #     # runs before the requests are performed
    #   end
    #
    #   after_fetch do
    #     # runs after the requests have been performed
    #   end
    #
    #   progress do |progress|
    #     # receives the progress in percent
    #   end
    define_callback :modules, :init, :before_fetch, :after_fetch, :progress

    # Performs the fetch.
    #
    # Gathers the requests of every module that wants to be fetched, reports
    # the initial progress, runs +before_fetch+, hands the requests to the
    # configured backend (bumping the progress each time the backend calls
    # back), and finally runs +after_fetch+.
    #
    # Returns true.
    def fetch
      active_modules = instantiate_modules.select { |mod| mod.fetch? }
      requests = active_modules.map { |mod| mod.requests }.flatten

      total = requests.size
      done = 0
      update_progress(total, done)

      before_fetch

      backend.new(requests).run do
        done += 1
        update_progress(total, done)
      end

      after_fetch

      true
    end

    private

    # One instance per declared module class: whatever the +init+ callback
    # returns for the class, or a plain +new+ when that is nil/false.
    def instantiate_modules
      Array(modules).map do |klass|
        instance = init(klass)
        instance || klass.new
      end
    end

    # Sends the whole-number percentage of +done+ out of +total+ to the
    # +progress+ callback. Nothing to do counts as 100 percent.
    def update_progress(total, done)
      percentage =
        if total.zero?
          100
        else
          ((done.to_f / total) * 100).to_i
        end

      progress(percentage)
    end

    # The backend class taken from the global configuration.
    def backend
      Fetch.config.backend
    end
  end
end
@@ -0,0 +1,62 @@
1
module Fetch
  # Mixin providing class-level callback declarations that are run in the
  # context of an instance.
  module Callbacks
    # Adds the class-level API (+ClassMethods+) to the including class.
    def self.included(base)
      base.extend ClassMethods
    end

    private

    # Check if a callback has been used, i.e. at least one block is
    # registered under +name+ on the instance's class.
    def callback?(name)
      self.class.callbacks[name].any?
    end

    # Run specific callbacks in the context of the instance and return the
    # results of all of them, in registration order.
    #
    #   run_callbacks_for(:before_fetch)
    #   run_callbacks_for(:progress, 12) # 12 percent done
    def run_callbacks_for(name, *args)
      self.class.callbacks[name].map do |block|
        instance_exec(*args, &block)
      end
    end

    module ClassMethods
      # Hash of callback blocks to be called, keyed by callback name.
      # Unknown names start out as an empty list.
      def callbacks
        @callbacks ||= Hash.new { |h, k| h[k] = [] }
      end

      # Defines callback methods for each of +names+:
      #
      # * a class-level method that registers values and/or a block, and
      # * an instance method that runs the registered callbacks and returns
      #   the result of the last one (nil when none are registered).
      def define_callback(*names)
        names.each do |name|
          define_singleton_method name do |*values, &block|
            create_callback_for(name, *values, &block)
          end

          define_method name do |*args|
            run_callbacks_for(name, *args).last
          end
        end
      end

      # Gives each subclass its own copy of the callback lists so that
      # callbacks added on the subclass do not leak into the parent.
      def inherited(base)
        super
        callbacks.each do |name, blocks|
          base.callbacks[name] = blocks.dup
        end
      end

      private

      # Registers a callback returning +values+ (when any of them is truthy)
      # and/or the given block under +name+.
      def create_callback_for(name, *values, &block)
        add_callback(name) { values } if values.any?
        add_callback(name, &block) if block
      end

      # Appends +block+ to the callbacks registered under +name+.
      def add_callback(name, &block)
        callbacks[name] << block
      end
    end
  end
end
@@ -0,0 +1,30 @@
1
module Fetch
  # Settings object with one reader/writer pair per key in +DEFAULTS+.
  class Configuration
    # Default value for every option. A Proc default is called to obtain the
    # value the first time the option is read on an instance.
    DEFAULTS = {
      user_agent: "Mozilla/5.0",
      timeout: 10,
      namespaces: ["fetch_sources"],
      raise_on_error: -> { defined?(Rails.env) && %w{development test}.include?(Rails.env) },
      backend: Backend::Typhoeus
    }

    DEFAULTS.each do |option, default|
      ivar = "@#{option}"

      # Reader: returns the explicitly assigned value, or resolves and
      # memoizes the default on this instance.
      define_method(option) do
        return instance_variable_get(ivar) if instance_variable_defined?(ivar)

        # Resolve into a fresh local. Reassigning the captured +default+ would
        # replace the Proc for every instance, and handing out the very same
        # Array/Hash/String would let one instance mutate the shared default.
        resolved =
          case default
          when Proc then default.call
          when Array, Hash, String then default.dup
          else default
          end

        instance_variable_set(ivar, resolved)
      end

      # Writer: stores +value+ on this instance.
      define_method("#{option}=") do |value|
        instance_variable_set(ivar, value)
      end
    end

    # Convenience method for defining a single namespace that contains fetch modules.
    def namespace=(value)
      self.namespaces = [value]
    end
  end
end
@@ -0,0 +1,17 @@
1
module Fetch
  # A fetch module: combines the callback DSL with request building and adds
  # the +fetch_if+, +failure+ and +error+ callbacks.
  class Module
    include Callbacks
    include Async

    define_callback :fetch_if, :failure, :error

    # Whether or not the module should be used when fetching.
    # Always true when no `fetch_if do ... end` has been declared; otherwise
    # the truthiness of what the +fetch_if+ callback returns.
    def fetch?
      callback?(:fetch_if) ? !!fetch_if : true
    end
  end
end
@@ -0,0 +1,156 @@
1
+ require "cgi"
2
+
3
module Fetch
  # A request to be completed with Typhoeus.
  #
  # Carries the URL, the HTTP options and the callbacks to run once the
  # request has been processed, has failed, or raised while being processed.
  class Request
    # Initializes the request and sets properties to the values defined in
    # +options+. Every option key must have a matching public setter.
    #
    #   request = Fetch::Request.new("http://www.google.com", timeout: 5)
    #   request.url     # => "http://www.google.com"
    #   request.timeout # => 5
    #
    #   request = Fetch::Request.new(timeout: 5)
    #   request.url     # => nil
    #   request.timeout # => 5
    def initialize(*args)
      options = args.pop if args.last.is_a?(Hash)

      self.url = args.first if args.any?

      # public_send keeps option keys from reaching private methods.
      options.each { |key, value| public_send("#{key}=", value) } if options
    end

    # The URL to be requested.
    attr_accessor :url

    # Whether to follow redirects. Default: +true+
    def follow_redirects
      return @follow_redirects if defined?(@follow_redirects)
      @follow_redirects = true
    end

    # Sets whether to follow redirects.
    attr_writer :follow_redirects

    # The method to be used for the request. Default: +:get+
    def method
      @method || :get
    end

    # Sets the method to be used for the request.
    attr_writer :method

    # The post body to be sent with the request. Default: an empty hash.
    def body
      @body ||= {}
    end

    # Sets the post body to be sent with the request.
    attr_writer :body

    # The post body represented as a string.
    #
    # A body that can be mapped over (e.g. a Hash) is form-encoded as
    # +key=value+ pairs joined with "&". Any other body, such as a String
    # that is already encoded, is returned via +to_s+.
    def body_string
      payload = body
      return payload.to_s unless payload.respond_to?(:map)

      payload.map { |k, v| "#{CGI.escape(k.to_s)}=#{CGI.escape(v.to_s)}" }.join("&")
    end

    # The timeout for the request.
    # Default: Taken from +Fetch.config.timeout+
    def timeout
      return @timeout if defined?(@timeout)
      Fetch.config.timeout
    end

    # Sets the timeout for the request.
    attr_writer :timeout

    # The headers to be sent with the request. Unless set explicitly, they
    # are initialized on first access with the configured user agent.
    def headers
      @headers ||= {
        "User-Agent" => Fetch.config.user_agent
      }
    end

    # Sets the headers to be sent with the request.
    attr_writer :headers

    # The user agent being sent with the request.
    def user_agent
      headers["User-Agent"]
    end

    # Sets the user agent to be sent with the request.
    def user_agent=(value)
      headers["User-Agent"] = value
    end

    # Sets a callback to be run before each process.
    # Raises a RuntimeError when no block is given.
    def before_process(&block)
      raise "You must supply a block to #{self.class.name}#before_process" unless block
      @before_process_callback = block
    end

    # Runs the before process callback, if one is set.
    def before_process!
      @before_process_callback.call if @before_process_callback
    end

    # Sets the callback to be run when the request completes.
    # Raises a RuntimeError when no block is given.
    def process(&block)
      raise "You must supply a block to #{self.class.name}#process" unless block
      @process_callback = block
    end

    # Runs the before process, process and after process callbacks in that
    # order. If any of them fails with an exception, the exception is sent
    # to the error callback.
    def process!(body, url, effective_url)
      before_process!
      @process_callback.call(body, url, effective_url) if @process_callback
      after_process!
    rescue => e
      error!(e)
    end

    # Sets a callback to be run after each process.
    # Raises a RuntimeError when no block is given.
    def after_process(&block)
      raise "You must supply a block to #{self.class.name}#after_process" unless block
      @after_process_callback = block
    end

    # Runs the after process callback, if one is set.
    def after_process!
      @after_process_callback.call if @after_process_callback
    end

    # Sets the callback to be run if a request fails.
    # Raises a RuntimeError when no block is given.
    def failure(&block)
      raise "You must supply a block to #{self.class.name}#failure" unless block
      @failure_callback = block
    end

    # Runs the failure callback, if one is set.
    def failed!(code, url)
      @failure_callback.call(code, url) if @failure_callback
    end

    # Sets the callback to be run if the processing fails due to an exception.
    # Raises a RuntimeError when no block is given.
    def error(&block)
      raise "You must supply a block to #{self.class.name}#error" unless block
      @error_callback = block
    end

    # Runs the error callback. Raises the exception given in +exception+ if an
    # error callback isn't defined.
    def error!(exception)
      if @error_callback
        @error_callback.call(exception)
      else
        raise exception
      end
    end
  end
end