fetch 0.0.0 → 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 9b9c344461a2663701702dd9fda6ce2429671106
4
- data.tar.gz: aa57e886b43146a90c724251e3b96e76d792cc52
3
+ metadata.gz: 0596dd2fcaa4c9e3be34f29b5346794cd98cfb1e
4
+ data.tar.gz: cf9479fe7e9c26b350efa01d64b5e880bdac6aa4
5
5
  SHA512:
6
- metadata.gz: 40d701ecdcd26c12c0ed34a8a9c50cd93cc8aa4fd4907e6bb87187fa5595a9b7777b411c71bc5cf8ba9344b10e2b348cec4051586cf7a288c6a5e4c43ce2b4b7
7
- data.tar.gz: af7e724a8df6864e55bf46c098c8a7b7415faa74e128d0962b0e7f70eb28be8438011d794b3fd65578643ae970c225b552223d63c85e9ecf03e51ecd6b297fc3
6
+ metadata.gz: 34308fe5ee23a51d4974823f61020f2db9bb6b77966acbd9c72e41170523b27be4bb559b556e71c15899b5cd6b215d905c2c1489f0f110ed78a93b988c643f3e
7
+ data.tar.gz: f2004793493fd8bdefab68bcc50178a6519a205e768f3e2e7bdddc6757dd85408b0ab94f1e961208a574c7cccbaee4bf21ce62c0fa26e00e8c5f01c9b0c26560
data/.gitignore CHANGED
@@ -15,3 +15,4 @@ spec/reports
15
15
  test/tmp
16
16
  test/version_tmp
17
17
  tmp
18
+ *.log
data/README.md CHANGED
@@ -1,28 +1,41 @@
1
- [![Build Status](https://secure.travis-ci.org/lassebunk/fetch.png)](http://travis-ci.org/lassebunk/fetch)
1
+ [![Build Status](https://secure.travis-ci.org/bogrobotten/fetch.png)](http://travis-ci.org/bogrobotten/fetch)
2
2
 
3
- # Fetch!
3
+ # Fetch
4
4
 
5
5
  ![Fetch](http://i.imgur.com/B8TXlri.png)
6
6
 
7
+ Fetch enables easy fetching of data from multiple web sources.
8
+ It was extracted from [Bogrobotten](http://www.bogrobotten.dk) where we use it
9
+ to fetch prices and other stuff from multiple merchants.
10
+ We use it for price comparison, but you can use it for anything that involves
11
+ fetching data from external sources.
12
+
13
+ Fetch uses the [Typhoeus](https://github.com/typhoeus/typhoeus) gem for fast
14
+ and reliable asynchronous fetches from multiple URLs.
15
+
7
16
  ## Installation
8
17
 
9
18
  Add this line to your application's *Gemfile*:
10
19
 
11
- gem "fetch"
20
+ ```ruby
21
+ gem "fetch"
22
+ ```
12
23
 
13
24
  Then run:
14
25
 
15
- $ bundle
16
-
17
- Or install it yourself:
18
-
19
- $ gem install fetch
26
+ ```bash
27
+ $ bundle
28
+ ```
20
29
 
21
30
  ## Contributing
22
31
 
32
+ Contributions are much appreciated. To contribute:
33
+
23
34
  1. Fork the project
24
35
  2. Create a feature branch (`git checkout -b my-new-feature`)
25
36
  3. Make your changes, including tests so it doesn't break in the future
26
37
  4. Commit your changes (`git commit -am 'Add feature'`)
27
38
  5. Push to the branch (`git push origin my-new-feature`)
28
39
  6. Create a new pull request
40
+
41
+ Please do not touch the version, as this will be updated by the owners when the gem is ready for a new release.
data/Rakefile CHANGED
@@ -3,7 +3,7 @@ require "rake/testtask"
3
3
 
4
4
  Rake::TestTask.new do |t|
5
5
  t.libs << "test"
6
- t.test_files = FileList["test/*_test.rb"]
6
+ t.test_files = FileList["test/**/*_test.rb"]
7
7
  t.verbose = true
8
8
  end
9
9
 
@@ -3,20 +3,23 @@ lib = File.expand_path('../lib', __FILE__)
3
3
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
4
  require 'fetch/version'
5
5
 
6
- Gem::Specification.new do |spec|
7
- spec.name = "fetch"
8
- spec.version = Fetch::VERSION
9
- spec.authors = ["Lasse Bunk"]
10
- spec.email = ["lassebunk@gmail.com"]
11
- spec.summary = %q{Coming}
12
- spec.description = %q{Coming}
13
- spec.homepage = "https://github.com/lassebunk/fetch"
14
- spec.license = "MIT"
6
+ Gem::Specification.new do |s|
7
+ s.name = "fetch"
8
+ s.version = Fetch::VERSION
9
+ s.authors = ["Lasse Bunk"]
10
+ s.email = ["lassebunk@gmail.com"]
11
+ s.summary = %q{Coming}
12
+ s.description = %q{Coming}
13
+ s.homepage = "https://github.com/lassebunk/fetch"
14
+ s.license = "MIT"
15
15
 
16
- spec.files = `git ls-files`.split($/)
17
- spec.test_files = spec.files.grep(%r{^test/})
18
- spec.require_paths = ["lib"]
16
+ s.files = `git ls-files`.split($/)
17
+ s.test_files = s.files.grep(%r{^test/})
18
+ s.require_paths = ["lib"]
19
19
 
20
- spec.add_development_dependency "bundler", "~> 1.5"
21
- spec.add_development_dependency "rake"
20
+ s.add_dependency "typhoeus", ">= 0.6.0"
21
+ s.add_development_dependency "json"
22
+ s.add_development_dependency "minitest", ">= 5.4"
23
+ s.add_development_dependency "webmock", ">= 1.20"
24
+ s.add_development_dependency "rake"
22
25
  end
@@ -1 +1,49 @@
1
- require "fetch/version"
1
+ require "typhoeus"
2
+
3
+ %w{
4
+ version
5
+ callbacks
6
+ base
7
+ request
8
+ async
9
+ simple
10
+ module
11
+ backend
12
+ configuration
13
+ }.each do |file|
14
+ require "fetch/#{file}"
15
+ end
16
+
17
+ module Fetch
18
+ class HttpError < StandardError
19
+ attr_reader :code, :url
20
+
21
+ def initialize(code, url)
22
+ @code, @url = code, url
23
+ end
24
+
25
+ def message
26
+ "HTTP Error #{code}: #{url}"
27
+ end
28
+ end
29
+
30
+ class << self
31
+ # Returns a configuration object.
32
+ def config
33
+ @config ||= Configuration.new
34
+ end
35
+
36
+ # Yields the configuration object (+Fetch::Configuration+) to the given block.
37
+ #
38
+ # Fetch.configure do |config|
39
+ # config.user_agent = "Custom User Agent"
40
+ # end
41
+ def configure(&block)
42
+ yield config
43
+ end
44
+
45
+ def module_cache
46
+ @module_cache ||= ModuleCache.new
47
+ end
48
+ end
49
+ end
@@ -0,0 +1,21 @@
1
+ module Fetch
2
+ module Async
3
+ def self.included(base)
4
+ base.define_callback :request,
5
+ :before_process,
6
+ :after_process
7
+ end
8
+
9
+ def requests
10
+ self.class.callbacks[:request].map do |callback|
11
+ Request.new.tap do |req|
12
+ req.before_process { before_process } if callback?(:before_process)
13
+ req.after_process { after_process } if callback?(:after_process)
14
+ req.failure { |code, url| failure(code, url) } if callback?(:failure)
15
+ req.error { |e| error(e) } if callback?(:error)
16
+ instance_exec(req, &callback)
17
+ end
18
+ end.select(&:url)
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,2 @@
1
+ require "fetch/backend/base"
2
+ require "fetch/backend/typhoeus"
@@ -0,0 +1,15 @@
1
+ module Fetch
2
+ module Backend
3
+ class Base
4
+ attr_reader :requests
5
+
6
+ def initialize(requests)
7
+ @requests = requests
8
+ end
9
+
10
+ def run(&progress)
11
+ raise NotImplementedError
12
+ end
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,43 @@
1
+ module Fetch
2
+ module Backend
3
+ class Typhoeus < Base
4
+ def run(&progress)
5
+ hydra = ::Typhoeus::Hydra.new
6
+
7
+ build_requests(&progress).each do |request|
8
+ hydra.queue(request)
9
+ end
10
+
11
+ hydra.run
12
+ end
13
+
14
+ private
15
+
16
+ def build_requests(&progress)
17
+ requests.map do |req|
18
+ request = ::Typhoeus::Request.new(
19
+ req.url,
20
+ method: req.method,
21
+ body: req.body_string,
22
+ followlocation: req.follow_redirects,
23
+ timeout: req.timeout,
24
+ forbid_reuse: true,
25
+ headers: req.headers
26
+ )
27
+
28
+ request.on_success do |res|
29
+ req.process!(res.body, req.url, res.effective_url)
30
+ progress.call
31
+ end
32
+
33
+ request.on_failure do |res|
34
+ req.failed!(res.code, req.url)
35
+ progress.call
36
+ end
37
+
38
+ request
39
+ end
40
+ end
41
+ end
42
+ end
43
+ end
@@ -0,0 +1,64 @@
1
+ # Base class for fetch handlers, e.g. +ProductFetch+, +UserFetch+, etc.
2
+ module Fetch
3
+ class Base
4
+ include Callbacks
5
+
6
+ # Set callbacks to be called when fetching.
7
+ #
8
+ # before_fetch do
9
+ # # do something before fetching
10
+ # end
11
+ #
12
+ # after_fetch do
13
+ # # do something after fetching
14
+ # end
15
+ #
16
+ # progress do |progress|
17
+ # # update progress in percent
18
+ # end
19
+ define_callback :modules,
20
+ :init,
21
+ :before_fetch,
22
+ :after_fetch,
23
+ :progress
24
+
25
+ # Begin fetching.
26
+ # Collects the requests of all modules that should be fetched and runs them through the configured backend.
27
+ # Updates progress each time the backend reports a finished request.
28
+ def fetch
29
+ requests = instantiate_modules.select(&:fetch?).map(&:requests).flatten
30
+
31
+ total, done = requests.size, 0
32
+ update_progress(total, done)
33
+
34
+ before_fetch
35
+
36
+ backend.new(requests).run do
37
+ update_progress(total, done += 1)
38
+ end
39
+
40
+ after_fetch
41
+
42
+ true
43
+ end
44
+
45
+ private
46
+
47
+ # Array of instantiated fetch modules.
48
+ def instantiate_modules
49
+ Array(modules).map do |klass|
50
+ init(klass) || klass.new
51
+ end
52
+ end
53
+
54
+ # Updates progress with a percentage calculated from +total+ and +done+.
55
+ def update_progress(total, done)
56
+ percentage = total.zero? ? 100 : ((done.to_f / total) * 100).to_i
57
+ progress(percentage)
58
+ end
59
+
60
+ def backend
61
+ Fetch.config.backend
62
+ end
63
+ end
64
+ end
@@ -0,0 +1,62 @@
1
+ module Fetch
2
+ module Callbacks
3
+ def self.included(base)
4
+ base.extend ClassMethods
5
+ end
6
+
7
+ private
8
+
9
+ # Checks whether any callbacks have been registered for +name+.
10
+ def callback?(name)
11
+ self.class.callbacks[name].any?
12
+ end
13
+
14
+ # Run specific callbacks.
15
+ #
16
+ # run_callbacks_for(:before_fetch)
17
+ # run_callbacks_for(:progress, 12) # 12 percent done
18
+ def run_callbacks_for(name, *args)
19
+ self.class.callbacks[name].map do |block|
20
+ instance_exec(*args, &block)
21
+ end
22
+ end
23
+
24
+ module ClassMethods
25
+ # Hash of callback blocks to be called.
26
+ def callbacks
27
+ @callbacks ||= Hash.new { |h, k| h[k] = [] }
28
+ end
29
+
30
+ # Defines callback methods on the class level.
31
+ def define_callback(*names)
32
+ names.each do |name|
33
+ define_singleton_method name do |*values, &block|
34
+ create_callback_for(name, *values, &block)
35
+ end
36
+
37
+ define_method name do |*args|
38
+ run_callbacks_for(name, *args).last
39
+ end
40
+ end
41
+ end
42
+
43
+ def inherited(base)
44
+ super
45
+ callbacks.each do |name, callbacks|
46
+ base.callbacks[name] = callbacks.dup
47
+ end
48
+ end
49
+
50
+ private
51
+
52
+ def create_callback_for(name, *values, &block)
53
+ add_callback(name) { values } if values.any?
54
+ add_callback(name, &block) if block
55
+ end
56
+
57
+ def add_callback(name, &block)
58
+ callbacks[name] << block
59
+ end
60
+ end
61
+ end
62
+ end
@@ -0,0 +1,30 @@
1
+ module Fetch
2
+ class Configuration
3
+ DEFAULTS = {
4
+ user_agent: "Mozilla/5.0",
5
+ timeout: 10,
6
+ namespaces: ["fetch_sources"],
7
+ raise_on_error: -> { defined?(Rails.env) && %w{development test}.include?(Rails.env) },
8
+ backend: Backend::Typhoeus
9
+ }
10
+
11
+ DEFAULTS.each do |option, value|
12
+ ivar = "@#{option}"
13
+
14
+ define_method(option) do
15
+ return instance_variable_get(ivar) if instance_variable_defined?(ivar)
16
+ value = value.call if value.is_a?(Proc)
17
+ instance_variable_set(ivar, value)
18
+ end
19
+
20
+ define_method("#{option}=") do |value|
21
+ instance_variable_set(ivar, value)
22
+ end
23
+ end
24
+
25
+ # Convenience method for defining a single namespace that contains fetch modules.
26
+ def namespace=(value)
27
+ self.namespaces = [value]
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,17 @@
1
+ module Fetch
2
+ class Module
3
+ include Callbacks
4
+ include Async
5
+
6
+ define_callback :fetch_if,
7
+ :failure,
8
+ :error
9
+
10
+ # Whether or not the module should be used when fetching.
11
+ # Set with `fetch_if do ... end`.
12
+ def fetch?
13
+ return true unless callback?(:fetch_if)
14
+ !!fetch_if
15
+ end
16
+ end
17
+ end
@@ -0,0 +1,156 @@
1
+ require "cgi"
2
+
3
+ module Fetch
4
+ # A request to be completed with Typhoeus.
5
+ class Request
6
+ # Initializes the request and sets properties to the values defined in
7
+ # +options+.
8
+ #
9
+ # request = Fetch::Request.new("http://www.google.com", timeout: 5)
10
+ # request.url # => "http://www.google.com"
11
+ # request.timeout # => 5
12
+ #
13
+ # request = Fetch::Request.new(timeout: 5)
14
+ # request.url # => nil
15
+ # request.timeout # => 5
16
+ def initialize(*args)
17
+ options = args.pop if args.last.is_a?(Hash)
18
+
19
+ if args.any?
20
+ self.url = args.first
21
+ end
22
+
23
+ if options
24
+ options.each { |key, value| send("#{key}=", value) }
25
+ end
26
+ end
27
+
28
+ # The URL to be requested.
29
+ attr_accessor :url
30
+
31
+ # Whether to follow redirects. Default: +true+
32
+ def follow_redirects
33
+ return @follow_redirects if defined?(@follow_redirects)
34
+ @follow_redirects = true
35
+ end
36
+
37
+ # Sets whether to follow redirects.
38
+ attr_writer :follow_redirects
39
+
40
+ # The method to be used for the request.
41
+ def method
42
+ @method || :get
43
+ end
44
+
45
+ # Sets the method to be used for the request.
46
+ attr_writer :method
47
+
48
+ # The post body to be sent with the request.
49
+ def body
50
+ @body ||= {}
51
+ end
52
+
53
+ # Sets the post body to be sent with the request.
54
+ attr_writer :body
55
+
56
+ # The post body represented as a string.
57
+ def body_string
58
+ body.map { |k, v| "#{CGI::escape(k.to_s)}=#{CGI::escape(v.to_s)}" }.join("&")
59
+ end
60
+
61
+ # The timeout for the request.
62
+ # Default: Taken from +Fetch.config.timeout+
63
+ def timeout
64
+ return @timeout if defined?(@timeout)
65
+ Fetch.config.timeout
66
+ end
67
+
68
+ # Sets the timeout for the request.
69
+ attr_writer :timeout
70
+
71
+ # The headers to be sent with the request.
72
+ def headers
73
+ @headers ||= {
74
+ "User-Agent" => Fetch.config.user_agent
75
+ }
76
+ end
77
+
78
+ # Sets the headers to be sent with the request.
79
+ attr_writer :headers
80
+
81
+ # The user agent being sent with the request.
82
+ def user_agent
83
+ headers["User-Agent"]
84
+ end
85
+
86
+ # Sets the user agent to be sent with the request.
87
+ def user_agent=(value)
88
+ headers.merge! "User-Agent" => value
89
+ end
90
+
91
+ # Sets a callback to be run before each process.
92
+ def before_process(&block)
93
+ raise "You must supply a block to #{self.class.name}#before_process" unless block
94
+ @before_process_callback = block
95
+ end
96
+
97
+ # Runs the before process callback.
98
+ def before_process!
99
+ @before_process_callback.call if @before_process_callback
100
+ end
101
+
102
+ # Sets the callback to be run when the request completes.
103
+ def process(&block)
104
+ raise "You must supply a block to #{self.class.name}#process" unless block
105
+ @process_callback = block
106
+ end
107
+
108
+ # Runs the process callback. If it fails with an exception, it will send
109
+ # the exception to the error callback.
110
+ def process!(body, url, effective_url)
111
+ before_process!
112
+ @process_callback.call(body, url, effective_url) if @process_callback
113
+ after_process!
114
+ rescue => e
115
+ error!(e)
116
+ end
117
+
118
+ # Sets a callback to be run after each process.
119
+ def after_process(&block)
120
+ raise "You must supply a block to #{self.class.name}#after_process" unless block
121
+ @after_process_callback = block
122
+ end
123
+
124
+ # Runs the after process callback.
125
+ def after_process!
126
+ @after_process_callback.call if @after_process_callback
127
+ end
128
+
129
+ # Sets the callback to be run if a request fails.
130
+ def failure(&block)
131
+ raise "You must supply a block to #{self.class.name}#failure" unless block
132
+ @failure_callback = block
133
+ end
134
+
135
+ # Runs the failure callback.
136
+ def failed!(code, url)
137
+ @failure_callback.call(code, url) if @failure_callback
138
+ end
139
+
140
+ # Sets the callback to be run if the processing fails due to an exception.
141
+ def error(&block)
142
+ raise "You must supply a block to #{self.class.name}#error" unless block
143
+ @error_callback = block
144
+ end
145
+
146
+ # Runs the error callback. Raises the exception given in +exception+ if an
147
+ # error callback isn't defined.
148
+ def error!(exception)
149
+ if @error_callback
150
+ @error_callback.call(exception)
151
+ else
152
+ raise exception
153
+ end
154
+ end
155
+ end
156
+ end