metainspector 2.1.0 → 2.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: fcb1f8adaeb835639f198ee9ede5f54da02211fd
4
- data.tar.gz: 40bb733a1e3f2b48011c535951a2a2ac2bb097dc
3
+ metadata.gz: 9c230bbbcf325166d2e5906ed7f13af8e0f8ef06
4
+ data.tar.gz: 245e219fc420a20e8fd8eef722502a6cc23d8087
5
5
  SHA512:
6
- metadata.gz: 5f502ee4f24fb04b8d0afb8e369a5da2ac7c7c4a4b4c8a743cfbfd24f0f15125d139c5f35bd65c2fe9bce59d4015305c2fdf05f4fe6e696e691e90bb60e445b2
7
- data.tar.gz: 2f9252d0283f815b15bc7e4c43b33e588975b63a935354e3e0c34639a0bad1c96d52b63308f9902120829b3747c796fc43f36d95d5ee78237c9dbc4fb64baa80
6
+ metadata.gz: 791526a75ce5e23651a7cbded8ee4e9ab12fd75281d7089a2f2191ee44293c9f1b8d8587cae3284e2ed1c103d9020cfa2038fd1be4ddc90d2e78d1235fcc0374
7
+ data.tar.gz: f3c535c4a4b7618168e30cf2427a22365404dc00f698dde32669364a86770bc8445b20989974298afd124822fddea272afd000035663d7e0eb6cd5acb0676006
data/.travis.yml CHANGED
@@ -2,4 +2,5 @@ rvm:
2
2
  - 1.9.2
3
3
  - 1.9.3
4
4
  - 2.0.0
5
- - 2.1.0
5
+ - 2.1.0
6
+ - 2.1.1
data/README.md CHANGED
@@ -191,6 +191,17 @@ However, you can tell MetaInspector to allow these redirections with the option
191
191
  # And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
192
192
  page = MetaInspector.new('facebook.com', :allow_redirections => :all)
193
193
 
194
+ ### Headers
195
+
196
+ By default, the following headers are set:
197
+
198
+ {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
199
+
200
+ If you want to set custom headers then use the `headers` option:
201
+
202
+ # Set the User-Agent header
203
+ page = MetaInspector.new('example.com', :headers => {'User-Agent' => 'My custom User-Agent'})
204
+
194
205
  ### HTML Content Only
195
206
 
196
207
  MetaInspector will try to parse all URLs by default. If you want to raise an exception when trying to parse a non-html URL (one that has a content-type different than text/html), you can state it like this:
@@ -215,7 +226,7 @@ However, if you prefer you can also set the `warn_level: :warn` option, so that
215
226
 
216
227
  You can also set the `warn_level: :store` option so that exceptions found will be silenced, and left for you to inspect on `page.exceptions`. You can also ask for `page.ok?`, which will return `true` if no exceptions are stored.
217
228
 
218
- You should avoid using the `:store` option, or use it wisely, as silencing errors can be problematic, it's always better to face the errors and treat them accordingly.
229
+ You should avoid using the `:store` option, or use it wisely, as silencing errors can be problematic, it's always better to face the errors and treat them accordingly.
219
230
 
220
231
  ## Examples
221
232
 
@@ -7,7 +7,6 @@ require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/reque
7
7
  require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/url'))
8
8
  require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/parser'))
9
9
  require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/document'))
10
- require File.expand_path(File.join(File.dirname(__FILE__), 'meta_inspector/deprecations'))
11
10
 
12
11
  module MetaInspector
13
12
  extend self
@@ -3,7 +3,7 @@
3
3
  module MetaInspector
4
4
  # A MetaInspector::Document knows about its URL and its contents
5
5
  class Document
6
- attr_reader :timeout, :html_content_only, :allow_redirections, :warn_level
6
+ attr_reader :timeout, :html_content_only, :allow_redirections, :warn_level, :headers
7
7
 
8
8
  include MetaInspector::Exceptionable
9
9
 
@@ -14,25 +14,22 @@ module MetaInspector
14
14
  # => allow_redirections: when :safe, allows HTTP => HTTPS redirections. When :all, it also allows HTTPS => HTTP
15
15
  # => document: the html of the url as a string
16
16
  # => warn_level: what to do when encountering exceptions. Can be :warn, :raise or nil
17
+ # => headers: object containing custom headers for the request
17
18
  def initialize(initial_url, options = {})
18
19
  options = defaults.merge(options)
19
20
  @timeout = options[:timeout]
20
21
  @html_content_only = options[:html_content_only]
21
22
  @allow_redirections = options[:allow_redirections]
22
23
  @document = options[:document]
23
-
24
- if options[:verbose] == true
25
- warn "The verbose option is deprecated since 1.17, please use warn_level: :warn instead"
26
- options[:warn_level] = :warn
27
- end
28
-
29
- @warn_level = options[:warn_level]
30
- @exception_log = options[:exception_log] || MetaInspector::ExceptionLog.new(warn_level: warn_level)
31
- @url = MetaInspector::URL.new(initial_url, exception_log: @exception_log)
32
- @request = MetaInspector::Request.new(@url, allow_redirections: @allow_redirections,
33
- timeout: @timeout,
34
- exception_log: @exception_log) unless @document
35
- @parser = MetaInspector::Parser.new(self, exception_log: @exception_log)
24
+ @headers = options[:headers]
25
+ @warn_level = options[:warn_level]
26
+ @exception_log = options[:exception_log] || MetaInspector::ExceptionLog.new(warn_level: warn_level)
27
+ @url = MetaInspector::URL.new(initial_url, exception_log: @exception_log)
28
+ @request = MetaInspector::Request.new(@url, allow_redirections: @allow_redirections,
29
+ timeout: @timeout,
30
+ exception_log: @exception_log,
31
+ headers: @headers) unless @document
32
+ @parser = MetaInspector::Parser.new(self, exception_log: @exception_log)
36
33
  end
37
34
 
38
35
  extend Forwardable
@@ -65,7 +62,11 @@ module MetaInspector
65
62
  private
66
63
 
67
64
  def defaults
68
- { :timeout => 20, :html_content_only => false, :warn_level => :raise }
65
+ { :timeout => 20,
66
+ :html_content_only => false,
67
+ :warn_level => :raise,
68
+ :headers => {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
69
+ }
69
70
  end
70
71
 
71
72
  def document
@@ -17,6 +17,7 @@ module MetaInspector
17
17
  @allow_redirections = options[:allow_redirections]
18
18
  @timeout = options[:timeout]
19
19
  @exception_log = options[:exception_log]
20
+ @headers = options[:headers]
20
21
 
21
22
  response # as soon as it is set up, we make the request so we can fail early
22
23
  end
@@ -43,7 +44,11 @@ module MetaInspector
43
44
  end
44
45
 
45
46
  def fetch
46
- request = open(url, {:allow_redirections => @allow_redirections})
47
+ options = {}
48
+ options.merge!(:allow_redirections => @allow_redirections) if @allow_redirections
49
+ options.merge!(@headers) if @headers.is_a?(Hash)
50
+
51
+ request = open(url, options)
47
52
 
48
53
  @url.url = request.base_uri.to_s
49
54
 
@@ -51,7 +56,7 @@ module MetaInspector
51
56
  end
52
57
 
53
58
  def defaults
54
- { allow_redirections: false, timeout: 20, exception_log: MetaInspector::ExceptionLog.new }
59
+ { timeout: 20, exception_log: MetaInspector::ExceptionLog.new }
55
60
  end
56
61
  end
57
62
  end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "2.1.0"
4
+ VERSION = "2.2.0"
5
5
  end
@@ -89,4 +89,30 @@ describe MetaInspector::Document do
89
89
  tar_url.title
90
90
  end
91
91
  end
92
+
93
+ describe 'headers' do
94
+ it "should include default headers" do
95
+ url = 'http://example.com/headers'
96
+ request = double('Request', base_uri: url)
97
+ expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
98
+
99
+ MetaInspector::Request.any_instance.should_receive(:open)
100
+ .with(url, expected_headers)
101
+ .and_return(request)
102
+
103
+ MetaInspector::Document.new(url)
104
+ end
105
+
106
+ it "should include passed headers on the request" do
107
+ url = 'http://example.com/headers'
108
+ headers = {'User-Agent' => 'Mozilla', 'Referer' => 'https://github.com/'}
109
+ request = double('Request', base_uri: url)
110
+
111
+ MetaInspector::Request.any_instance.should_receive(:open)
112
+ .with(url, headers)
113
+ .and_return(request)
114
+
115
+ MetaInspector::Document.new(url, headers: headers)
116
+ end
117
+ end
92
118
  end
metadata CHANGED
@@ -1,55 +1,55 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.0
4
+ version: 2.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-02-16 00:00:00.000000000 Z
11
+ date: 2014-04-30 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - "~>"
17
+ - - ~>
18
18
  - !ruby/object:Gem::Version
19
19
  version: '1.6'
20
20
  type: :runtime
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - "~>"
24
+ - - ~>
25
25
  - !ruby/object:Gem::Version
26
26
  version: '1.6'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: open_uri_redirections
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - "~>"
31
+ - - ~>
32
32
  - !ruby/object:Gem::Version
33
33
  version: 0.1.4
34
34
  type: :runtime
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - "~>"
38
+ - - ~>
39
39
  - !ruby/object:Gem::Version
40
40
  version: 0.1.4
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: addressable
43
43
  requirement: !ruby/object:Gem::Requirement
44
44
  requirements:
45
- - - "~>"
45
+ - - ~>
46
46
  - !ruby/object:Gem::Version
47
47
  version: 2.3.5
48
48
  type: :runtime
49
49
  prerelease: false
50
50
  version_requirements: !ruby/object:Gem::Requirement
51
51
  requirements:
52
- - - "~>"
52
+ - - ~>
53
53
  - !ruby/object:Gem::Version
54
54
  version: 2.3.5
55
55
  - !ruby/object:Gem::Dependency
@@ -84,28 +84,28 @@ dependencies:
84
84
  name: awesome_print
85
85
  requirement: !ruby/object:Gem::Requirement
86
86
  requirements:
87
- - - "~>"
87
+ - - ~>
88
88
  - !ruby/object:Gem::Version
89
89
  version: 1.2.0
90
90
  type: :development
91
91
  prerelease: false
92
92
  version_requirements: !ruby/object:Gem::Requirement
93
93
  requirements:
94
- - - "~>"
94
+ - - ~>
95
95
  - !ruby/object:Gem::Version
96
96
  version: 1.2.0
97
97
  - !ruby/object:Gem::Dependency
98
98
  name: rake
99
99
  requirement: !ruby/object:Gem::Requirement
100
100
  requirements:
101
- - - "~>"
101
+ - - ~>
102
102
  - !ruby/object:Gem::Version
103
103
  version: 10.1.0
104
104
  type: :development
105
105
  prerelease: false
106
106
  version_requirements: !ruby/object:Gem::Requirement
107
107
  requirements:
108
- - - "~>"
108
+ - - ~>
109
109
  - !ruby/object:Gem::Version
110
110
  version: 10.1.0
111
111
  description: MetaInspector lets you scrape a web page and get its title, charset,
@@ -116,15 +116,14 @@ executables: []
116
116
  extensions: []
117
117
  extra_rdoc_files: []
118
118
  files:
119
- - ".gitignore"
120
- - ".rspec.example"
121
- - ".travis.yml"
119
+ - .gitignore
120
+ - .rspec.example
121
+ - .travis.yml
122
122
  - Gemfile
123
123
  - MIT-LICENSE
124
124
  - README.md
125
125
  - Rakefile
126
126
  - lib/meta_inspector.rb
127
- - lib/meta_inspector/deprecations.rb
128
127
  - lib/meta_inspector/document.rb
129
128
  - lib/meta_inspector/exception_log.rb
130
129
  - lib/meta_inspector/exceptionable.rb
@@ -182,17 +181,17 @@ require_paths:
182
181
  - lib
183
182
  required_ruby_version: !ruby/object:Gem::Requirement
184
183
  requirements:
185
- - - ">="
184
+ - - '>='
186
185
  - !ruby/object:Gem::Version
187
186
  version: '0'
188
187
  required_rubygems_version: !ruby/object:Gem::Requirement
189
188
  requirements:
190
- - - ">="
189
+ - - '>='
191
190
  - !ruby/object:Gem::Version
192
191
  version: '0'
193
192
  requirements: []
194
193
  rubyforge_project:
195
- rubygems_version: 2.1.11
194
+ rubygems_version: 2.2.2
196
195
  signing_key:
197
196
  specification_version: 4
198
197
  summary: MetaInspector is a ruby gem for web scraping purposes, that returns a hash
@@ -1,19 +0,0 @@
1
- # -*- encoding: utf-8 -*-
2
-
3
- module MetaInspector
4
- class Scraper < Document
5
- def initialize
6
- warn "The Scraper class is now deprecated since version 1.17, use Document instead"
7
- super
8
- end
9
-
10
- def errors
11
- warn "The #errors method is deprecated since version 1.17, use #exceptions instead"
12
- exceptions
13
- end
14
-
15
- def document
16
- warn "The #document method is deprecated since version 1.17, use #to_s instead"
17
- end
18
- end
19
- end