metainspector 5.1.3 → 5.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f3d01cdc6e285149cbb1bdb8cf65ebab808da8c1
4
- data.tar.gz: 15c37998971b8f63e6829e75049c0f1a819d570d
3
+ metadata.gz: 4d6981fee7a5b3c01045c6e152d20fb2c788c549
4
+ data.tar.gz: 4f0ff717747c9b1bdc5abae2eae1696bc4138033
5
5
  SHA512:
6
- metadata.gz: 24e87ba552861135cd0e5cbf98a4f29fbe4d99b27ba4365aec9f5f5902751bda8b7e299c30a83bea3517b4efc18d8677f2278166d95f4adfff673633cf3a4408
7
- data.tar.gz: 7585f94e0ba3639da3c2d0d534803b63929987cc43c466135a68b6c6aa0424131b33954e0e72dbec68240e9a33d369368345ced37b70d6fb74c67ee806823e62
6
+ metadata.gz: 782597ff87e03b177d73302efb5dcfa0713e7a27367fe93b43579960e89824caa8331a88c95d96410597d03ba0849fa4c4cf3fcf2caeb02d63bf2aa606aa4c08
7
+ data.tar.gz: 268e91699c34b9649e6441450ccb244fc1b49c6c06a3c1e6787f73fa04332b8172886228ce8a20ad714c727ee45cfa2665308aaa79b33eeef3ded7ac01225722
@@ -2,3 +2,4 @@ rvm:
2
2
  - 2.0.0
3
3
  - 2.1.8
4
4
  - 2.2.4
5
+ - 2.3.0
@@ -1,5 +1,9 @@
1
1
  # MetaInspector Changelog
2
2
 
3
+ ## [Changes in 5.2](https://github.com/jaimeiniesta/metainspector/compare/v5.1.0...v5.2.0)
4
+
5
+ Removes the deprecated `html_content_only` option and replaces it with `allow_non_html_content`, which defaults to `false`.
6
+
3
7
  ## [Changes in 5.1](https://github.com/jaimeiniesta/metainspector/compare/v5.0.0...v5.1.0)
4
8
 
5
9
  Deprecates the `html_content_only` option, and turns it on by default.
data/README.md CHANGED
@@ -326,24 +326,22 @@ MetaInspector.new('https://example.com', faraday_options: { ssl: { verify: false
326
326
  # Now we can access the page
327
327
  ```
328
328
 
329
- ### HTML Content Only
329
+ ### Allow non-HTML content type
330
330
 
331
- MetaInspector will try to parse all URLs by default. By default, it will raise an exception when trying to parse a non-html URL (one that has a content-type different than text/html). You can disable this behaviour with:
331
+ By default, MetaInspector will raise an exception when trying to parse a non-HTML URL (one whose content-type is not text/html). You can disable this behaviour with:
332
332
 
333
333
  ```ruby
334
- page = MetaInspector.new('sitevalidator.com', :html_content_only => false)
334
+ page = MetaInspector.new('sitevalidator.com', :allow_non_html_content => true)
335
335
  ```
336
336
 
337
- This option is deprecated since 5.1.0 and will be removed in 5.2.0.
338
-
339
337
  ```ruby
340
338
  page = MetaInspector.new('http://example.com/image.png')
341
339
  page.content_type # "image/png"
342
- page.description # will returned a garbled string
340
+ page.description # will raise an exception
343
341
 
344
- page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
342
+ page = MetaInspector.new('http://example.com/image.png', :allow_non_html_content => true)
345
343
  page.content_type # "image/png"
346
- page.description # raises an exception
344
+ page.description # will return a garbled string
347
345
  ```
348
346
 
349
347
  ### URL Normalization
@@ -438,6 +436,7 @@ You can also come to chat with us on our [Gitter room](https://gitter.im/jaimein
438
436
 
439
437
  * [go-metainspector](https://github.com/fern4lvarez/go-metainspector), a port of MetaInspector for Go.
440
438
  * [Node-MetaInspector](https://github.com/gabceb/node-metainspector), a port of MetaInspector for Node.
439
+ * [MetaInvestigator](https://github.com/nekova/metainvestigator), a port of MetaInspector for Elixir.
441
440
 
442
441
  ## License
443
442
  MetaInspector is released under the [MIT license](MIT-LICENSE).
@@ -1,7 +1,7 @@
1
1
  module MetaInspector
2
2
  # A MetaInspector::Document knows about its URL and its contents
3
3
  class Document
4
- attr_reader :html_content_only, :allow_redirections, :headers
4
+ attr_reader :allow_non_html_content, :allow_redirections, :headers
5
5
 
6
6
  # Initializes a new instance of MetaInspector::Document, setting the URL
7
7
  # Options:
@@ -14,19 +14,14 @@ module MetaInspector
14
14
  # * normalize_url: true by default
15
15
  # * faraday_options: an optional hash of options to pass to Faraday on the request
16
16
  def initialize(initial_url, options = {})
17
- unless options[:html_content_only].nil?
18
- puts <<-EOS
19
- DEPRECATION NOTICE: html_content_only is deprecated and turned on by default since 5.1.0,
20
- this option will be removed in 5.2.0
21
- EOS
22
- end
23
17
  options = defaults.merge(options)
24
18
  @connection_timeout = options[:connection_timeout]
25
19
  @read_timeout = options[:read_timeout]
26
20
  @retries = options[:retries]
27
- @html_content_only = options[:html_content_only]
28
21
 
29
- @allow_redirections = options[:allow_redirections]
22
+ @allow_redirections = options[:allow_redirections]
23
+ @allow_non_html_content = options[:allow_non_html_content]
24
+
30
25
  @document = options[:document]
31
26
  @download_images = options[:download_images]
32
27
  @headers = options[:headers]
@@ -86,16 +81,16 @@ module MetaInspector
86
81
  private
87
82
 
88
83
  def defaults
89
- { :timeout => 20,
90
- :retries => 3,
91
- :html_content_only => true,
92
- :headers => {
93
- 'User-Agent' => default_user_agent,
94
- 'Accept-Encoding' => 'identity'
95
- },
96
- :allow_redirections => true,
97
- :normalize_url => true,
98
- :download_images => true }
84
+ { :timeout => 20,
85
+ :retries => 3,
86
+ :headers => {
87
+ 'User-Agent' => default_user_agent,
88
+ 'Accept-Encoding' => 'identity'
89
+ },
90
+ :allow_redirections => true,
91
+ :allow_non_html_content => false,
92
+ :normalize_url => true,
93
+ :download_images => true }
99
94
  end
100
95
 
101
96
  def default_user_agent
@@ -103,7 +98,7 @@ module MetaInspector
103
98
  end
104
99
 
105
100
  def document
106
- @document ||= if html_content_only && !content_type.nil? && content_type != 'text/html'
101
+ @document ||= if !allow_non_html_content && !content_type.nil? && content_type != 'text/html'
107
102
  fail MetaInspector::ParserError.new "The url provided contains #{content_type} content instead of text/html content"
108
103
  else
109
104
  @request.read
@@ -1,3 +1,3 @@
1
1
  module MetaInspector
2
- VERSION = '5.1.3'
2
+ VERSION = '5.2.0'
3
3
  end
@@ -21,7 +21,7 @@ Gem::Specification.new do |gem|
21
21
  gem.add_dependency 'faraday-http-cache', '~> 1.2'
22
22
  gem.add_dependency 'faraday-encoding', '~> 0.0.3'
23
23
  gem.add_dependency 'addressable', '~> 2.4'
24
- gem.add_dependency 'fastimage', '~> 1.8.1'
24
+ gem.add_dependency 'fastimage', '~> 2.0'
25
25
  gem.add_dependency 'nesty', '~> 1.0'
26
26
 
27
27
  gem.add_development_dependency 'rspec', '~> 3.0'
@@ -74,35 +74,26 @@ describe MetaInspector::Document do
74
74
  })
75
75
  end
76
76
 
77
- describe 'exception handling' do
78
- it "should not parse images when parse_html_content_type_only is not specified" do
77
+ describe "allow_non_html_content option" do
78
+ it "should not allow non-html content type by default" do
79
79
  expect do
80
80
  image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png')
81
81
  image_url.title
82
82
  end.to raise_error(MetaInspector::ParserError)
83
83
  end
84
84
 
85
- it "should parse images when parse_html_content_type_only is false" do
85
+ it "should not allow non-html content type when explicitly disallowed" do
86
86
  expect do
87
- image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: false)
88
- image_url.title
89
- end.to_not raise_error
90
- end
91
-
92
- it "should handle errors when content is image/jpeg and html_content_type_only is true" do
93
- expect do
94
- image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', html_content_only: true)
95
-
87
+ image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', allow_non_html_content: false)
96
88
  image_url.title
97
89
  end.to raise_error(MetaInspector::ParserError)
98
90
  end
99
91
 
100
- it "should handle errors when content is not text/html and html_content_type_only is true" do
92
+ it "should allow non-html content type when explicitly allowed" do
101
93
  expect do
102
- tar_url = MetaInspector::Document.new('http://pagerankalert.com/file.tar.gz', html_content_only: true)
103
-
104
- tar_url.title
105
- end.to raise_error(MetaInspector::ParserError)
94
+ image_url = MetaInspector::Document.new('http://pagerankalert.com/image.png', allow_non_html_content: true)
95
+ image_url.title
96
+ end.to_not raise_error(MetaInspector::ParserError)
106
97
  end
107
98
  end
108
99
 
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.1.3
4
+ version: 5.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-04-12 00:00:00.000000000 Z
11
+ date: 2016-04-21 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: nokogiri
@@ -114,14 +114,14 @@ dependencies:
114
114
  requirements:
115
115
  - - "~>"
116
116
  - !ruby/object:Gem::Version
117
- version: 1.8.1
117
+ version: '2.0'
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
122
  - - "~>"
123
123
  - !ruby/object:Gem::Version
124
- version: 1.8.1
124
+ version: '2.0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: nesty
127
127
  requirement: !ruby/object:Gem::Requirement