metainspector 4.3.3 → 4.4.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +11 -2
- data/bin/console +7 -0
- data/lib/meta_inspector/document.rb +4 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ccdca184756e92a93aee21d1c2ac23bd0acfad7e
|
4
|
+
data.tar.gz: 0039759535f19c26f0e70271bbceb8f53a79ec2e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: f0ef85b48a07af588d53bb148b5e4badae0df4fd49ce46b890cec6a3471c817704ce127a0243c11f5e945cbd741f831a7964645af590ba5f8b33003e78cc43d5
|
7
|
+
data.tar.gz: 52bd1966da3e5fa8093d7b59d99880e26e8c40b03d390be0c276802edfdb0cf500892149f3fa7bccee631f086bf0db8a8701adc57b211dbd152754ee2cf14a66
|
data/README.md
CHANGED
@@ -8,6 +8,10 @@ You give it an URL, and it lets you easily get its title, links, images, charset
|
|
8
8
|
|
9
9
|
You can try MetaInspector live at this little demo: [https://metainspectordemo.herokuapp.com](https://metainspectordemo.herokuapp.com)
|
10
10
|
|
11
|
+
## Changes in 4.4
|
12
|
+
|
13
|
+
The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
14
|
+
|
11
15
|
## Changes in 4.3
|
12
16
|
|
13
17
|
* The Document API has been extended with one new method `page.best_title` that returns the longest text available from a selection of candidates.
|
@@ -310,10 +314,15 @@ page = MetaInspector.new('facebook.com', :allow_redirections => false)
|
|
310
314
|
By default, the following headers are set:
|
311
315
|
|
312
316
|
```ruby
|
313
|
-
{
|
317
|
+
{
|
318
|
+
'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)",
|
319
|
+
'Accept-Encoding' => 'identity'
|
320
|
+
}
|
314
321
|
```
|
315
322
|
|
316
|
-
|
323
|
+
The `Accept-Encoding` is set to `identity` to avoid exceptions being raised on servers that return malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
324
|
+
|
325
|
+
If you want to override the default headers then use the `headers` option:
|
317
326
|
|
318
327
|
```ruby
|
319
328
|
# Set the User-Agent header
|
data/bin/console
ADDED
data/lib/meta_inspector/document.rb
CHANGED
@@ -87,7 +87,10 @@ module MetaInspector
|
|
87
87
|
:retries => 3,
|
88
88
|
:html_content_only => false,
|
89
89
|
:warn_level => :raise,
|
90
|
-
:headers => {
|
90
|
+
:headers => {
|
91
|
+
'User-Agent' => default_user_agent,
|
92
|
+
'Accept-Encoding' => 'identity'
|
93
|
+
},
|
91
94
|
:allow_redirections => true,
|
92
95
|
:normalize_url => true,
|
93
96
|
:download_images => true }
|
data/spec/document_spec.rb
CHANGED
@@ -158,7 +158,7 @@ describe MetaInspector::Document do
|
|
158
158
|
describe 'headers' do
|
159
159
|
it "should include default headers" do
|
160
160
|
url = "http://pagerankalert.com/"
|
161
|
-
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
161
|
+
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)", 'Accept-Encoding' => 'identity'}
|
162
162
|
|
163
163
|
headers = {}
|
164
164
|
expect(headers).to receive(:merge!).with(expected_headers)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.3.3
|
4
|
+
version: 4.4.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Jaime Iniesta
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-
|
11
|
+
date: 2015-03-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -237,6 +237,7 @@ files:
|
|
237
237
|
- MIT-LICENSE
|
238
238
|
- README.md
|
239
239
|
- Rakefile
|
240
|
+
- bin/console
|
240
241
|
- examples/basic_scraping.rb
|
241
242
|
- examples/link_checker.rb
|
242
243
|
- examples/spider.rb
|
@@ -332,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
332
333
|
version: '0'
|
333
334
|
requirements: []
|
334
335
|
rubyforge_project:
|
335
|
-
rubygems_version: 2.
|
336
|
+
rubygems_version: 2.4.5
|
336
337
|
signing_key:
|
337
338
|
specification_version: 4
|
338
339
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|