metainspector 4.3.3 → 4.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +11 -2
- data/bin/console +7 -0
- data/lib/meta_inspector/document.rb +4 -1
- data/lib/meta_inspector/version.rb +1 -1
- data/spec/document_spec.rb +1 -1
- metadata +4 -3
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA1:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: ccdca184756e92a93aee21d1c2ac23bd0acfad7e
|
|
4
|
+
data.tar.gz: 0039759535f19c26f0e70271bbceb8f53a79ec2e
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f0ef85b48a07af588d53bb148b5e4badae0df4fd49ce46b890cec6a3471c817704ce127a0243c11f5e945cbd741f831a7964645af590ba5f8b33003e78cc43d5
|
|
7
|
+
data.tar.gz: 52bd1966da3e5fa8093d7b59d99880e26e8c40b03d390be0c276802edfdb0cf500892149f3fa7bccee631f086bf0db8a8701adc57b211dbd152754ee2cf14a66
|
data/README.md
CHANGED
|
@@ -8,6 +8,10 @@ You give it an URL, and it lets you easily get its title, links, images, charset
|
|
|
8
8
|
|
|
9
9
|
You can try MetaInspector live at this little demo: [https://metainspectordemo.herokuapp.com](https://metainspectordemo.herokuapp.com)
|
|
10
10
|
|
|
11
|
+
## Changes in 4.4
|
|
12
|
+
|
|
13
|
+
The default headers now include `'Accept-Encoding' => 'identity'` to minimize trouble with servers that respond with malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
|
14
|
+
|
|
11
15
|
## Changes in 4.3
|
|
12
16
|
|
|
13
17
|
* The Document API has been extended with one new method `page.best_title` that returns the longest text available from a selection of candidates.
|
|
@@ -310,10 +314,15 @@ page = MetaInspector.new('facebook.com', :allow_redirections => false)
|
|
|
310
314
|
By default, the following headers are set:
|
|
311
315
|
|
|
312
316
|
```ruby
|
|
313
|
-
{
|
|
317
|
+
{
|
|
318
|
+
'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)",
|
|
319
|
+
'Accept-Encoding' => 'identity'
|
|
320
|
+
}
|
|
314
321
|
```
|
|
315
322
|
|
|
316
|
-
|
|
323
|
+
The `Accept-Encoding` is set to `identity` to avoid exceptions being raised on servers that return malformed compressed responses, [as explained here](https://github.com/lostisland/faraday/issues/337).
|
|
324
|
+
|
|
325
|
+
If you want to override the default headers then use the `headers` option:
|
|
317
326
|
|
|
318
327
|
```ruby
|
|
319
328
|
# Set the User-Agent header
|
data/bin/console
ADDED
|
data/lib/meta_inspector/document.rb
CHANGED
|
@@ -87,7 +87,10 @@ module MetaInspector
|
|
|
87
87
|
:retries => 3,
|
|
88
88
|
:html_content_only => false,
|
|
89
89
|
:warn_level => :raise,
|
|
90
|
-
:headers => {
|
|
90
|
+
:headers => {
|
|
91
|
+
'User-Agent' => default_user_agent,
|
|
92
|
+
'Accept-Encoding' => 'identity'
|
|
93
|
+
},
|
|
91
94
|
:allow_redirections => true,
|
|
92
95
|
:normalize_url => true,
|
|
93
96
|
:download_images => true }
|
data/spec/document_spec.rb
CHANGED
|
@@ -158,7 +158,7 @@ describe MetaInspector::Document do
|
|
|
158
158
|
describe 'headers' do
|
|
159
159
|
it "should include default headers" do
|
|
160
160
|
url = "http://pagerankalert.com/"
|
|
161
|
-
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)"}
|
|
161
|
+
expected_headers = {'User-Agent' => "MetaInspector/#{MetaInspector::VERSION} (+https://github.com/jaimeiniesta/metainspector)", 'Accept-Encoding' => 'identity'}
|
|
162
162
|
|
|
163
163
|
headers = {}
|
|
164
164
|
expect(headers).to receive(:merge!).with(expected_headers)
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: metainspector
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.3.3
|
|
4
|
+
version: 4.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Jaime Iniesta
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2015-
|
|
11
|
+
date: 2015-03-12 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: nokogiri
|
|
@@ -237,6 +237,7 @@ files:
|
|
|
237
237
|
- MIT-LICENSE
|
|
238
238
|
- README.md
|
|
239
239
|
- Rakefile
|
|
240
|
+
- bin/console
|
|
240
241
|
- examples/basic_scraping.rb
|
|
241
242
|
- examples/link_checker.rb
|
|
242
243
|
- examples/spider.rb
|
|
@@ -332,7 +333,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
|
332
333
|
version: '0'
|
|
333
334
|
requirements: []
|
|
334
335
|
rubyforge_project:
|
|
335
|
-
rubygems_version: 2.
|
|
336
|
+
rubygems_version: 2.4.5
|
|
336
337
|
signing_key:
|
|
337
338
|
specification_version: 4
|
|
338
339
|
summary: MetaInspector is a ruby gem for web scraping purposes, that returns metadata
|