metainspector 1.15.3 → 1.15.4
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/.travis.yml +0 -1
- data/README.md +13 -13
- data/lib/meta_inspector/scraper.rb +2 -2
- data/lib/meta_inspector/version.rb +2 -2
- data/meta_inspector.gemspec +3 -3
- data/spec/fixtures/relative_links.response +2 -1
- data/spec/metainspector_spec.rb +6 -17
- metadata +12 -12
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -11,12 +11,12 @@ You can try MetaInspector live at this little demo: [https://metainspectordemo.h
|
|
11
11
|
Install the gem from RubyGems:
|
12
12
|
|
13
13
|
gem install metainspector
|
14
|
-
|
14
|
+
|
15
15
|
If you're using it on a Rails application, just add it to your Gemfile and run `bundle install`
|
16
16
|
|
17
17
|
gem 'metainspector'
|
18
18
|
|
19
|
-
This gem is tested on Ruby versions 1.
|
19
|
+
This gem is tested on Ruby versions 1.9.2 and 1.9.3.
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
@@ -81,7 +81,7 @@ The original document is accessible from:
|
|
81
81
|
And the full scraped document is accessible from:
|
82
82
|
|
83
83
|
page.parsed_document # Nokogiri doc that you can use it to get any element from the page
|
84
|
-
|
84
|
+
|
85
85
|
## Options
|
86
86
|
|
87
87
|
### Timeout
|
@@ -99,7 +99,7 @@ However, you can tell MetaInspector to allow these redirections with the option
|
|
99
99
|
|
100
100
|
# This will allow HTTP => HTTPS redirections
|
101
101
|
page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
|
102
|
-
|
102
|
+
|
103
103
|
# And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
|
104
104
|
page = MetaInspector.new('facebook.com', :allow_redirections => :all)
|
105
105
|
|
@@ -119,7 +119,7 @@ This is useful when using MetaInspector on web spidering. Although on the initia
|
|
119
119
|
page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
|
120
120
|
page.title # returns nil
|
121
121
|
page.content_type # "image/png"
|
122
|
-
page.ok? # false
|
122
|
+
page.ok? # false
|
123
123
|
page.errors.first # "Scraping exception: The url provided contains image/png content instead of text/html content"
|
124
124
|
|
125
125
|
## Error handling
|
@@ -143,28 +143,28 @@ You can find some sample scripts on the samples folder, including a basic scrapi
|
|
143
143
|
$ irb
|
144
144
|
>> require 'metainspector'
|
145
145
|
=> true
|
146
|
-
|
146
|
+
|
147
147
|
>> page = MetaInspector.new('http://markupvalidator.com')
|
148
148
|
=> #<MetaInspector:0x11330c0 @url="http://markupvalidator.com">
|
149
|
-
|
149
|
+
|
150
150
|
>> page.title
|
151
151
|
=> "MarkupValidator :: site-wide markup validation tool"
|
152
|
-
|
152
|
+
|
153
153
|
>> page.meta_description
|
154
154
|
=> "Site-wide markup validation tool. Validate the markup of your whole site with just one click."
|
155
|
-
|
155
|
+
|
156
156
|
>> page.meta_keywords
|
157
157
|
=> "html, markup, validation, validator, tool, w3c, development, standards, free"
|
158
|
-
|
158
|
+
|
159
159
|
>> page.links.size
|
160
160
|
=> 15
|
161
|
-
|
161
|
+
|
162
162
|
>> page.links[4]
|
163
163
|
=> "/plans-and-pricing"
|
164
|
-
|
164
|
+
|
165
165
|
>> page.document.class
|
166
166
|
=> String
|
167
|
-
|
167
|
+
|
168
168
|
>> page.parsed_document.class
|
169
169
|
=> Nokogiri::HTML::Document
|
170
170
|
|
@@ -234,7 +234,7 @@ module MetaInspector
|
|
234
234
|
if uri =~ /^\w*\:/i
|
235
235
|
normalize_url(uri)
|
236
236
|
else
|
237
|
-
URI.
|
237
|
+
Addressable::URI.join(@url, uri).normalize.to_s
|
238
238
|
end
|
239
239
|
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
|
240
240
|
add_fatal_error "Link parsing exception: #{e.message}" and nil
|
@@ -266,4 +266,4 @@ module MetaInspector
|
|
266
266
|
parsed_document.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(";")[1].split("=")[1] rescue nil
|
267
267
|
end
|
268
268
|
end
|
269
|
-
end
|
269
|
+
end
|
data/meta_inspector.gemspec
CHANGED
@@ -15,11 +15,11 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
17
|
gem.add_dependency 'nokogiri', '~> 1.5'
|
18
|
-
gem.add_dependency 'rash', '0.
|
18
|
+
gem.add_dependency 'rash', '~> 0.4.0'
|
19
19
|
gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
|
20
|
-
gem.add_dependency 'addressable', '~> 2.3.
|
20
|
+
gem.add_dependency 'addressable', '~> 2.3.4'
|
21
21
|
|
22
|
-
gem.add_development_dependency 'rspec', '2.
|
22
|
+
gem.add_development_dependency 'rspec', '2.13.0'
|
23
23
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
24
24
|
gem.add_development_dependency 'awesome_print', '1.1.0'
|
25
25
|
gem.add_development_dependency 'rake', '~> 10.0.3'
|
data/spec/metainspector_spec.rb
CHANGED
@@ -234,20 +234,9 @@ describe MetaInspector do
|
|
234
234
|
end
|
235
235
|
end
|
236
236
|
|
237
|
-
it "should
|
237
|
+
it "should not crash with links that have weird href values" do
|
238
238
|
m = MetaInspector.new('http://example.com/invalid_href')
|
239
|
-
m.links.should == [ "skype:joeuser?call",
|
240
|
-
"telnet://telnet.cdrom.com"]
|
241
|
-
end
|
242
|
-
|
243
|
-
it "should store errors when links contain invalid href values" do
|
244
|
-
m = MetaInspector.new('http://example.com/invalid_href')
|
245
|
-
|
246
|
-
expect {
|
247
|
-
links = m.links
|
248
|
-
}.to change { m.errors.size }.from(0).to(1)
|
249
|
-
|
250
|
-
m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
|
239
|
+
m.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
|
251
240
|
end
|
252
241
|
end
|
253
242
|
|
@@ -258,7 +247,7 @@ describe MetaInspector do
|
|
258
247
|
end
|
259
248
|
|
260
249
|
it 'should get the relative links' do
|
261
|
-
@m.internal_links.should == ['http://relative.com/about']
|
250
|
+
@m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
|
262
251
|
end
|
263
252
|
end
|
264
253
|
|
@@ -268,7 +257,7 @@ describe MetaInspector do
|
|
268
257
|
end
|
269
258
|
|
270
259
|
it 'should get the relative links' do
|
271
|
-
@m.internal_links.should == ['http://relative.com/about']
|
260
|
+
@m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
|
272
261
|
end
|
273
262
|
end
|
274
263
|
|
@@ -278,7 +267,7 @@ describe MetaInspector do
|
|
278
267
|
end
|
279
268
|
|
280
269
|
it 'should get the relative links' do
|
281
|
-
@m.internal_links.should == ['http://relative.com/company/about']
|
270
|
+
@m.internal_links.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
|
282
271
|
end
|
283
272
|
end
|
284
273
|
end
|
@@ -524,4 +513,4 @@ describe MetaInspector do
|
|
524
513
|
good.content_type.should == "text/html"
|
525
514
|
end
|
526
515
|
end
|
527
|
-
end
|
516
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.15.
|
4
|
+
version: 1.15.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -32,17 +32,17 @@ dependencies:
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 0.
|
37
|
+
version: 0.4.0
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0.
|
45
|
+
version: 0.4.0
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: open_uri_redirections
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
requirements:
|
67
67
|
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: 2.3.
|
69
|
+
version: 2.3.4
|
70
70
|
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -74,7 +74,7 @@ dependencies:
|
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 2.3.
|
77
|
+
version: 2.3.4
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: rspec
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -82,7 +82,7 @@ dependencies:
|
|
82
82
|
requirements:
|
83
83
|
- - '='
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version: 2.
|
85
|
+
version: 2.13.0
|
86
86
|
type: :development
|
87
87
|
prerelease: false
|
88
88
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -90,7 +90,7 @@ dependencies:
|
|
90
90
|
requirements:
|
91
91
|
- - '='
|
92
92
|
- !ruby/object:Gem::Version
|
93
|
-
version: 2.
|
93
|
+
version: 2.13.0
|
94
94
|
- !ruby/object:Gem::Dependency
|
95
95
|
name: fakeweb
|
96
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,7 +204,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
204
204
|
version: '0'
|
205
205
|
segments:
|
206
206
|
- 0
|
207
|
-
hash:
|
207
|
+
hash: -4602043206768445405
|
208
208
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
209
209
|
none: false
|
210
210
|
requirements:
|
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
213
|
version: '0'
|
214
214
|
segments:
|
215
215
|
- 0
|
216
|
-
hash:
|
216
|
+
hash: -4602043206768445405
|
217
217
|
requirements: []
|
218
218
|
rubyforge_project:
|
219
219
|
rubygems_version: 1.8.25
|