metainspector 1.15.3 → 1.15.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/.travis.yml +0 -1
- data/README.md +13 -13
- data/lib/meta_inspector/scraper.rb +2 -2
- data/lib/meta_inspector/version.rb +2 -2
- data/meta_inspector.gemspec +3 -3
- data/spec/fixtures/relative_links.response +2 -1
- data/spec/metainspector_spec.rb +6 -17
- metadata +12 -12
data/.gitignore
CHANGED
data/.travis.yml
CHANGED
data/README.md
CHANGED
@@ -11,12 +11,12 @@ You can try MetaInspector live at this little demo: [https://metainspectordemo.h
|
|
11
11
|
Install the gem from RubyGems:
|
12
12
|
|
13
13
|
gem install metainspector
|
14
|
-
|
14
|
+
|
15
15
|
If you're using it on a Rails application, just add it to your Gemfile and run `bundle install`
|
16
16
|
|
17
17
|
gem 'metainspector'
|
18
18
|
|
19
|
-
This gem is tested on Ruby versions 1.
|
19
|
+
This gem is tested on Ruby versions 1.9.2 and 1.9.3.
|
20
20
|
|
21
21
|
## Usage
|
22
22
|
|
@@ -81,7 +81,7 @@ The original document is accessible from:
|
|
81
81
|
And the full scraped document is accessible from:
|
82
82
|
|
83
83
|
page.parsed_document # Nokogiri doc that you can use it to get any element from the page
|
84
|
-
|
84
|
+
|
85
85
|
## Options
|
86
86
|
|
87
87
|
### Timeout
|
@@ -99,7 +99,7 @@ However, you can tell MetaInspector to allow these redirections with the option
|
|
99
99
|
|
100
100
|
# This will allow HTTP => HTTPS redirections
|
101
101
|
page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
|
102
|
-
|
102
|
+
|
103
103
|
# And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
|
104
104
|
page = MetaInspector.new('facebook.com', :allow_redirections => :all)
|
105
105
|
|
@@ -119,7 +119,7 @@ This is useful when using MetaInspector on web spidering. Although on the initia
|
|
119
119
|
page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
|
120
120
|
page.title # returns nil
|
121
121
|
page.content_type # "image/png"
|
122
|
-
page.ok? # false
|
122
|
+
page.ok? # false
|
123
123
|
page.errors.first # "Scraping exception: The url provided contains image/png content instead of text/html content"
|
124
124
|
|
125
125
|
## Error handling
|
@@ -143,28 +143,28 @@ You can find some sample scripts on the samples folder, including a basic scrapi
|
|
143
143
|
$ irb
|
144
144
|
>> require 'metainspector'
|
145
145
|
=> true
|
146
|
-
|
146
|
+
|
147
147
|
>> page = MetaInspector.new('http://markupvalidator.com')
|
148
148
|
=> #<MetaInspector:0x11330c0 @url="http://markupvalidator.com">
|
149
|
-
|
149
|
+
|
150
150
|
>> page.title
|
151
151
|
=> "MarkupValidator :: site-wide markup validation tool"
|
152
|
-
|
152
|
+
|
153
153
|
>> page.meta_description
|
154
154
|
=> "Site-wide markup validation tool. Validate the markup of your whole site with just one click."
|
155
|
-
|
155
|
+
|
156
156
|
>> page.meta_keywords
|
157
157
|
=> "html, markup, validation, validator, tool, w3c, development, standards, free"
|
158
|
-
|
158
|
+
|
159
159
|
>> page.links.size
|
160
160
|
=> 15
|
161
|
-
|
161
|
+
|
162
162
|
>> page.links[4]
|
163
163
|
=> "/plans-and-pricing"
|
164
|
-
|
164
|
+
|
165
165
|
>> page.document.class
|
166
166
|
=> String
|
167
|
-
|
167
|
+
|
168
168
|
>> page.parsed_document.class
|
169
169
|
=> Nokogiri::HTML::Document
|
170
170
|
|
@@ -234,7 +234,7 @@ module MetaInspector
|
|
234
234
|
if uri =~ /^\w*\:/i
|
235
235
|
normalize_url(uri)
|
236
236
|
else
|
237
|
-
URI.
|
237
|
+
Addressable::URI.join(@url, uri).normalize.to_s
|
238
238
|
end
|
239
239
|
rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
|
240
240
|
add_fatal_error "Link parsing exception: #{e.message}" and nil
|
@@ -266,4 +266,4 @@ module MetaInspector
|
|
266
266
|
parsed_document.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(";")[1].split("=")[1] rescue nil
|
267
267
|
end
|
268
268
|
end
|
269
|
-
end
|
269
|
+
end
|
data/meta_inspector.gemspec
CHANGED
@@ -15,11 +15,11 @@ Gem::Specification.new do |gem|
|
|
15
15
|
gem.version = MetaInspector::VERSION
|
16
16
|
|
17
17
|
gem.add_dependency 'nokogiri', '~> 1.5'
|
18
|
-
gem.add_dependency 'rash', '0.
|
18
|
+
gem.add_dependency 'rash', '~> 0.4.0'
|
19
19
|
gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
|
20
|
-
gem.add_dependency 'addressable', '~> 2.3.
|
20
|
+
gem.add_dependency 'addressable', '~> 2.3.4'
|
21
21
|
|
22
|
-
gem.add_development_dependency 'rspec', '2.
|
22
|
+
gem.add_development_dependency 'rspec', '2.13.0'
|
23
23
|
gem.add_development_dependency 'fakeweb', '1.3.0'
|
24
24
|
gem.add_development_dependency 'awesome_print', '1.1.0'
|
25
25
|
gem.add_development_dependency 'rake', '~> 10.0.3'
|
data/spec/metainspector_spec.rb
CHANGED
@@ -234,20 +234,9 @@ describe MetaInspector do
|
|
234
234
|
end
|
235
235
|
end
|
236
236
|
|
237
|
-
it "should
|
237
|
+
it "should not crash with links that have weird href values" do
|
238
238
|
m = MetaInspector.new('http://example.com/invalid_href')
|
239
|
-
m.links.should == [ "skype:joeuser?call",
|
240
|
-
"telnet://telnet.cdrom.com"]
|
241
|
-
end
|
242
|
-
|
243
|
-
it "should store errors when links contain invalid href values" do
|
244
|
-
m = MetaInspector.new('http://example.com/invalid_href')
|
245
|
-
|
246
|
-
expect {
|
247
|
-
links = m.links
|
248
|
-
}.to change { m.errors.size }.from(0).to(1)
|
249
|
-
|
250
|
-
m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
|
239
|
+
m.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
|
251
240
|
end
|
252
241
|
end
|
253
242
|
|
@@ -258,7 +247,7 @@ describe MetaInspector do
|
|
258
247
|
end
|
259
248
|
|
260
249
|
it 'should get the relative links' do
|
261
|
-
@m.internal_links.should == ['http://relative.com/about']
|
250
|
+
@m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
|
262
251
|
end
|
263
252
|
end
|
264
253
|
|
@@ -268,7 +257,7 @@ describe MetaInspector do
|
|
268
257
|
end
|
269
258
|
|
270
259
|
it 'should get the relative links' do
|
271
|
-
@m.internal_links.should == ['http://relative.com/about']
|
260
|
+
@m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
|
272
261
|
end
|
273
262
|
end
|
274
263
|
|
@@ -278,7 +267,7 @@ describe MetaInspector do
|
|
278
267
|
end
|
279
268
|
|
280
269
|
it 'should get the relative links' do
|
281
|
-
@m.internal_links.should == ['http://relative.com/company/about']
|
270
|
+
@m.internal_links.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
|
282
271
|
end
|
283
272
|
end
|
284
273
|
end
|
@@ -524,4 +513,4 @@ describe MetaInspector do
|
|
524
513
|
good.content_type.should == "text/html"
|
525
514
|
end
|
526
515
|
end
|
527
|
-
end
|
516
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: metainspector
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.15.3
|
4
|
+
version: 1.15.4
|
5
5
|
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2013-04-
|
12
|
+
date: 2013-04-19 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -32,17 +32,17 @@ dependencies:
|
|
32
32
|
requirement: !ruby/object:Gem::Requirement
|
33
33
|
none: false
|
34
34
|
requirements:
|
35
|
-
- -
|
35
|
+
- - ~>
|
36
36
|
- !ruby/object:Gem::Version
|
37
|
-
version: 0.
|
37
|
+
version: 0.4.0
|
38
38
|
type: :runtime
|
39
39
|
prerelease: false
|
40
40
|
version_requirements: !ruby/object:Gem::Requirement
|
41
41
|
none: false
|
42
42
|
requirements:
|
43
|
-
- -
|
43
|
+
- - ~>
|
44
44
|
- !ruby/object:Gem::Version
|
45
|
-
version: 0.
|
45
|
+
version: 0.4.0
|
46
46
|
- !ruby/object:Gem::Dependency
|
47
47
|
name: open_uri_redirections
|
48
48
|
requirement: !ruby/object:Gem::Requirement
|
@@ -66,7 +66,7 @@ dependencies:
|
|
66
66
|
requirements:
|
67
67
|
- - ~>
|
68
68
|
- !ruby/object:Gem::Version
|
69
|
-
version: 2.3.
|
69
|
+
version: 2.3.4
|
70
70
|
type: :runtime
|
71
71
|
prerelease: false
|
72
72
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -74,7 +74,7 @@ dependencies:
|
|
74
74
|
requirements:
|
75
75
|
- - ~>
|
76
76
|
- !ruby/object:Gem::Version
|
77
|
-
version: 2.3.
|
77
|
+
version: 2.3.4
|
78
78
|
- !ruby/object:Gem::Dependency
|
79
79
|
name: rspec
|
80
80
|
requirement: !ruby/object:Gem::Requirement
|
@@ -82,7 +82,7 @@ dependencies:
|
|
82
82
|
requirements:
|
83
83
|
- - '='
|
84
84
|
- !ruby/object:Gem::Version
|
85
|
-
version: 2.
|
85
|
+
version: 2.13.0
|
86
86
|
type: :development
|
87
87
|
prerelease: false
|
88
88
|
version_requirements: !ruby/object:Gem::Requirement
|
@@ -90,7 +90,7 @@ dependencies:
|
|
90
90
|
requirements:
|
91
91
|
- - '='
|
92
92
|
- !ruby/object:Gem::Version
|
93
|
-
version: 2.
|
93
|
+
version: 2.13.0
|
94
94
|
- !ruby/object:Gem::Dependency
|
95
95
|
name: fakeweb
|
96
96
|
requirement: !ruby/object:Gem::Requirement
|
@@ -204,7 +204,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
204
204
|
version: '0'
|
205
205
|
segments:
|
206
206
|
- 0
|
207
|
-
hash:
|
207
|
+
hash: -4602043206768445405
|
208
208
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
209
209
|
none: false
|
210
210
|
requirements:
|
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
213
213
|
version: '0'
|
214
214
|
segments:
|
215
215
|
- 0
|
216
|
-
hash:
|
216
|
+
hash: -4602043206768445405
|
217
217
|
requirements: []
|
218
218
|
rubyforge_project:
|
219
219
|
rubygems_version: 1.8.25
|