metainspector 1.15.3 → 1.15.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore CHANGED
@@ -1,6 +1,8 @@
1
1
  *.gem
2
2
  .bundle
3
3
  .rvmrc
4
+ .ruby-version
5
+ .ruby-gemset
4
6
  .rspec
5
7
  Gemfile.lock
6
8
  pkg/*
data/.travis.yml CHANGED
@@ -1,4 +1,3 @@
1
1
  rvm:
2
- - 1.8.7
3
2
  - 1.9.2
4
3
  - 1.9.3
data/README.md CHANGED
@@ -11,12 +11,12 @@ You can try MetaInspector live at this little demo: [https://metainspectordemo.h
11
11
  Install the gem from RubyGems:
12
12
 
13
13
  gem install metainspector
14
-
14
+
15
15
  If you're using it on a Rails application, just add it to your Gemfile and run `bundle install`
16
16
 
17
17
  gem 'metainspector'
18
18
 
19
- This gem is tested on Ruby versions 1.8.7, 1.9.2 and 1.9.3.
19
+ This gem is tested on Ruby versions 1.9.2 and 1.9.3.
20
20
 
21
21
  ## Usage
22
22
 
@@ -81,7 +81,7 @@ The original document is accessible from:
81
81
  And the full scraped document is accessible from:
82
82
 
83
83
  page.parsed_document # Nokogiri doc that you can use it to get any element from the page
84
-
84
+
85
85
  ## Options
86
86
 
87
87
  ### Timeout
@@ -99,7 +99,7 @@ However, you can tell MetaInspector to allow these redirections with the option
99
99
 
100
100
  # This will allow HTTP => HTTPS redirections
101
101
  page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
102
-
102
+
103
103
  # And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
104
104
  page = MetaInspector.new('facebook.com', :allow_redirections => :all)
105
105
 
@@ -119,7 +119,7 @@ This is useful when using MetaInspector on web spidering. Although on the initia
119
119
  page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
120
120
  page.title # returns nil
121
121
  page.content_type # "image/png"
122
- page.ok? # false
122
+ page.ok? # false
123
123
  page.errors.first # "Scraping exception: The url provided contains image/png content instead of text/html content"
124
124
 
125
125
  ## Error handling
@@ -143,28 +143,28 @@ You can find some sample scripts on the samples folder, including a basic scrapi
143
143
  $ irb
144
144
  >> require 'metainspector'
145
145
  => true
146
-
146
+
147
147
  >> page = MetaInspector.new('http://markupvalidator.com')
148
148
  => #<MetaInspector:0x11330c0 @url="http://markupvalidator.com">
149
-
149
+
150
150
  >> page.title
151
151
  => "MarkupValidator :: site-wide markup validation tool"
152
-
152
+
153
153
  >> page.meta_description
154
154
  => "Site-wide markup validation tool. Validate the markup of your whole site with just one click."
155
-
155
+
156
156
  >> page.meta_keywords
157
157
  => "html, markup, validation, validator, tool, w3c, development, standards, free"
158
-
158
+
159
159
  >> page.links.size
160
160
  => 15
161
-
161
+
162
162
  >> page.links[4]
163
163
  => "/plans-and-pricing"
164
-
164
+
165
165
  >> page.document.class
166
166
  => String
167
-
167
+
168
168
  >> page.parsed_document.class
169
169
  => Nokogiri::HTML::Document
170
170
 
@@ -234,7 +234,7 @@ module MetaInspector
234
234
  if uri =~ /^\w*\:/i
235
235
  normalize_url(uri)
236
236
  else
237
- URI.parse(@url).merge(normalize_url(uri)).to_s
237
+ Addressable::URI.join(@url, uri).normalize.to_s
238
238
  end
239
239
  rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
240
240
  add_fatal_error "Link parsing exception: #{e.message}" and nil
@@ -266,4 +266,4 @@ module MetaInspector
266
266
  parsed_document.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(";")[1].split("=")[1] rescue nil
267
267
  end
268
268
  end
269
- end
269
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.15.3"
5
- end
4
+ VERSION = "1.15.4"
5
+ end
@@ -15,11 +15,11 @@ Gem::Specification.new do |gem|
15
15
  gem.version = MetaInspector::VERSION
16
16
 
17
17
  gem.add_dependency 'nokogiri', '~> 1.5'
18
- gem.add_dependency 'rash', '0.3.2'
18
+ gem.add_dependency 'rash', '~> 0.4.0'
19
19
  gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
20
- gem.add_dependency 'addressable', '~> 2.3.2'
20
+ gem.add_dependency 'addressable', '~> 2.3.4'
21
21
 
22
- gem.add_development_dependency 'rspec', '2.12.0'
22
+ gem.add_development_dependency 'rspec', '2.13.0'
23
23
  gem.add_development_dependency 'fakeweb', '1.3.0'
24
24
  gem.add_development_dependency 'awesome_print', '1.1.0'
25
25
  gem.add_development_dependency 'rake', '~> 10.0.3'
@@ -16,5 +16,6 @@ Accept-Ranges: bytes
16
16
  <body>
17
17
  <p>Relative links</p>
18
18
  <a href="about">About</a>
19
+ <a href="../sitemap">Sitemap</a>
19
20
  </body>
20
- </html>
21
+ </html>
@@ -234,20 +234,9 @@ describe MetaInspector do
234
234
  end
235
235
  end
236
236
 
237
- it "should avoid links that contain invalid links as href value" do
237
+ it "should not crash with links that have weird href values" do
238
238
  m = MetaInspector.new('http://example.com/invalid_href')
239
- m.links.should == [ "skype:joeuser?call",
240
- "telnet://telnet.cdrom.com"]
241
- end
242
-
243
- it "should store errors when links contain invalid href values" do
244
- m = MetaInspector.new('http://example.com/invalid_href')
245
-
246
- expect {
247
- links = m.links
248
- }.to change { m.errors.size }.from(0).to(1)
249
-
250
- m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
239
+ m.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
251
240
  end
252
241
  end
253
242
 
@@ -258,7 +247,7 @@ describe MetaInspector do
258
247
  end
259
248
 
260
249
  it 'should get the relative links' do
261
- @m.internal_links.should == ['http://relative.com/about']
250
+ @m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
262
251
  end
263
252
  end
264
253
 
@@ -268,7 +257,7 @@ describe MetaInspector do
268
257
  end
269
258
 
270
259
  it 'should get the relative links' do
271
- @m.internal_links.should == ['http://relative.com/about']
260
+ @m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
272
261
  end
273
262
  end
274
263
 
@@ -278,7 +267,7 @@ describe MetaInspector do
278
267
  end
279
268
 
280
269
  it 'should get the relative links' do
281
- @m.internal_links.should == ['http://relative.com/company/about']
270
+ @m.internal_links.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
282
271
  end
283
272
  end
284
273
  end
@@ -524,4 +513,4 @@ describe MetaInspector do
524
513
  good.content_type.should == "text/html"
525
514
  end
526
515
  end
527
- end
516
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.15.3
4
+ version: 1.15.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-16 00:00:00.000000000 Z
12
+ date: 2013-04-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -32,17 +32,17 @@ dependencies:
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
- - - '='
35
+ - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 0.3.2
37
+ version: 0.4.0
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
- - - '='
43
+ - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 0.3.2
45
+ version: 0.4.0
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: open_uri_redirections
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,7 @@ dependencies:
66
66
  requirements:
67
67
  - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: 2.3.2
69
+ version: 2.3.4
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -74,7 +74,7 @@ dependencies:
74
74
  requirements:
75
75
  - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: 2.3.2
77
+ version: 2.3.4
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: rspec
80
80
  requirement: !ruby/object:Gem::Requirement
@@ -82,7 +82,7 @@ dependencies:
82
82
  requirements:
83
83
  - - '='
84
84
  - !ruby/object:Gem::Version
85
- version: 2.12.0
85
+ version: 2.13.0
86
86
  type: :development
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
@@ -90,7 +90,7 @@ dependencies:
90
90
  requirements:
91
91
  - - '='
92
92
  - !ruby/object:Gem::Version
93
- version: 2.12.0
93
+ version: 2.13.0
94
94
  - !ruby/object:Gem::Dependency
95
95
  name: fakeweb
96
96
  requirement: !ruby/object:Gem::Requirement
@@ -204,7 +204,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
204
204
  version: '0'
205
205
  segments:
206
206
  - 0
207
- hash: 2034097177309150262
207
+ hash: -4602043206768445405
208
208
  required_rubygems_version: !ruby/object:Gem::Requirement
209
209
  none: false
210
210
  requirements:
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
213
  version: '0'
214
214
  segments:
215
215
  - 0
216
- hash: 2034097177309150262
216
+ hash: -4602043206768445405
217
217
  requirements: []
218
218
  rubyforge_project:
219
219
  rubygems_version: 1.8.25