metainspector 1.15.3 → 1.15.4

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore CHANGED
@@ -1,6 +1,8 @@
1
1
  *.gem
2
2
  .bundle
3
3
  .rvmrc
4
+ .ruby-version
5
+ .ruby-gemset
4
6
  .rspec
5
7
  Gemfile.lock
6
8
  pkg/*
data/.travis.yml CHANGED
@@ -1,4 +1,3 @@
1
1
  rvm:
2
- - 1.8.7
3
2
  - 1.9.2
4
3
  - 1.9.3
data/README.md CHANGED
@@ -11,12 +11,12 @@ You can try MetaInspector live at this little demo: [https://metainspectordemo.h
11
11
  Install the gem from RubyGems:
12
12
 
13
13
  gem install metainspector
14
-
14
+
15
15
  If you're using it on a Rails application, just add it to your Gemfile and run `bundle install`
16
16
 
17
17
  gem 'metainspector'
18
18
 
19
- This gem is tested on Ruby versions 1.8.7, 1.9.2 and 1.9.3.
19
+ This gem is tested on Ruby versions 1.9.2 and 1.9.3.
20
20
 
21
21
  ## Usage
22
22
 
@@ -81,7 +81,7 @@ The original document is accessible from:
81
81
  And the full scraped document is accessible from:
82
82
 
83
83
  page.parsed_document # Nokogiri doc that you can use it to get any element from the page
84
-
84
+
85
85
  ## Options
86
86
 
87
87
  ### Timeout
@@ -99,7 +99,7 @@ However, you can tell MetaInspector to allow these redirections with the option
99
99
 
100
100
  # This will allow HTTP => HTTPS redirections
101
101
  page = MetaInspector.new('facebook.com', :allow_redirections => :safe)
102
-
102
+
103
103
  # And this will allow HTTP => HTTPS ("safe") and HTTPS => HTTP ("unsafe") redirections
104
104
  page = MetaInspector.new('facebook.com', :allow_redirections => :all)
105
105
 
@@ -119,7 +119,7 @@ This is useful when using MetaInspector on web spidering. Although on the initia
119
119
  page = MetaInspector.new('http://example.com/image.png', :html_content_only => true)
120
120
  page.title # returns nil
121
121
  page.content_type # "image/png"
122
- page.ok? # false
122
+ page.ok? # false
123
123
  page.errors.first # "Scraping exception: The url provided contains image/png content instead of text/html content"
124
124
 
125
125
  ## Error handling
@@ -143,28 +143,28 @@ You can find some sample scripts on the samples folder, including a basic scrapi
143
143
  $ irb
144
144
  >> require 'metainspector'
145
145
  => true
146
-
146
+
147
147
  >> page = MetaInspector.new('http://markupvalidator.com')
148
148
  => #<MetaInspector:0x11330c0 @url="http://markupvalidator.com">
149
-
149
+
150
150
  >> page.title
151
151
  => "MarkupValidator :: site-wide markup validation tool"
152
-
152
+
153
153
  >> page.meta_description
154
154
  => "Site-wide markup validation tool. Validate the markup of your whole site with just one click."
155
-
155
+
156
156
  >> page.meta_keywords
157
157
  => "html, markup, validation, validator, tool, w3c, development, standards, free"
158
-
158
+
159
159
  >> page.links.size
160
160
  => 15
161
-
161
+
162
162
  >> page.links[4]
163
163
  => "/plans-and-pricing"
164
-
164
+
165
165
  >> page.document.class
166
166
  => String
167
-
167
+
168
168
  >> page.parsed_document.class
169
169
  => Nokogiri::HTML::Document
170
170
 
@@ -234,7 +234,7 @@ module MetaInspector
234
234
  if uri =~ /^\w*\:/i
235
235
  normalize_url(uri)
236
236
  else
237
- URI.parse(@url).merge(normalize_url(uri)).to_s
237
+ Addressable::URI.join(@url, uri).normalize.to_s
238
238
  end
239
239
  rescue URI::InvalidURIError, Addressable::URI::InvalidURIError => e
240
240
  add_fatal_error "Link parsing exception: #{e.message}" and nil
@@ -266,4 +266,4 @@ module MetaInspector
266
266
  parsed_document.css("meta[http-equiv='Content-Type']")[0].attributes['content'].value.split(";")[1].split("=")[1] rescue nil
267
267
  end
268
268
  end
269
- end
269
+ end
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.15.3"
5
- end
4
+ VERSION = "1.15.4"
5
+ end
@@ -15,11 +15,11 @@ Gem::Specification.new do |gem|
15
15
  gem.version = MetaInspector::VERSION
16
16
 
17
17
  gem.add_dependency 'nokogiri', '~> 1.5'
18
- gem.add_dependency 'rash', '0.3.2'
18
+ gem.add_dependency 'rash', '~> 0.4.0'
19
19
  gem.add_dependency 'open_uri_redirections', '~> 0.1.0'
20
- gem.add_dependency 'addressable', '~> 2.3.2'
20
+ gem.add_dependency 'addressable', '~> 2.3.4'
21
21
 
22
- gem.add_development_dependency 'rspec', '2.12.0'
22
+ gem.add_development_dependency 'rspec', '2.13.0'
23
23
  gem.add_development_dependency 'fakeweb', '1.3.0'
24
24
  gem.add_development_dependency 'awesome_print', '1.1.0'
25
25
  gem.add_development_dependency 'rake', '~> 10.0.3'
@@ -16,5 +16,6 @@ Accept-Ranges: bytes
16
16
  <body>
17
17
  <p>Relative links</p>
18
18
  <a href="about">About</a>
19
+ <a href="../sitemap">Sitemap</a>
19
20
  </body>
20
- </html>
21
+ </html>
@@ -234,20 +234,9 @@ describe MetaInspector do
234
234
  end
235
235
  end
236
236
 
237
- it "should avoid links that contain invalid links as href value" do
237
+ it "should not crash with links that have weird href values" do
238
238
  m = MetaInspector.new('http://example.com/invalid_href')
239
- m.links.should == [ "skype:joeuser?call",
240
- "telnet://telnet.cdrom.com"]
241
- end
242
-
243
- it "should store errors when links contain invalid href values" do
244
- m = MetaInspector.new('http://example.com/invalid_href')
245
-
246
- expect {
247
- links = m.links
248
- }.to change { m.errors.size }.from(0).to(1)
249
-
250
- m.errors.first.should == "Link parsing exception: bad URI(is not URI?): %3Cp%3Eftp://ftp.cdrom.com"
239
+ m.links.should == ["%3Cp%3Eftp://ftp.cdrom.com", "skype:joeuser?call", "telnet://telnet.cdrom.com"]
251
240
  end
252
241
  end
253
242
 
@@ -258,7 +247,7 @@ describe MetaInspector do
258
247
  end
259
248
 
260
249
  it 'should get the relative links' do
261
- @m.internal_links.should == ['http://relative.com/about']
250
+ @m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
262
251
  end
263
252
  end
264
253
 
@@ -268,7 +257,7 @@ describe MetaInspector do
268
257
  end
269
258
 
270
259
  it 'should get the relative links' do
271
- @m.internal_links.should == ['http://relative.com/about']
260
+ @m.internal_links.should == ['http://relative.com/about', 'http://relative.com/sitemap']
272
261
  end
273
262
  end
274
263
 
@@ -278,7 +267,7 @@ describe MetaInspector do
278
267
  end
279
268
 
280
269
  it 'should get the relative links' do
281
- @m.internal_links.should == ['http://relative.com/company/about']
270
+ @m.internal_links.should == ['http://relative.com/company/about', 'http://relative.com/sitemap']
282
271
  end
283
272
  end
284
273
  end
@@ -524,4 +513,4 @@ describe MetaInspector do
524
513
  good.content_type.should == "text/html"
525
514
  end
526
515
  end
527
- end
516
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.15.3
4
+ version: 1.15.4
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -9,7 +9,7 @@ authors:
9
9
  autorequire:
10
10
  bindir: bin
11
11
  cert_chain: []
12
- date: 2013-04-16 00:00:00.000000000 Z
12
+ date: 2013-04-19 00:00:00.000000000 Z
13
13
  dependencies:
14
14
  - !ruby/object:Gem::Dependency
15
15
  name: nokogiri
@@ -32,17 +32,17 @@ dependencies:
32
32
  requirement: !ruby/object:Gem::Requirement
33
33
  none: false
34
34
  requirements:
35
- - - '='
35
+ - - ~>
36
36
  - !ruby/object:Gem::Version
37
- version: 0.3.2
37
+ version: 0.4.0
38
38
  type: :runtime
39
39
  prerelease: false
40
40
  version_requirements: !ruby/object:Gem::Requirement
41
41
  none: false
42
42
  requirements:
43
- - - '='
43
+ - - ~>
44
44
  - !ruby/object:Gem::Version
45
- version: 0.3.2
45
+ version: 0.4.0
46
46
  - !ruby/object:Gem::Dependency
47
47
  name: open_uri_redirections
48
48
  requirement: !ruby/object:Gem::Requirement
@@ -66,7 +66,7 @@ dependencies:
66
66
  requirements:
67
67
  - - ~>
68
68
  - !ruby/object:Gem::Version
69
- version: 2.3.2
69
+ version: 2.3.4
70
70
  type: :runtime
71
71
  prerelease: false
72
72
  version_requirements: !ruby/object:Gem::Requirement
@@ -74,7 +74,7 @@ dependencies:
74
74
  requirements:
75
75
  - - ~>
76
76
  - !ruby/object:Gem::Version
77
- version: 2.3.2
77
+ version: 2.3.4
78
78
  - !ruby/object:Gem::Dependency
79
79
  name: rspec
80
80
  requirement: !ruby/object:Gem::Requirement
@@ -82,7 +82,7 @@ dependencies:
82
82
  requirements:
83
83
  - - '='
84
84
  - !ruby/object:Gem::Version
85
- version: 2.12.0
85
+ version: 2.13.0
86
86
  type: :development
87
87
  prerelease: false
88
88
  version_requirements: !ruby/object:Gem::Requirement
@@ -90,7 +90,7 @@ dependencies:
90
90
  requirements:
91
91
  - - '='
92
92
  - !ruby/object:Gem::Version
93
- version: 2.12.0
93
+ version: 2.13.0
94
94
  - !ruby/object:Gem::Dependency
95
95
  name: fakeweb
96
96
  requirement: !ruby/object:Gem::Requirement
@@ -204,7 +204,7 @@ required_ruby_version: !ruby/object:Gem::Requirement
204
204
  version: '0'
205
205
  segments:
206
206
  - 0
207
- hash: 2034097177309150262
207
+ hash: -4602043206768445405
208
208
  required_rubygems_version: !ruby/object:Gem::Requirement
209
209
  none: false
210
210
  requirements:
@@ -213,7 +213,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
213
213
  version: '0'
214
214
  segments:
215
215
  - 0
216
- hash: 2034097177309150262
216
+ hash: -4602043206768445405
217
217
  requirements: []
218
218
  rubyforge_project:
219
219
  rubygems_version: 1.8.25