metainspector 1.13.1 → 1.14.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -28,6 +28,10 @@ If you don't include the scheme on the URL, http:// will be used by default:
28
28
 
29
29
  page = MetaInspector.new('markupvalidator.com')
30
30
 
31
+ You can also pass in the HTML as a string, which will then be used as the document to scrape:
32
+
33
+ page = MetaInspector.new("http://markupvalidator.com", :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
34
+
31
35
  ## Accessing scraped data
32
36
 
33
37
  Then you can see the scraped data like this:
@@ -70,9 +74,13 @@ You can also access most of the scraped data as a hash:
70
74
  page.to_hash # { "url" => "http://markupvalidator.com",
71
75
  "title" => "MarkupValidator :: site-wide markup validation tool", ... }
72
76
 
73
- The full scraped document if accessible from:
77
+ The original document is accessible from:
78
+
79
+ page.document # A String with the contents of the HTML document
80
+
81
+ And the full scraped document is accessible from:
74
82
 
75
- page.document # Nokogiri doc that you can use it to get any element from the page
83
+ page.parsed_document # Nokogiri doc that you can use to get any element from the page
76
84
 
77
85
  ## Options
78
86
 
@@ -166,4 +174,4 @@ Thanks to all the contributors:
166
174
 
167
175
  [https://github.com/jaimeiniesta/metainspector/graphs/contributors](https://github.com/jaimeiniesta/metainspector/graphs/contributors)
168
176
 
169
- Copyright (c) 2009-2012 Jaime Iniesta, released under the MIT license
177
+ Copyright (c) 2009-2012 Jaime Iniesta, released under the MIT license
@@ -16,8 +16,10 @@ module MetaInspector
16
16
  # Options:
17
17
  # => timeout: defaults to 20 seconds
18
18
  # => html_content_type_only: if an exception should be raised if request content-type is not text/html. Defaults to false
19
- # => allow_safe_redirections: if redirects from http to https sites on the same domain should be allowed or not
19
+ # => allow_safe_redirections: if redirects from http to https sites on the same domain should be allowed or not
20
20
  # => allow_unsafe_redirections: if redirects from https to http sites on the same domain should be allowed or not
21
+ # => document: the HTML of the URL, given as a string
22
+ # => verbose: if the errors should be logged to the screen
21
23
  def initialize(url, options = {})
22
24
  options = defaults.merge(options)
23
25
 
@@ -32,6 +34,7 @@ module MetaInspector
32
34
  @allow_safe_redirections = options[:allow_safe_redirections]
33
35
  @allow_unsafe_redirections = options[:allow_unsafe_redirections]
34
36
  @verbose = options[:verbose]
37
+ @document = options[:document]
35
38
  end
36
39
 
37
40
  # Returns the parsed document title, from the content of the <title> tag.
@@ -1,5 +1,5 @@
1
1
  # -*- encoding: utf-8 -*-
2
2
 
3
3
  module MetaInspector
4
- VERSION = "1.13.1"
4
+ VERSION = "1.14.0"
5
5
  end
@@ -21,5 +21,5 @@ Gem::Specification.new do |gem|
21
21
  gem.add_development_dependency 'rspec', '2.12.0'
22
22
  gem.add_development_dependency 'fakeweb', '1.3.0'
23
23
  gem.add_development_dependency 'awesome_print', '1.1.0'
24
- gem.add_development_dependency 'rake', '10.0.2'
24
+ gem.add_development_dependency 'rake', '~> 10.0.3'
25
25
  end
@@ -113,6 +113,21 @@ describe MetaInspector do
113
113
  end
114
114
  end
115
115
 
116
+ describe 'Doing a basic scrape from passed url html' do
117
+
118
+ before(:each) do
119
+ @m = MetaInspector.new("http://cnn.com", :document => "<html><head><title>Hello From Passed Html</title><a href='/hello'>Hello link</a></head><body></body></html>")
120
+ end
121
+
122
+ it "should get correct links when the url html is passed as an option" do
123
+ @m.links.should == ["http://cnn.com/hello"]
124
+ end
125
+
126
+ it "should get the title" do
127
+ @m.title.should == "Hello From Passed Html"
128
+ end
129
+ end
130
+
116
131
  describe 'Page with missing meta description' do
117
132
  it "should find secondary description" do
118
133
  @m = MetaInspector.new('http://theonion-no-description.com')
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: metainspector
3
3
  version: !ruby/object:Gem::Version
4
- hash: 33
4
+ hash: 47
5
5
  prerelease:
6
6
  segments:
7
7
  - 1
8
- - 13
9
- - 1
10
- version: 1.13.1
8
+ - 14
9
+ - 0
10
+ version: 1.14.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Jaime Iniesta
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2012-12-13 00:00:00 Z
18
+ date: 2013-01-14 00:00:00 Z
19
19
  dependencies:
20
20
  - !ruby/object:Gem::Dependency
21
21
  name: nokogiri
@@ -118,14 +118,14 @@ dependencies:
118
118
  requirement: &id007 !ruby/object:Gem::Requirement
119
119
  none: false
120
120
  requirements:
121
- - - "="
121
+ - - ~>
122
122
  - !ruby/object:Gem::Version
123
- hash: 75
123
+ hash: 73
124
124
  segments:
125
125
  - 10
126
126
  - 0
127
- - 2
128
- version: 10.0.2
127
+ - 3
128
+ version: 10.0.3
129
129
  type: :development
130
130
  version_requirements: *id007
131
131
  description: MetaInspector lets you scrape a web page and get its title, charset, link and meta tags