jaimeiniesta-metainspector 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG.rdoc +5 -0
- data/README.rdoc +3 -2
- data/lib/metainspector.rb +6 -17
- data/metainspector.gemspec +1 -1
- data/test/test_metainspector.rb +2 -2
- metadata +1 -1
data/CHANGELOG.rdoc
CHANGED
@@ -1,3 +1,8 @@
|
|
1
|
+
= 1.1.1
|
2
|
+
=== 14th May, 2009
|
3
|
+
* Simplified the scrape method: metadata that is not found is now left as nil, making it possible to distinguish between an element that was not found and an element that was found but empty.
|
4
|
+
* Links array is initialized as an empty array
|
5
|
+
|
1
6
|
= 1.1.0
|
2
7
|
=== 14th May, 2009
|
3
8
|
* Rewritten to use instance methods instead of class methods.
|
data/README.rdoc
CHANGED
@@ -1,6 +1,6 @@
|
|
1
1
|
= MetaInspector
|
2
2
|
|
3
|
-
MetaInspector is a gem for web scraping purposes. You give it an URL, and it returns you
|
3
|
+
MetaInspector is a gem for web scraping purposes. You give it a URL, and it returns metadata from it.
|
4
4
|
|
5
5
|
= Installation
|
6
6
|
|
@@ -51,7 +51,8 @@ You can find some sample scripts on the samples folder, including a basic scrapi
|
|
51
51
|
=> true
|
52
52
|
|
53
53
|
>> page = MetaInspector.new('http://pagerankalert.com')
|
54
|
-
=> #<MetaInspector:0x5fc594 @full_doc=nil, @scraped=false, @description=nil, @links=nil, @address="http://pagerankalert.com", @keywords=nil, @scraped_doc=nil, @title=nil>
|
54
|
+
=> #<MetaInspector:0x5fc594 @full_doc=nil, @scraped=false, @description=nil, @links=nil,
|
55
|
+
@address="http://pagerankalert.com", @keywords=nil, @scraped_doc=nil, @title=nil>
|
55
56
|
|
56
57
|
>> page.scrape!
|
57
58
|
=> true
|
data/lib/metainspector.rb
CHANGED
@@ -4,7 +4,7 @@ require 'hpricot'
|
|
4
4
|
|
5
5
|
# MetaInspector provides an easy way to scrape web pages and get its elements
|
6
6
|
class MetaInspector
|
7
|
-
VERSION = '1.1.0'
|
7
|
+
VERSION = '1.1.1'
|
8
8
|
|
9
9
|
Hpricot.buffer_size = 300000
|
10
10
|
|
@@ -16,7 +16,8 @@ class MetaInspector
|
|
16
16
|
@address = address
|
17
17
|
@scraped = false
|
18
18
|
|
19
|
-
@title = @description = @keywords = @links = @full_doc = @scraped_doc = nil
|
19
|
+
@title = @description = @keywords = @full_doc = @scraped_doc = nil
|
20
|
+
@links = []
|
20
21
|
end
|
21
22
|
|
22
23
|
# Setter for address. Initializes the whole state as the address is being changed.
|
@@ -30,25 +31,13 @@ class MetaInspector
|
|
30
31
|
@scraped_doc = Hpricot(@full_doc)
|
31
32
|
|
32
33
|
# Searching title...
|
33
|
-
if @scraped_doc.at('title')
|
34
|
-
@title = @scraped_doc.at('title').inner_html.strip
|
35
|
-
else
|
36
|
-
@title = ""
|
37
|
-
end
|
34
|
+
@title = @scraped_doc.at('title').inner_html.strip if @scraped_doc.at('title')
|
38
35
|
|
39
36
|
# Searching meta description...
|
40
|
-
if @scraped_doc.at("meta[@name='description']")
|
41
|
-
@description = @scraped_doc.at("meta[@name='description']")['content'].strip
|
42
|
-
else
|
43
|
-
@description = ""
|
44
|
-
end
|
37
|
+
@description = @scraped_doc.at("meta[@name='description']")['content'].strip if @scraped_doc.at("meta[@name='description']")
|
45
38
|
|
46
39
|
# Searching meta keywords...
|
47
|
-
if @scraped_doc.at("meta[@name='keywords']")
|
48
|
-
@keywords = @scraped_doc.at("meta[@name='keywords']")['content'].strip
|
49
|
-
else
|
50
|
-
@keywords = ""
|
51
|
-
end
|
40
|
+
@keywords = @scraped_doc.at("meta[@name='keywords']")['content'].strip if @scraped_doc.at("meta[@name='keywords']")
|
52
41
|
|
53
42
|
# Searching links...
|
54
43
|
@links = []
|
data/metainspector.gemspec
CHANGED
data/test/test_metainspector.rb
CHANGED
@@ -14,7 +14,7 @@ class TestMetaInspector < Test::Unit::TestCase
|
|
14
14
|
assert_nil m.title
|
15
15
|
assert_nil m.description
|
16
16
|
assert_nil m.keywords
|
17
|
-
|
17
|
+
assert_equal m.links.size, 0
|
18
18
|
assert_nil m.full_doc
|
19
19
|
assert_nil m.scraped_doc
|
20
20
|
end
|
@@ -53,7 +53,7 @@ class TestMetaInspector < Test::Unit::TestCase
|
|
53
53
|
assert_nil m.title
|
54
54
|
assert_nil m.description
|
55
55
|
assert_nil m.keywords
|
56
|
-
|
56
|
+
assert_equal m.links.size, 0
|
57
57
|
assert_nil m.full_doc
|
58
58
|
assert_nil m.scraped_doc
|
59
59
|
end
|