jaimeiniesta-metainspector 1.1.3 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,7 @@
1
+ = 1.1.4
2
+ === 4th June, 2009
3
+ * Simplified code: removed address setter, just instantiate a new MetaInspector object if you need to scrape a different URL
4
+
1
5
  = 1.1.3
2
6
  === 22nd May, 2009
3
7
  * Simplified code: now there's no need to call page.scrape!, just initialize it and go directly to page.address, page.title, page.description, page.keywords or page.links, the page will be scraped on the fly
@@ -42,12 +42,6 @@ Once scraped, you can see the scraped data like this:
42
42
  page.keywords # meta keywords, as string
43
43
  page.links # array of strings, with every link found on the page
44
44
 
45
- You can also change the address of the page to be scraped using the address= setter, like this:
46
-
47
- page.address="http://jaimeiniesta.com"
48
-
49
- Doing so resets the state of the MetaInspector instance to the initial state (not scraped yet, cleared stored meta data). The page will be re-scraped when you consult any of its metadata again.
50
-
51
45
  The full scraped document is accessible from:
52
46
 
53
47
  page.document # Nokogiri doc that you can use to get any element from the page
@@ -80,12 +74,6 @@ You can find some sample scripts on the samples folder, including a basic scrapi
80
74
 
81
75
  >> page.document.class
82
76
  => Nokogiri::HTML::Document
83
-
84
- >> page.address="http://jaimeiniesta.com"
85
- => "http://jaimeiniesta.com"
86
-
87
- >> page.title
88
- => "ruby on rails freelance developer -- Jaime Iniesta"
89
77
 
90
78
  = To Do
91
79
 
@@ -4,7 +4,7 @@ require 'nokogiri'
4
4
 
5
5
  # MetaInspector provides an easy way to scrape web pages and get their elements
6
6
  class MetaInspector
7
- VERSION = '1.1.3'
7
+ VERSION = '1.1.4'
8
8
 
9
9
  attr_reader :address
10
10
 
@@ -16,11 +16,6 @@ class MetaInspector
16
16
  @document = @title = @description = @keywords = @links = nil
17
17
  end
18
18
 
19
- # Setter for address. Initializes the whole state as the address is being changed.
20
- def address=(address)
21
- initialize(address)
22
- end
23
-
24
19
  # Returns the parsed document title
25
20
  def title
26
21
  @title ||= document.css('title').inner_html rescue nil
@@ -1,7 +1,7 @@
1
1
  Gem::Specification.new do |s|
2
2
  s.name = "metainspector"
3
- s.version = "1.1.3"
4
- s.date = "2009-05-19"
3
+ s.version = "1.1.4"
4
+ s.date = "2009-06-04"
5
5
  s.summary = "Ruby gem for web scraping"
6
6
  s.email = "jaimeiniesta@gmail.com"
7
7
  s.homepage = "http://github.com/jaimeiniesta/metainspector/tree/master"
@@ -3,10 +3,9 @@
3
3
  require '../lib/metainspector.rb'
4
4
 
5
5
  puts "Enter a valid http address to scrape it"
6
- address = gets
6
+ address = gets.strip
7
7
  page = MetaInspector.new(address)
8
- puts "Scraping #{address}"
9
- puts "...please wait..."
8
+ puts "...please wait while scraping the page..."
10
9
 
11
10
  puts "Scraping #{page.address} returned these results:"
12
11
  puts "TITLE: #{page.title}"
@@ -12,7 +12,7 @@ q.push(address)
12
12
 
13
13
  while q.size > 0
14
14
  visited_links << address = q.pop
15
- page.address=address
15
+ page = MetaInspector.new(address)
16
16
  puts "Spidering #{page.address}"
17
17
 
18
18
  puts "TITLE: #{page.title}"
@@ -16,29 +16,4 @@ class TestMetaInspector < Test::Unit::TestCase
16
16
  assert_equal m.links[30], 'http://www.nuvio.cz/'
17
17
  assert_equal m.document.class, Nokogiri::HTML::Document
18
18
  end
19
-
20
- # Test changing the address resets the state of the instance so it causes a new scraping
21
- def test_address_setter
22
- m = MetaInspector.new('http://pagerankalert.com')
23
- assert_equal m.address, 'http://pagerankalert.com'
24
- title_1 = m.title
25
- description_1 = m.description
26
- keywords_1 = m.keywords
27
- links_1 = m.links
28
- document_1 = m.document
29
-
30
- m.address = 'http://jaimeiniesta.com'
31
- assert_equal m.address, 'http://jaimeiniesta.com'
32
- title_2 = m.title
33
- description_2 = m.description
34
- keywords_2 = m.keywords
35
- links_2 = m.links
36
- document_2 = m.document
37
-
38
- assert_not_equal title_1, title_2
39
- assert_not_equal description_1, description_2
40
- assert_not_equal keywords_1, keywords_2
41
- assert_not_equal links_1, links_2
42
- assert_not_equal document_1, document_2
43
- end
44
19
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: jaimeiniesta-metainspector
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.3
4
+ version: 1.1.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Jaime Iniesta
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-05-19 00:00:00 -07:00
12
+ date: 2009-06-04 00:00:00 -07:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency