jaimeiniesta-metainspector 1.0.2

Sign up to get free protection for your applications and to get access to all the features.
data/README ADDED
@@ -0,0 +1,8 @@
1
+ --------------
2
+ MetaInspector
3
+ --------------
4
+ DESCRIPTION: Ruby gem for web scraping. It scrapes a given URL and returns a hash with data extracted from it, such as the title, the meta description, the meta keywords, an array with all the links, all the images in it, etc.
5
+ AUTHOR: Jaime Iniesta (jaimeiniesta@gmail.com, http://www.jaimeiniesta.com)
6
+ LICENSE: GPLv3
7
+
8
+ (more to come soon...)
@@ -0,0 +1,45 @@
1
# MetaInspector downloads a web page and extracts basic metadata from it:
# the <title>, the "description" and "keywords" <meta> tags, and the href
# of every <a> element.
class MetaInspector
  require 'open-uri'
  require 'rubygems'
  require 'hpricot'

  # Enlarge Hpricot's parser buffer.
  # NOTE(review): presumably needed so pages containing very large elements
  # or attribute values can still be parsed -- confirm against the Hpricot docs.
  Hpricot.buffer_size = 300000

  # Fetches +url+ and scrapes its metadata.
  #
  # url - a String (or URI object) pointing at a remote document.
  #
  # Returns a Hash with String keys:
  #   'ok'          - true when the page was fetched, false on a socket error
  #   'title'       - inner HTML of <title>, or "" when there is no <title>
  #   'description' - content of <meta name="description">, or "" when absent
  #   'keywords'    - content of <meta name="keywords">, or "" when absent
  #   'links'       - Array with the href of every <a> that has one
  # When 'ok' is false every other value is nil.
  #
  # Raises ArgumentError when +url+ is not something open-uri can fetch
  # (for example a local path). Only SocketError is handled here; any other
  # failure (HTTP errors, timeouts, malformed URIs, ...) propagates.
  def self.scrape(url)
    doc = fetch_document(url)

    title_tag = doc.at('title')

    {
      'ok'          => true,
      'title'       => title_tag ? title_tag.inner_html : "",
      'description' => meta_content(doc, 'description'),
      'keywords'    => meta_content(doc, 'keywords'),
      'links'       => doc.search("//a").map { |anchor| anchor.attributes["href"] }.compact
    }
  rescue SocketError
    puts 'MetaInspector exception: The url provided does not exist or is temporarily unavailable (socket error)'
    {'ok' => false, 'title' => nil, 'description' => nil, 'keywords' => nil, 'links' => nil}
  end

  # Downloads +url+ and parses it into an Hpricot document.
  #
  # The request goes through URI#open (mixed in by open-uri) instead of
  # Kernel#open: Kernel#open stopped handling URLs in Ruby 3.0, and on older
  # Rubies it would happily read a local file or run a "|command" pipe.
  # The block form guarantees the downloaded stream is closed once parsed.
  def self.fetch_document(url)
    uri = URI.parse(url.to_s)

    # Only remote URI classes gain a public #open from open-uri.
    unless uri.respond_to?(:open)
      raise ArgumentError, "MetaInspector can only scrape remote URLs, got: #{url.inspect}"
    end

    uri.open { |io| Hpricot(io) }
  end
  private_class_method :fetch_document

  # Returns the content attribute of the <meta> tag called +name+,
  # or "" when the document has no such tag.
  def self.meta_content(doc, name)
    tag = doc.at("meta[@name='#{name}']")
    tag ? tag['content'] : ""
  end
  private_class_method :meta_content
end
@@ -0,0 +1,16 @@
1
# Gem specification for MetaInspector 1.0.2.
Gem::Specification.new do |spec|
  # Identity
  spec.name    = "metainspector"
  spec.version = "1.0.2"
  spec.date    = "2008-06-27"

  # Author and project home
  spec.authors  = ["Jaime Iniesta"]
  spec.email    = "jaimeiniesta@gmail.com"
  spec.homepage = "http://code.jaimeiniesta.com/metainspector"

  # Short and long descriptions
  spec.summary     = "Ruby gem for web scraping"
  spec.description = "MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL"

  # Files shipped in the gem; no test files are registered
  spec.files = [
    "README",
    "metainspector.gemspec",
    "lib/metainspector.rb",
    "test/test_metainspector.rb"
  ]
  spec.test_files = []

  # RDoc settings: has_rdoc is false, with no extra options or files
  spec.has_rdoc         = false
  spec.rdoc_options     = []
  spec.extra_rdoc_files = []

  # Runtime dependency: the HTML parser used by the scraper
  spec.add_dependency("hpricot", ["> 0.6"])
end
@@ -0,0 +1 @@
1
+
metadata ADDED
@@ -0,0 +1,64 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: jaimeiniesta-metainspector
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.2
5
+ platform: ruby
6
+ authors:
7
+ - Jaime Iniesta
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-06-27 00:00:00 -07:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: hpricot
17
+ version_requirement:
18
+ version_requirements: !ruby/object:Gem::Requirement
19
+ requirements:
20
+ - - ">"
21
+ - !ruby/object:Gem::Version
22
+ version: "0.6"
23
+ version:
24
+ description: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
25
+ email: jaimeiniesta@gmail.com
26
+ executables: []
27
+
28
+ extensions: []
29
+
30
+ extra_rdoc_files: []
31
+
32
+ files:
33
+ - README
34
+ - metainspector.gemspec
35
+ - lib/metainspector.rb
36
+ - test/test_metainspector.rb
37
+ has_rdoc: false
38
+ homepage: http://code.jaimeiniesta.com/metainspector
39
+ post_install_message:
40
+ rdoc_options: []
41
+
42
+ require_paths:
43
+ - lib
44
+ required_ruby_version: !ruby/object:Gem::Requirement
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ version:
50
+ required_rubygems_version: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: "0"
55
+ version:
56
+ requirements: []
57
+
58
+ rubyforge_project:
59
+ rubygems_version: 1.2.0
60
+ signing_key:
61
+ specification_version: 2
62
+ summary: Ruby gem for web scraping
63
+ test_files: []
64
+