jaimeiniesta-metainspector 1.0.2
Sign up to get free protection for your applications and to get access to all the features.
- data/README +8 -0
- data/lib/metainspector.rb +45 -0
- data/metainspector.gemspec +16 -0
- data/test/test_metainspector.rb +1 -0
- metadata +64 -0
data/README
ADDED
@@ -0,0 +1,8 @@
|
|
1
|
+
--------------
|
2
|
+
MetaInspector
|
3
|
+
--------------
|
4
|
+
DESCRIPTION: Ruby gem for web scraping. It scrapes a given URL and returns a hash with data extracted from the page, such as the title, the meta description, the meta keywords, an array with all the links, all the images in it, etc.
|
5
|
+
AUTHOR: Jaime Iniesta (jaimeiniesta@gmail.com, http://www.jaimeiniesta.com)
|
6
|
+
LICENSE: GPLv3
|
7
|
+
|
8
|
+
(more to come soon...)
|
@@ -0,0 +1,45 @@
|
|
1
|
+
# MetaInspector scrapes a web page and reports basic metadata about it:
# the title, the meta description, the meta keywords and the href of
# every link found in the document.
#
# Usage:
#   data = MetaInspector.scrape('http://example.com')
#   data['title'] if data['ok']
class MetaInspector
  require 'open-uri'
  require 'rubygems'
  require 'hpricot'

  # NOTE(review): presumably raised so that pages containing very large
  # tags/attributes can still be parsed -- confirm against the Hpricot docs.
  Hpricot.buffer_size = 300000

  # Fetches +url+, parses it with Hpricot and extracts its metadata.
  #
  # url - String location of the page to scrape.
  #
  # Returns a Hash with String keys:
  #   'ok'          - true when the page was fetched and parsed, false on a
  #                   SocketError.
  #   'title'       - inner HTML of the <title> element, or "" when the page
  #                   has none.
  #   'description' - content attribute of <meta name="description">, or ""
  #                   when the tag is missing.
  #   'keywords'    - content attribute of <meta name="keywords">, or ""
  #                   when the tag is missing.
  #   'links'       - Array with the href of every <a> element that has one.
  # When a SocketError is raised a message is printed and every key except
  # 'ok' maps to nil. Any other exception propagates to the caller.
  def self.scrape(url)
    # Block form: the stream handed out by open is closed as soon as the
    # document has been parsed instead of lingering until garbage collection.
    #
    # NOTE(review): Kernel#open also opens local paths and, for strings that
    # start with "|", spawns a subprocess -- do not pass untrusted input.
    doc = open(url) { |page| Hpricot(page) }

    title_node = doc.at('title')
    title = title_node ? title_node.inner_html : ""

    description = meta_content(doc, 'description')
    keywords = meta_content(doc, 'keywords')

    # Anchors without an href yield nil, which compact drops.
    links = doc.search("//a").map { |anchor| anchor.attributes["href"] }.compact

    {'ok' => true, 'title' => title, 'description' => description, 'keywords' => keywords, 'links' => links}

  rescue SocketError
    puts 'MetaInspector exception: The url provided does not exist or is temporarily unavailable (socket error)'
    {'ok' => false, 'title' => nil, 'description' => nil, 'keywords' => nil, 'links' => nil}
  end

  # Returns the content attribute of the first <meta name="NAME"> tag in
  # +doc+, or "" when the document has no such tag.
  def self.meta_content(doc, name)
    tag = doc.at("meta[@name='#{name}']")
    tag ? tag['content'] : ""
  end
  private_class_method :meta_content
end
|
@@ -0,0 +1,16 @@
|
|
1
|
+
Gem::Specification.new do |s|
|
2
|
+
s.name = "metainspector"
|
3
|
+
s.version = "1.0.2"
|
4
|
+
s.date = "2008-06-27"
|
5
|
+
s.summary = "Ruby gem for web scraping"
|
6
|
+
s.email = "jaimeiniesta@gmail.com"
|
7
|
+
s.homepage = "http://code.jaimeiniesta.com/metainspector"
|
8
|
+
s.description = "MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL"
|
9
|
+
s.has_rdoc = false
|
10
|
+
s.authors = ["Jaime Iniesta"]
|
11
|
+
s.files = ["README", "metainspector.gemspec", "lib/metainspector.rb", "test/test_metainspector.rb"]
|
12
|
+
s.test_files = []
|
13
|
+
s.rdoc_options = []
|
14
|
+
s.extra_rdoc_files = []
|
15
|
+
s.add_dependency("hpricot", ["> 0.6"])
|
16
|
+
end
|
@@ -0,0 +1 @@
|
|
1
|
+
|
metadata
ADDED
@@ -0,0 +1,64 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jaimeiniesta-metainspector
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.2
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Jaime Iniesta
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-06-27 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
version_requirement:
|
18
|
+
version_requirements: !ruby/object:Gem::Requirement
|
19
|
+
requirements:
|
20
|
+
- - ">"
|
21
|
+
- !ruby/object:Gem::Version
|
22
|
+
version: "0.6"
|
23
|
+
version:
|
24
|
+
description: MetaInspector is a ruby gem for web scraping purposes, that returns a hash with metadata from a given URL
|
25
|
+
email: jaimeiniesta@gmail.com
|
26
|
+
executables: []
|
27
|
+
|
28
|
+
extensions: []
|
29
|
+
|
30
|
+
extra_rdoc_files: []
|
31
|
+
|
32
|
+
files:
|
33
|
+
- README
|
34
|
+
- metainspector.gemspec
|
35
|
+
- lib/metainspector.rb
|
36
|
+
- test/test_metainspector.rb
|
37
|
+
has_rdoc: false
|
38
|
+
homepage: http://code.jaimeiniesta.com/metainspector
|
39
|
+
post_install_message:
|
40
|
+
rdoc_options: []
|
41
|
+
|
42
|
+
require_paths:
|
43
|
+
- lib
|
44
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
45
|
+
requirements:
|
46
|
+
- - ">="
|
47
|
+
- !ruby/object:Gem::Version
|
48
|
+
version: "0"
|
49
|
+
version:
|
50
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - ">="
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: "0"
|
55
|
+
version:
|
56
|
+
requirements: []
|
57
|
+
|
58
|
+
rubyforge_project:
|
59
|
+
rubygems_version: 1.2.0
|
60
|
+
signing_key:
|
61
|
+
specification_version: 2
|
62
|
+
summary: Ruby gem for web scraping
|
63
|
+
test_files: []
|
64
|
+
|