metainspector 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +4 -0
- data/README.txt +22 -8
- data/Rakefile +5 -6
- data/lib/metainspector.rb +10 -10
- metadata +7 -7
data/History.txt
CHANGED
data/README.txt
CHANGED
@@ -1,32 +1,46 @@
|
|
1
1
|
metainspector
|
2
|
-
by
|
3
|
-
|
2
|
+
by Jaime Iniesta
|
3
|
+
http://metainspector.rubyforge.org/
|
4
4
|
|
5
5
|
== DESCRIPTION:
|
6
6
|
|
7
|
-
|
7
|
+
Ruby gem for web scraping purposes. It scrapes a given URL, and returns you a hash with data from it like for example the title, meta description, meta keywords, an array with all the links, all the images in it, etc.
|
8
8
|
|
9
9
|
== FEATURES/PROBLEMS:
|
10
10
|
|
11
|
-
*
|
11
|
+
* Scrape a given URL and return data from its HTML
|
12
12
|
|
13
13
|
== SYNOPSIS:
|
14
14
|
|
15
|
-
|
15
|
+
# Require all gems and libs needed...
|
16
|
+
require 'rubygems'
|
17
|
+
require 'open-uri'
|
18
|
+
require 'hpricot'
|
19
|
+
require 'metainspector'
|
20
|
+
|
21
|
+
# Scrape an URL...
|
22
|
+
page_data = MetaInspector.scrape(url)
|
23
|
+
|
24
|
+
# See extracted data...
|
25
|
+
page_data['title']
|
26
|
+
page_data['description']
|
27
|
+
page_data['keywords']
|
28
|
+
page_data['links']
|
16
29
|
|
17
30
|
== REQUIREMENTS:
|
18
31
|
|
19
|
-
*
|
32
|
+
* open-uri
|
33
|
+
* hpricot
|
20
34
|
|
21
35
|
== INSTALL:
|
22
36
|
|
23
|
-
*
|
37
|
+
* sudo gem install metainspector
|
24
38
|
|
25
39
|
== LICENSE:
|
26
40
|
|
27
41
|
(The MIT License)
|
28
42
|
|
29
|
-
Copyright (c) 2007
|
43
|
+
Copyright (c) 2007 Jaime Iniesta
|
30
44
|
|
31
45
|
Permission is hereby granted, free of charge, to any person obtaining
|
32
46
|
a copy of this software and associated documentation files (the
|
data/Rakefile
CHANGED
@@ -8,12 +8,11 @@ require './lib/metainspector.rb'
|
|
8
8
|
|
9
9
|
Hoe.new('metainspector', MetaInspector::VERSION) do |p|
|
10
10
|
p.rubyforge_name = 'metainspector'
|
11
|
-
p.
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
# p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
|
11
|
+
p.author = 'Jaime Iniesta'
|
12
|
+
p.email = 'jaimeiniesta@gmail.com'
|
13
|
+
p.summary = 'Ruby gem for web scraping purposes. It scrapes a given URL, and returns you a hash with data from it like for example the title, meta description, meta keywords, an array with all the links, all the images in it, etc.'
|
14
|
+
p.description = p.paragraphs_of('README.txt', 2..5).join("\n\n")
|
15
|
+
p.url = p.paragraphs_of('README.txt', 0).first.split(/\n/)[1..-1]
|
17
16
|
p.changes = p.paragraphs_of('History.txt', 0..1).join("\n\n")
|
18
17
|
end
|
19
18
|
|
data/lib/metainspector.rb
CHANGED
@@ -1,39 +1,39 @@
|
|
1
1
|
class MetaInspector
|
2
|
-
VERSION = '1.0.0'
|
2
|
+
VERSION = '1.0.1'
|
3
3
|
|
4
4
|
Hpricot.buffer_size = 300000
|
5
5
|
|
6
6
|
def self.scrape(url)
|
7
7
|
doc = Hpricot(open(url))
|
8
8
|
|
9
|
-
#
|
9
|
+
# Searching title...
|
10
10
|
if (!doc.at('title').nil?)
|
11
11
|
title = doc.at('title').inner_html
|
12
12
|
else
|
13
13
|
title = ""
|
14
14
|
end
|
15
15
|
|
16
|
-
#
|
16
|
+
# Searching meta description...
|
17
17
|
if (!doc.at("meta[@name='description']").nil?)
|
18
18
|
description = doc.at("meta[@name='description']")['content']
|
19
19
|
else
|
20
20
|
description = ""
|
21
21
|
end
|
22
22
|
|
23
|
-
#
|
23
|
+
# Searching meta keywords...
|
24
24
|
if (!doc.at("meta[@name='keywords']").nil?)
|
25
25
|
keywords = doc.at("meta[@name='keywords']")['content']
|
26
26
|
else
|
27
27
|
keywords = ""
|
28
28
|
end
|
29
29
|
|
30
|
-
#
|
31
|
-
|
32
|
-
doc.search("//a").each do |
|
33
|
-
|
30
|
+
# Searching links...
|
31
|
+
links = []
|
32
|
+
doc.search("//a").each do |link|
|
33
|
+
links << link.attributes["href"] if (!link.attributes["href"].nil?)
|
34
34
|
end
|
35
35
|
|
36
|
-
#
|
37
|
-
{'ok' => true, 'title' => title, 'description' => description, 'keywords' => keywords, '
|
36
|
+
# Returning all data...
|
37
|
+
{'ok' => true, 'title' => title, 'description' => description, 'keywords' => keywords, 'links' => links}
|
38
38
|
end
|
39
39
|
end
|
metadata
CHANGED
@@ -3,15 +3,15 @@ rubygems_version: 0.9.2
|
|
3
3
|
specification_version: 1
|
4
4
|
name: metainspector
|
5
5
|
version: !ruby/object:Gem::Version
|
6
|
-
version: 1.0.0
|
7
|
-
date: 2007-12-
|
8
|
-
summary:
|
6
|
+
version: 1.0.1
|
7
|
+
date: 2007-12-07 00:00:00 +01:00
|
8
|
+
summary: Ruby gem for web scraping purposes. It scrapes a given URL, and returns you a hash with data from it like for example the title, meta description, meta keywords, an array with all the links, all the images in it, etc.
|
9
9
|
require_paths:
|
10
10
|
- lib
|
11
|
-
email:
|
12
|
-
homepage:
|
11
|
+
email: jaimeiniesta@gmail.com
|
12
|
+
homepage: " by Jaime Iniesta"
|
13
13
|
rubyforge_project: metainspector
|
14
|
-
description:
|
14
|
+
description: "== FEATURES/PROBLEMS: * Scrape a given URL and return data from its HTML == SYNOPSIS: # Require all gems and libs needed... require 'rubygems' require 'open-uri' require 'hpricot' require 'metainspector' # Scrape an URL... page_data = MetaInspector.scrape(url)"
|
15
15
|
autorequire:
|
16
16
|
default_executable:
|
17
17
|
bindir: bin
|
@@ -27,7 +27,7 @@ signing_key:
|
|
27
27
|
cert_chain:
|
28
28
|
post_install_message:
|
29
29
|
authors:
|
30
|
-
-
|
30
|
+
- Jaime Iniesta
|
31
31
|
files:
|
32
32
|
- History.txt
|
33
33
|
- Manifest.txt
|