free-scrape 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +12 -0
- data/Manifest.txt +14 -0
- data/README.txt +35 -0
- data/Rakefile +15 -0
- data/lib/free_scrape.rb +3 -0
- data/lib/free_scrape/category.rb +33 -0
- data/lib/free_scrape/free_scrape.rb +185 -0
- data/lib/free_scrape/item.rb +171 -0
- data/lib/free_scrape/item_link.rb +26 -0
- data/lib/free_scrape/version.rb +3 -0
- data/spec/free_scrape_spec.rb +33 -0
- data/spec/item_spec.rb +34 -0
- data/spec/spec_helper.rb +7 -0
- data/tasks/spec.rb +7 -0
- metadata +99 -0
data/History.txt
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
=== 0.1.0 / 2008-09-29
|
2
|
+
|
3
|
+
* Renamed freebase to free-scrape, in order to not conflict with the other
|
4
|
+
freebase gem from the freebaseapi project.
|
5
|
+
|
6
|
+
=== 0.0.9 / 2008-09-28
|
7
|
+
|
8
|
+
* Initial release.
|
9
|
+
* Can request items from freebase.com using either a URL, an Item GUID or an
|
10
|
+
Item name.
|
11
|
+
* Preserves tags and other freebase links.
|
12
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
lib/free_scrape.rb
|
6
|
+
lib/free_scrape/item_link.rb
|
7
|
+
lib/free_scrape/category.rb
|
8
|
+
lib/free_scrape/item.rb
|
9
|
+
lib/free_scrape/free_scrape.rb
|
10
|
+
lib/free_scrape/version.rb
|
11
|
+
tasks/spec.rb
|
12
|
+
spec/item_spec.rb
|
13
|
+
spec/free_scrape_spec.rb
|
14
|
+
spec/spec_helper.rb
|
data/README.txt
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
= FreeBase
|
2
|
+
|
3
|
+
* http://freebase.rubyforge.org/
|
4
|
+
* Postmodern (postmodern.mod3@gmail.com)
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
A web-scraping interface to freebase.com, the open and shared database of the
|
9
|
+
world's knowledge.
|
10
|
+
|
11
|
+
== FEATURES/PROBLEMS:
|
12
|
+
|
13
|
+
* Can request items from freebase.com using either a URL, an Item GUID or an
|
14
|
+
Item name.
|
15
|
+
* Preserves tags and other freebase links.
|
16
|
+
|
17
|
+
== EXAMPLES:
|
18
|
+
|
19
|
+
require 'free_scrape'
|
20
|
+
|
21
|
+
FreeScrape.item('Aphex Twin')
|
22
|
+
# => #<FreeScrape::Item:0xb73fdba0 ...>
|
23
|
+
|
24
|
+
FreeScrape.item('http://www.freebase.com/view/guid/9202a8c04000641f8000000003ac957f')
|
25
|
+
# => #<FreeScrape::Item:0xb73fe3dc ...>
|
26
|
+
|
27
|
+
== REQUIREMENTS:
|
28
|
+
|
29
|
+
* Hpricot
|
30
|
+
* WWW::Mechanize
|
31
|
+
|
32
|
+
== INSTALL:
|
33
|
+
|
34
|
+
$ sudo gem install free-scrape
|
35
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require './tasks/spec.rb'
|
6
|
+
require './lib/free_scrape/version.rb'
|
7
|
+
|
8
|
+
Hoe.new('free-scrape', FreeScrape::VERSION) do |p|
|
9
|
+
p.rubyforge_name = 'freebase'
|
10
|
+
p.remote_rdoc_dir = ''
|
11
|
+
p.developer('Postmodern Modulus III', 'postmodern.mod3@gmail.com')
|
12
|
+
p.extra_deps = ['hpricot', 'mechanize']
|
13
|
+
end
|
14
|
+
|
15
|
+
# vim: syntax=Ruby
|
data/lib/free_scrape.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module FreeScrape
  #
  # A named category of an Item, holding the metadata fields that were
  # collected for that category.
  #
  class Category

    # Name of the category
    attr_reader :name

    # Metadata associated with the category, a +Hash+ keyed by field name
    attr_reader :metadata

    #
    # Creates a new Category object with the specified _name_. The
    # metadata starts out as an empty +Hash+ and is filled in by the
    # caller through #metadata.
    #
    def initialize(name)
      @name = name
      @metadata = {}
    end

    #
    # Returns the metadata with the specified _name_, or +nil+ if the
    # category has no such field.
    #
    def [](name)
      @metadata[name]
    end

    #
    # Returns the name of the category in +String+ form.
    #
    def to_s
      @name.to_s
    end

  end
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'free_scrape/item'
|
2
|
+
require 'free_scrape/item_link'
|
3
|
+
|
4
|
+
require 'www/mechanize'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'open-uri'
|
7
|
+
|
8
|
+
module FreeScrape
  # Common proxy port
  COMMON_PROXY_PORT = 8080

  # Default language
  DEFAULT_LANGUAGE = :en

  #
  # Returns the +Hash+ of proxy information. The hash is created on
  # first use with no host, COMMON_PROXY_PORT as the port and no
  # credentials; callers configure the proxy by mutating it.
  #
  def FreeScrape.proxy
    @@free_scrape_proxy ||= {
      :host => nil,
      :port => COMMON_PROXY_PORT,
      :user => nil,
      :password => nil
    }
  end

  #
  # Creates a HTTP URI based from the given _proxy_info_ hash. The
  # _proxy_info_ hash defaults to FreeScrape.proxy, if not given.
  # Returns +nil+ if _proxy_info_ has no <tt>:host</tt>.
  #
  # _proxy_info_ may contain the following keys:
  # <tt>:host</tt>:: The proxy host.
  # <tt>:port</tt>:: The proxy port. Defaults to COMMON_PROXY_PORT,
  #                  if not specified.
  # <tt>:user</tt>:: The user-name to login as.
  # <tt>:password</tt>:: The password to login with.
  #
  def FreeScrape.proxy_uri(proxy_info=FreeScrape.proxy)
    # Read from the hash that was passed in, not from the global proxy,
    # so that per-request proxies are honoured.
    return nil unless proxy_info[:host]

    # Only emit credentials when a user was actually configured;
    # otherwise the URI would carry an empty ":" userinfo part.
    userinfo = nil

    if proxy_info[:user]
      userinfo = proxy_info[:user].to_s

      if proxy_info[:password]
        userinfo = "#{userinfo}:#{proxy_info[:password]}"
      end
    end

    return URI::HTTP.build(:host => proxy_info[:host],
                           :port => (proxy_info[:port] || COMMON_PROXY_PORT),
                           :userinfo => userinfo,
                           :path => '/')
  end

  #
  # Returns the supported FreeScrape User-Agent Aliases.
  #
  def FreeScrape.user_agent_aliases
    WWW::Mechanize::AGENT_ALIASES
  end

  #
  # Returns the FreeScrape User-Agent. Falls back to the
  # 'Windows IE 6' alias when no User-Agent has been set.
  #
  def FreeScrape.user_agent
    @@free_scrape_user_agent ||= FreeScrape.user_agent_aliases['Windows IE 6']
  end

  #
  # Sets the FreeScrape User-Agent to the specified _agent_.
  #
  def FreeScrape.user_agent=(agent)
    @@free_scrape_user_agent = agent
  end

  #
  # Sets the FreeScrape User-Agent using the specified user-agent alias
  # _name_.
  #
  def FreeScrape.user_agent_alias=(name)
    @@free_scrape_user_agent = FreeScrape.user_agent_aliases[name.to_s]
  end

  #
  # Opens the _uri_ with the given _options_. The contents of the _uri_
  # will be returned. Raises an +ArgumentError+ if _uri_ is not a URI
  # that open-uri can open (such as HTTP or FTP).
  #
  # _options_ may contain the following keys:
  # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
  # <tt>:user_agent</tt>:: The User-Agent String to use.
  # <tt>:proxy</tt>:: A +Hash+ of proxy information which may
  #                   contain the following keys:
  #                   <tt>:host</tt>:: The proxy host.
  #                   <tt>:port</tt>:: The proxy port.
  #                   <tt>:user</tt>:: The user-name to login as.
  #                   <tt>:password</tt>:: The password to login with.
  #
  #   FreeScrape.open_uri('http://www.hackety.org/')
  #
  #   FreeScrape.open_uri('http://tenderlovemaking.com/',
  #     :user_agent_alias => 'Linux Mozilla')
  #   FreeScrape.open_uri('http://www.wired.com/',
  #     :user_agent => 'the future')
  #
  def FreeScrape.open_uri(uri,options={})
    headers = {}

    if options[:user_agent_alias]
      # Look the alias up the same way FreeScrape.user_agent_alias= does,
      # so Symbol and String alias names behave identically.
      headers['User-Agent'] = FreeScrape.user_agent_aliases[options[:user_agent_alias].to_s]
    elsif options[:user_agent]
      headers['User-Agent'] = options[:user_agent]
    elsif FreeScrape.user_agent
      headers['User-Agent'] = FreeScrape.user_agent
    end

    proxy = (options[:proxy] || FreeScrape.proxy)
    if proxy[:host]
      headers[:proxy] = FreeScrape.proxy_uri(proxy)
    end

    # Open through the URI object instead of Kernel.open: Kernel.open
    # also interprets "|command" and local paths, and no longer opens
    # URLs on current Ruby versions.
    target = (uri.respond_to?(:open) ? uri : URI.parse(uri.to_s))

    unless target.respond_to?(:open)
      raise ArgumentError, "cannot open URI: #{uri}"
    end

    return target.open(headers)
  end

  #
  # Similar to FreeScrape.open_uri but returns an Hpricot document.
  #
  def FreeScrape.open_page(uri,options={})
    Hpricot(FreeScrape.open_uri(uri,options))
  end

  #
  # Creates a new WWW::Mechanize agent with the given _options_. If a
  # _block_ is given, it is passed the newly created agent. Returns the
  # agent.
  #
  # _options_ may contain the following keys:
  # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
  # <tt>:user_agent</tt>:: The User-Agent string to use.
  # <tt>:proxy</tt>:: A +Hash+ of proxy information which may
  #                   contain the following keys:
  #                   <tt>:host</tt>:: The proxy host.
  #                   <tt>:port</tt>:: The proxy port.
  #                   <tt>:user</tt>:: The user-name to login as.
  #                   <tt>:password</tt>:: The password to login with.
  #
  #   FreeScrape.web_agent
  #
  #   FreeScrape.web_agent(:user_agent_alias => 'Linux Mozilla')
  #   FreeScrape.web_agent(:user_agent => 'Google Bot')
  #
  def FreeScrape.web_agent(options={},&block)
    agent = WWW::Mechanize.new

    if options[:user_agent_alias]
      agent.user_agent_alias = options[:user_agent_alias]
    elsif options[:user_agent]
      agent.user_agent = options[:user_agent]
    elsif FreeScrape.user_agent
      agent.user_agent = FreeScrape.user_agent
    end

    proxy = (options[:proxy] || FreeScrape.proxy)
    if proxy[:host]
      agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
    end

    block.call(agent) if block
    return agent
  end

  #
  # Returns the language to access FreeScrape with. Defaults to
  # DEFAULT_LANGUAGE.
  #
  def FreeScrape.language
    @@free_scrape_language ||= DEFAULT_LANGUAGE
  end

  #
  # Sets the language to access FreeScrape with to the _new_language_,
  # which is converted to a +Symbol+.
  #
  def FreeScrape.language=(new_language)
    @@free_scrape_language = new_language.to_sym
  end

  #
  # Returns the Item with the specified _descriptor_, which can be either
  # a URI to freebase.com, an Item GUID or an Item name.
  #
  #   FreeScrape.item('Aphex Twin')
  #   # => #<FreeScrape::Item:0xb73fdba0 ...>
  #
  def FreeScrape.item(descriptor)
    Item.from(descriptor)
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'free_scrape/item_link'
|
2
|
+
require 'free_scrape/category'
|
3
|
+
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module FreeScrape
  #
  # An item scraped from a freebase.com view page: its URL, name,
  # summary and the categories of metadata found on the page.
  #
  class Item

    # URL of the item
    attr_reader :url

    # Name of the item
    attr_accessor :name

    # Description of the item
    attr_accessor :summary

    # Categories the item is in, a +Hash+ keyed by category name
    attr_reader :categories

    #
    # Creates a new Item object with the specified _url_ and the given
    # _options_.
    #
    # _options_ may contain the following keys:
    # <tt>:name</tt>:: The name of the item.
    # <tt>:summary</tt>:: The description of the item.
    #
    def initialize(url,options={})
      @url = url
      @name = options[:name]
      @summary = options[:summary]

      @categories = {}
    end

    #
    # Returns the Item object with the specified _descriptor_, which can
    # be either a URI to freebase.com, an Item GUID or an Item name.
    #
    def Item.from(descriptor)
      descriptor = descriptor.to_s

      # A GUID is exactly 32 lowercase hex digits (as in every GUID used
      # by this library's examples). Requiring the full length keeps
      # ordinary names made of hex letters, such as "abba" or "cafe",
      # from being mistaken for GUIDs. \A and \z anchor the whole
      # string rather than a single line.
      if descriptor =~ /\A[0-9a-f]{32}\z/
        return Item.guid(descriptor)
      elsif descriptor =~ /\Ahttp(s)?:\/\/(www\.)?freebase\.com\/view/
        return Item.from_url(descriptor)
      else
        return Item.named(descriptor)
      end
    end

    #
    # Returns the Item object with the specified _name_. The name is
    # down-cased and its words are joined with underscores to form the
    # last path segment of the view URL for the current
    # FreeScrape.language.
    #
    def Item.named(name)
      name = name.split(' ').map { |word|
        word.downcase
      }.join('_')

      return Item.from_url("http://www.freebase.com/view/#{FreeScrape.language}/#{name}")
    end

    #
    # Returns the Item object with the specified _guid_.
    #
    def Item.guid(guid)
      Item.from_url("http://www.freebase.com/view/guid/#{guid}")
    end

    #
    # Creates the Item at the specified _url_ by requesting the page and
    # extracting the name, summary and per-category metadata from it.
    #
    #   Item.from_url('http://www.freebase.com/view/guid/9202a8c04000641f800000000301146f')
    #   # => #<FreeScrape::Item:0xb73fdba0 ...>
    #
    def Item.from_url(url)
      url = URI(url.to_s)
      page = FreeScrape.open_page(url)
      new_item = Item.new(url)

      content = page.at('#content_main')

      new_item.name = content.at('#title//h1').inner_text.strip
      new_item.summary = content.at('#title/div.article-container/div.article').inner_html.strip

      # Converts one value element into an ItemLink (when it contains an
      # 'a.pv' link), its stripped text (when it has no 'a.detail-view'
      # link and the text is non-empty) or nil otherwise.
      extract_value = lambda { |elem|
        if (item_link = elem.at('a.pv'))
          # hrefs may be relative; resolve them against the item's URL
          link_url = new_item.url.merge(item_link['href'])

          ItemLink.new(item_link.inner_text.strip, link_url)
        elsif elem.at('a.detail-view').nil?
          text = elem.inner_text.strip

          if text.empty?
            nil
          else
            text
          end
        end
      }

      content.search('div.domainsboxes//div.domainbox//div.typebox-container') do |domainbox|
        category_name = domainbox.at('//div.typebox-column-title/a').inner_text
        new_category = Category.new(category_name)

        domainbox.search('//div.prop-typebox') do |field|
          field_name = field.at('//span.prop-title').inner_text
          field_content = field.at('//div.prop-content')

          # stays nil when the field is neither a table nor a list
          field_value = nil

          if (table = field_content.at('table.prop-table'))
            # tabular field: an Array of Hashes, one per row, keyed by
            # the column headings
            field_value = []

            column_names = table.search('tr/th/div.prop-table-cell').map do |div|
              div.inner_text.strip
            end

            table.search('tr[td]') do |row|
              field_row = {}
              index = 0

              row.search('td') do |cell|
                if (value = extract_value.call(cell))
                  field_row[column_names[index]] = value
                end

                # advance even for skipped cells so that later values
                # stay aligned with their column names
                index += 1
              end

              field_value << field_row unless field_row.empty?
            end
          elsif (list = field_content.at('ul.prop-list'))
            # list field: an Array of the non-nil extracted values
            field_value = []

            list.search('li.prop-list-item') do |list_item|
              if (value = extract_value.call(list_item))
                field_value << value
              end
            end
          end

          new_category.metadata[field_name] = field_value
        end

        new_item.categories[new_category.name] = new_category
      end

      return new_item
    end

    #
    # Returns the category names of the item.
    #
    def category_names
      @categories.keys
    end

    #
    # Returns the Category with the specified _name_ of the item, or
    # +nil+ if the item has no such category.
    #
    def [](name)
      @categories[name]
    end

    #
    # Returns the name of the item.
    #
    def to_s
      @name.to_s
    end

  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module FreeScrape
  #
  # A titled link to another item, as found among an Item's metadata
  # values.
  #
  class ItemLink

    # Title of the item
    attr_reader :title

    # URL of the item
    attr_reader :url

    #
    # Creates a new ItemLink with the specified _title_ and _url_.
    #
    def initialize(title,url)
      @title = title
      @url = url
    end

    #
    # Returns the title of the item-link in +String+ form.
    #
    def to_s
      @title.to_s
    end

  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe FreeScrape do
|
4
|
+
it "should have a version" do
|
5
|
+
FreeScrape.const_get('VERSION').should_not be_nil
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should have a default language" do
|
9
|
+
FreeScrape.language.should_not be_nil
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should have a default User-Agent string" do
|
13
|
+
FreeScrape.user_agent.should_not be_nil
|
14
|
+
end
|
15
|
+
|
16
|
+
it "should return an item from a given URL" do
|
17
|
+
@item = FreeScrape.item('http://www.freebase.com/view/en/squarepusher/')
|
18
|
+
@item.should_not be_nil
|
19
|
+
@item.name.should == 'Squarepusher'
|
20
|
+
end
|
21
|
+
|
22
|
+
it "should return an item from a given GUID" do
|
23
|
+
@item = FreeScrape.item('9202a8c04000641f8000000000184c7a')
|
24
|
+
@item.should_not be_nil
|
25
|
+
@item.name.should == 'Conflict'
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should return an item for a given title" do
|
29
|
+
@item = FreeScrape.item('Aphex Twin')
|
30
|
+
@item.should_not be_nil
|
31
|
+
@item.name.should == 'Aphex Twin'
|
32
|
+
end
|
33
|
+
end
|
data/spec/item_spec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
describe Item do
|
4
|
+
before(:all) do
|
5
|
+
@item = Item.from_url('http://www.freebase.com/view/en/aphex_twin')
|
6
|
+
end
|
7
|
+
|
8
|
+
it "should have a URL" do
|
9
|
+
@item.url.should_not be_nil
|
10
|
+
end
|
11
|
+
|
12
|
+
it "should have a name" do
|
13
|
+
@item.name.should_not be_nil
|
14
|
+
@item.name.should_not be_empty
|
15
|
+
end
|
16
|
+
|
17
|
+
it "should have categories" do
|
18
|
+
@item.categories.should_not be_empty
|
19
|
+
end
|
20
|
+
|
21
|
+
it "should have category names" do
|
22
|
+
@item.category_names.should_not be_empty
|
23
|
+
end
|
24
|
+
|
25
|
+
it "should have metadata for each category" do
|
26
|
+
@item.categories.each_value do |category|
|
27
|
+
category.metadata.should_not be_empty
|
28
|
+
|
29
|
+
category.metadata.each_value do |data|
|
30
|
+
data.should_not be_nil
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|
data/spec/spec_helper.rb
ADDED
data/tasks/spec.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: free-scrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Postmodern Modulus III
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-09-29 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mechanize
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: hoe
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.7.0
|
44
|
+
version:
|
45
|
+
description: A web-scraping interface to freebase.com, the open and shared database of the world's knowledge.
|
46
|
+
email:
|
47
|
+
- postmodern.mod3@gmail.com
|
48
|
+
executables: []
|
49
|
+
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
extra_rdoc_files:
|
53
|
+
- History.txt
|
54
|
+
- Manifest.txt
|
55
|
+
- README.txt
|
56
|
+
files:
|
57
|
+
- History.txt
|
58
|
+
- Manifest.txt
|
59
|
+
- README.txt
|
60
|
+
- Rakefile
|
61
|
+
- lib/free_scrape.rb
|
62
|
+
- lib/free_scrape/item_link.rb
|
63
|
+
- lib/free_scrape/category.rb
|
64
|
+
- lib/free_scrape/item.rb
|
65
|
+
- lib/free_scrape/free_scrape.rb
|
66
|
+
- lib/free_scrape/version.rb
|
67
|
+
- tasks/spec.rb
|
68
|
+
- spec/item_spec.rb
|
69
|
+
- spec/free_scrape_spec.rb
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
has_rdoc: true
|
72
|
+
homepage: http://freebase.rubyforge.org/
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options:
|
75
|
+
- --main
|
76
|
+
- README.txt
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: "0"
|
84
|
+
version:
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
version:
|
91
|
+
requirements: []
|
92
|
+
|
93
|
+
rubyforge_project: freebase
|
94
|
+
rubygems_version: 1.2.0
|
95
|
+
signing_key:
|
96
|
+
specification_version: 2
|
97
|
+
summary: A web-scraping interface to freebase.com, the open and shared database of the world's knowledge.
|
98
|
+
test_files: []
|
99
|
+
|