free-scrape 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +12 -0
- data/Manifest.txt +14 -0
- data/README.txt +35 -0
- data/Rakefile +15 -0
- data/lib/free_scrape.rb +3 -0
- data/lib/free_scrape/category.rb +33 -0
- data/lib/free_scrape/free_scrape.rb +185 -0
- data/lib/free_scrape/item.rb +171 -0
- data/lib/free_scrape/item_link.rb +26 -0
- data/lib/free_scrape/version.rb +3 -0
- data/spec/free_scrape_spec.rb +33 -0
- data/spec/item_spec.rb +34 -0
- data/spec/spec_helper.rb +7 -0
- data/tasks/spec.rb +7 -0
- metadata +99 -0
data/History.txt
ADDED
@@ -0,0 +1,12 @@
|
|
1
|
+
=== 0.1.0 / 2008-09-29
|
2
|
+
|
3
|
+
* Renamed freebase to free-scrape, in order to not conflict with the other
|
4
|
+
freebase gem from the freebaseapi project.
|
5
|
+
|
6
|
+
=== 0.0.9 / 2008-09-28
|
7
|
+
|
8
|
+
* Initial release.
|
9
|
+
* Can request items from freebase.com using either a URL, an Item GUID or an
|
10
|
+
Item name.
|
11
|
+
* Preserves tags and other freebase links.
|
12
|
+
|
data/Manifest.txt
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
History.txt
|
2
|
+
Manifest.txt
|
3
|
+
README.txt
|
4
|
+
Rakefile
|
5
|
+
lib/free_scrape.rb
|
6
|
+
lib/free_scrape/item_link.rb
|
7
|
+
lib/free_scrape/category.rb
|
8
|
+
lib/free_scrape/item.rb
|
9
|
+
lib/free_scrape/free_scrape.rb
|
10
|
+
lib/free_scrape/version.rb
|
11
|
+
tasks/spec.rb
|
12
|
+
spec/item_spec.rb
|
13
|
+
spec/free_scrape_spec.rb
|
14
|
+
spec/spec_helper.rb
|
data/README.txt
ADDED
@@ -0,0 +1,35 @@
|
|
1
|
+
= FreeBase
|
2
|
+
|
3
|
+
* http://freebase.rubyforge.org/
|
4
|
+
* Postmodern (postmodern.mod3@gmail.com)
|
5
|
+
|
6
|
+
== DESCRIPTION:
|
7
|
+
|
8
|
+
A web-scraping interface to freebase.com, the open and shared database of the
|
9
|
+
world's knowledge.
|
10
|
+
|
11
|
+
== FEATURES/PROBLEMS:
|
12
|
+
|
13
|
+
* Can request items from freebase.com using either a URL, an Item GUID or an
|
14
|
+
Item name.
|
15
|
+
* Preserves tags and other freebase links.
|
16
|
+
|
17
|
+
== EXAMPLES:
|
18
|
+
|
19
|
+
require 'free_scrape'
|
20
|
+
|
21
|
+
FreeScrape.item('Aphex Twin')
|
22
|
+
# => #<FreeScrape::Item:0xb73fdba0 ...>
|
23
|
+
|
24
|
+
FreeScrape.item('http://www.freebase.com/view/guid/9202a8c04000641f8000000003ac957f')
|
25
|
+
# => #<FreeScrape::Item:0xb73fe3dc ...>
|
26
|
+
|
27
|
+
== REQUIREMENTS:
|
28
|
+
|
29
|
+
* Hpricot
|
30
|
+
* WWW::Mechanize
|
31
|
+
|
32
|
+
== INSTALL:
|
33
|
+
|
34
|
+
$ sudo gem install free-scrape
|
35
|
+
|
data/Rakefile
ADDED
@@ -0,0 +1,15 @@
|
|
1
|
+
# -*- ruby -*-
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
require 'hoe'
|
5
|
+
require './tasks/spec.rb'
|
6
|
+
require './lib/free_scrape/version.rb'
|
7
|
+
|
8
|
+
# Declares the free-scrape gem through Hoe: the RubyForge project it is
# published under, the remote rdoc directory, the developer contact and the
# runtime gem dependencies.
Hoe.new('free-scrape', FreeScrape::VERSION) { |gem_spec|
  gem_spec.rubyforge_name = 'freebase'
  gem_spec.remote_rdoc_dir = ''
  gem_spec.developer('Postmodern Modulus III', 'postmodern.mod3@gmail.com')
  gem_spec.extra_deps = %w[hpricot mechanize]
}
|
14
|
+
|
15
|
+
# vim: syntax=Ruby
|
data/lib/free_scrape.rb
ADDED
@@ -0,0 +1,33 @@
|
|
1
|
+
module FreeScrape
  #
  # A named category together with a +Hash+ of metadata stored under it.
  #
  class Category

    # Name of the category
    attr_reader :name

    # +Hash+ of metadata associated with the category; starts out empty
    attr_reader :metadata

    #
    # Builds a Category called _name_ with no metadata.
    #
    def initialize(name)
      @metadata = {}
      @name = name
    end

    #
    # Looks up the metadata entry stored under _name_, or +nil+ when there
    # is no such entry.
    #
    def [](name)
      metadata[name]
    end

    #
    # The category name converted to a +String+.
    #
    def to_s
      name.to_s
    end

  end
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'free_scrape/item'
|
2
|
+
require 'free_scrape/item_link'
|
3
|
+
|
4
|
+
require 'www/mechanize'
|
5
|
+
require 'hpricot'
|
6
|
+
require 'open-uri'
|
7
|
+
|
8
|
+
module FreeScrape
  # Common proxy port
  COMMON_PROXY_PORT = 8080

  # Default language
  DEFAULT_LANGUAGE = :en

  #
  # Returns the +Hash+ of proxy information. The Hash is created on the
  # first call (no host, port COMMON_PROXY_PORT, no credentials) and the
  # same object is returned afterwards, so it can be modified in place:
  #
  #   FreeScrape.proxy[:host] = 'proxy.example.com'
  #
  def FreeScrape.proxy
    @@free_scrape_proxy ||= {
      :host => nil,
      :port => COMMON_PROXY_PORT,
      :user => nil,
      :password => nil
    }
  end

  #
  # Creates a HTTP URI based from the given _proxy_info_ hash. The
  # _proxy_info_ hash defaults to FreeScrape.proxy, if not given.
  # Returns +nil+ if _proxy_info_ has no <tt>:host</tt>.
  #
  # _proxy_info_ may contain the following keys:
  # <tt>:host</tt>:: The proxy host.
  # <tt>:port</tt>:: The proxy port. Defaults to COMMON_PROXY_PORT,
  #                  if not specified.
  # <tt>:user</tt>:: The user-name to login as.
  # <tt>:password</tt>:: The password to login with.
  #
  def FreeScrape.proxy_uri(proxy_info=FreeScrape.proxy)
    host = proxy_info[:host]
    return nil unless host

    # Only emit a userinfo component when credentials were actually
    # supplied, instead of a bare ":" built from two nils.
    user = proxy_info[:user]
    userinfo = if user
                 password = proxy_info[:password]
                 password ? "#{user}:#{password}" : user.to_s
               end

    # Read every component from the hash that was passed in, so that a
    # per-call proxy is not silently replaced by the global one.
    return URI::HTTP.build(:host => host,
                           :port => (proxy_info[:port] || COMMON_PROXY_PORT),
                           :userinfo => userinfo,
                           :path => '/')
  end

  #
  # Returns the supported FreeScrape User-Agent Aliases, which are the
  # aliases defined by WWW::Mechanize.
  #
  def FreeScrape.user_agent_aliases
    WWW::Mechanize::AGENT_ALIASES
  end

  #
  # Returns the FreeScrape User-Agent. Defaults to the 'Windows IE 6'
  # alias until another User-Agent is set.
  #
  def FreeScrape.user_agent
    @@free_scrape_user_agent ||= FreeScrape.user_agent_aliases['Windows IE 6']
  end

  #
  # Sets the FreeScrape User-Agent to the specified _agent_.
  #
  def FreeScrape.user_agent=(agent)
    @@free_scrape_user_agent = agent
  end

  #
  # Sets the FreeScrape User-Agent using the specified user-agent alias
  # _name_.
  #
  def FreeScrape.user_agent_alias=(name)
    @@free_scrape_user_agent = FreeScrape.user_agent_aliases[name.to_s]
  end

  #
  # Opens the _uri_ with the given _options_. The contents of the _uri_
  # will be returned.
  #
  # _options_ may contain the following keys:
  # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
  # <tt>:user_agent</tt>:: The User-Agent String to use.
  # <tt>:proxy</tt>:: A +Hash+ of proxy information which may
  #                   contain the following keys:
  #                   <tt>:host</tt>:: The proxy host.
  #                   <tt>:port</tt>:: The proxy port.
  #                   <tt>:user</tt>:: The user-name to login as.
  #                   <tt>:password</tt>:: The password to login with.
  #
  #   FreeScrape.open_uri('http://www.hackety.org/')
  #
  #   FreeScrape.open_uri('http://tenderlovemaking.com/',
  #     :user_agent_alias => 'Linux Mozilla')
  #   FreeScrape.open_uri('http://www.wired.com/',
  #     :user_agent => 'the future')
  #
  def FreeScrape.open_uri(uri,options={})
    headers = {}

    # Precedence: explicit alias, then explicit string, then the
    # module-wide default.
    if options[:user_agent_alias]
      # Alias names are looked up as Strings, as in user_agent_alias=.
      headers['User-Agent'] = FreeScrape.user_agent_aliases[options[:user_agent_alias].to_s]
    elsif options[:user_agent]
      headers['User-Agent'] = options[:user_agent]
    elsif FreeScrape.user_agent
      headers['User-Agent'] = FreeScrape.user_agent
    end

    proxy = (options[:proxy] || FreeScrape.proxy)
    if proxy[:host]
      headers[:proxy] = FreeScrape.proxy_uri(proxy)
    end

    # NOTE(review): Kernel.open treats a String beginning with "|" as a
    # command to run; callers should not pass untrusted Strings as _uri_.
    return Kernel.open(uri,headers)
  end

  #
  # Similar to FreeScrape.open_uri but returns an Hpricot document.
  #
  def FreeScrape.open_page(uri,options={})
    Hpricot(FreeScrape.open_uri(uri,options))
  end

  #
  # Creates a new WWW::Mechanize agent with the given _options_. If a
  # _block_ is given, it will be passed the agent before it is returned.
  #
  # _options_ may contain the following keys:
  # <tt>:user_agent_alias</tt>:: The User-Agent Alias to use.
  # <tt>:user_agent</tt>:: The User-Agent string to use.
  # <tt>:proxy</tt>:: A +Hash+ of proxy information which may
  #                   contain the following keys:
  #                   <tt>:host</tt>:: The proxy host.
  #                   <tt>:port</tt>:: The proxy port.
  #                   <tt>:user</tt>:: The user-name to login as.
  #                   <tt>:password</tt>:: The password to login with.
  #
  #   FreeScrape.web_agent
  #
  #   FreeScrape.web_agent(:user_agent_alias => 'Linux Mozilla')
  #   FreeScrape.web_agent(:user_agent => 'Google Bot')
  #
  def FreeScrape.web_agent(options={},&block)
    agent = WWW::Mechanize.new

    if options[:user_agent_alias]
      agent.user_agent_alias = options[:user_agent_alias]
    elsif options[:user_agent]
      agent.user_agent = options[:user_agent]
    elsif FreeScrape.user_agent
      agent.user_agent = FreeScrape.user_agent
    end

    proxy = (options[:proxy] || FreeScrape.proxy)
    if proxy[:host]
      agent.set_proxy(proxy[:host],proxy[:port],proxy[:user],proxy[:password])
    end

    block.call(agent) if block
    return agent
  end

  #
  # Returns the language to access FreeScrape with. Defaults to
  # DEFAULT_LANGUAGE until another language is set.
  #
  def FreeScrape.language
    @@free_scrape_language ||= DEFAULT_LANGUAGE
  end

  #
  # Sets the language to access FreeScrape with to the _new_language_,
  # which is converted to a +Symbol+.
  #
  def FreeScrape.language=(new_language)
    @@free_scrape_language = new_language.to_sym
  end

  #
  # Returns the Item with the specified _descriptor_, which can be either
  # a URI to freebase.com, an Item GUID or an Item name.
  #
  #   FreeScrape.item('Aphex Twin')
  #   # => #<FreeScrape::Item:0xb73fdba0 ...>
  #
  def FreeScrape.item(descriptor)
    Item.from(descriptor)
  end
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
require 'free_scrape/item_link'
|
2
|
+
require 'free_scrape/category'
|
3
|
+
|
4
|
+
require 'uri'
|
5
|
+
|
6
|
+
module FreeScrape
  #
  # An item scraped from a freebase.com view page: its URL, name, summary
  # and the categories of metadata found on the page.
  #
  class Item

    # URL of the item
    attr_reader :url

    # Name of the item
    attr_accessor :name

    # Description of the item
    attr_accessor :summary

    # Categories the item is in, as a +Hash+ of category names to
    # Category objects
    attr_reader :categories

    #
    # Creates a new Item object with the specified _url_ and the given
    # _options_.
    #
    # _options_ may contain the following keys:
    # <tt>:name</tt>:: The name of the item.
    # <tt>:summary</tt>:: The description of the item.
    #
    def initialize(url,options={})
      @url = url
      @name = options[:name]
      @summary = options[:summary]

      @categories = {}
    end

    #
    # Returns the Item object with the specified _descriptor_, which can
    # be either a URI to freebase.com, an Item GUID or an Item name.
    # Leading and trailing white-space in the _descriptor_ is ignored.
    #
    def Item.from(descriptor)
      descriptor = descriptor.to_s.strip

      # \A and \z anchor the whole descriptor; ^ and $ would match any
      # single line of a multi-line descriptor.
      # NOTE(review): a lower-case name made only of 0-9/a-f (e.g. "cafe")
      # is still treated as a GUID; the GUIDs in the examples are 32
      # characters long, confirm before requiring that length here.
      if descriptor =~ /\A[0-9a-f]+\z/
        return Item.guid(descriptor)
      elsif descriptor =~ /\Ahttps?:\/\/(www\.)?freebase\.com\/view/
        return Item.from_url(descriptor)
      else
        return Item.named(descriptor)
      end
    end

    #
    # Returns the Item object with the specified _name_. The _name_ is
    # lower-cased and its words joined with underscores to form the path
    # under the current FreeScrape.language.
    #
    def Item.named(name)
      name = name.split(' ').map { |word|
        word.downcase
      }.join('_')

      return Item.from_url("http://www.freebase.com/view/#{FreeScrape.language}/#{name}")
    end

    #
    # Returns the Item object with the specified _guid_.
    #
    def Item.guid(guid)
      Item.from_url("http://www.freebase.com/view/guid/#{guid}")
    end

    #
    # Creates the Item at the specified _url_, by requesting the page and
    # extracting the name, summary and the metadata of every category box
    # within it.
    #
    #   Item.from_url('http://www.freebase.com/view/guid/9202a8c04000641f800000000301146f')
    #   # => #<FreeScrape::Item:0xb73fdba0 ...>
    #
    def Item.from_url(url)
      url = URI(url.to_s)
      page = FreeScrape.open_page(url)
      new_item = Item.new(url)

      content = page.at('#content_main')

      new_item.name = content.at('#title//h1').inner_text.strip
      new_item.summary = content.at('#title/div.article-container/div.article').inner_html.strip

      # Converts a table cell or list item into a value: an ItemLink when
      # it contains an 'a.pv' link, otherwise its non-empty text, otherwise
      # nil. Elements containing an 'a.detail-view' link yield nil.
      extract_value = lambda { |elem|
        if (item_link = elem.at('a.pv'))
          # hrefs are resolved against the URL of the item
          link_url = new_item.url.merge(item_link['href'])

          ItemLink.new(item_link.inner_text.strip, link_url)
        elsif elem.at('a.detail-view').nil?
          text = elem.inner_text.strip

          if text.empty?
            nil
          else
            text
          end
        end
      }

      content.search('div.domainsboxes//div.domainbox//div.typebox-container') do |domainbox|
        category_name = domainbox.at('//div.typebox-column-title/a').inner_text
        new_category = Category.new(category_name)

        domainbox.search('//div.prop-typebox') do |field|
          field_name = field.at('//span.prop-title').inner_text
          field_content = field.at('//div.prop-content')

          # stays nil when the field is neither a table nor a list
          field_value = nil

          if (table = field_content.at('table.prop-table'))
            # tables become an Array of Hashes, one per row, keyed by the
            # column heading
            field_value = []

            column_names = table.search('tr/th/div.prop-table-cell').map do |div|
              div.inner_text.strip
            end

            table.search('tr[td]') do |row|
              field_row = {}
              index = 0

              row.search('td') do |cell|
                if (value = extract_value.call(cell))
                  field_row[column_names[index]] = value
                end

                # advance even for empty cells, to stay aligned with the
                # column headings
                index += 1
              end

              field_value << field_row unless field_row.empty?
            end
          elsif (list = field_content.at('ul.prop-list'))
            # lists become an Array of values
            field_value = []

            list.search('li.prop-list-item') do |list_item|
              if (value = extract_value.call(list_item))
                field_value << value
              end
            end
          end

          new_category.metadata[field_name] = field_value
        end

        new_item.categories[new_category.name] = new_category
      end

      return new_item
    end

    #
    # Returns the category names of the item.
    #
    def category_names
      @categories.keys
    end

    #
    # Returns the Category with the specified _name_ of the item.
    #
    def [](name)
      @categories[name]
    end

    #
    # Returns the name of the item.
    #
    def to_s
      @name.to_s
    end

  end
end
|
@@ -0,0 +1,26 @@
|
|
1
|
+
module FreeScrape
  #
  # A titled link to another item.
  #
  class ItemLink

    # Title of the item, and the URL of the item
    attr_reader :title, :url

    #
    # Builds an ItemLink from the given _title_ and _url_.
    #
    def initialize(title,url)
      @title, @url = title, url
    end

    #
    # The title of the item-link converted to a +String+.
    #
    def to_s
      title.to_s
    end

  end
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for the module-level FreeScrape API: its defaults, and item lookup
# by URL, GUID and name. The lookup examples request pages from
# freebase.com.
describe FreeScrape do
  it "should have a version" do
    FreeScrape.const_get('VERSION').should_not be_nil
  end

  it "should have a default language" do
    FreeScrape.language.should_not be_nil
  end

  it "should have a default User-Agent string" do
    FreeScrape.user_agent.should_not be_nil
  end

  # [example description, descriptor, expected item name]
  [
    ["should return an item from a given URL",
     'http://www.freebase.com/view/en/squarepusher/', 'Squarepusher'],
    ["should return an item from a given GUID",
     '9202a8c04000641f8000000000184c7a', 'Conflict'],
    ["should return an item for a given title",
     'Aphex Twin', 'Aphex Twin']
  ].each do |description, descriptor, expected_name|
    it description do
      item = FreeScrape.item(descriptor)

      item.should_not be_nil
      item.name.should == expected_name
    end
  end
end
|
data/spec/item_spec.rb
ADDED
@@ -0,0 +1,34 @@
|
|
1
|
+
require 'spec_helper'
|
2
|
+
|
3
|
+
# Specs for an Item scraped from a freebase.com page. The page is requested
# once, before all of the examples.
describe Item do
  before(:all) do
    @item = Item.from_url('http://www.freebase.com/view/en/aphex_twin')
  end

  it "should have a URL" do
    @item.url.should_not be_nil
  end

  it "should have a name" do
    item_name = @item.name

    item_name.should_not be_nil
    item_name.should_not be_empty
  end

  it "should have categories" do
    @item.categories.should_not be_empty
  end

  it "should have category names" do
    @item.category_names.should_not be_empty
  end

  it "should have metadata for each category" do
    @item.categories.values.each do |category|
      metadata = category.metadata
      metadata.should_not be_empty

      metadata.values.each { |data| data.should_not be_nil }
    end
  end
end
|
data/spec/spec_helper.rb
ADDED
data/tasks/spec.rb
ADDED
metadata
ADDED
@@ -0,0 +1,99 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: free-scrape
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Postmodern Modulus III
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-09-29 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: hpricot
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: "0"
|
24
|
+
version:
|
25
|
+
- !ruby/object:Gem::Dependency
|
26
|
+
name: mechanize
|
27
|
+
type: :runtime
|
28
|
+
version_requirement:
|
29
|
+
version_requirements: !ruby/object:Gem::Requirement
|
30
|
+
requirements:
|
31
|
+
- - ">="
|
32
|
+
- !ruby/object:Gem::Version
|
33
|
+
version: "0"
|
34
|
+
version:
|
35
|
+
- !ruby/object:Gem::Dependency
|
36
|
+
name: hoe
|
37
|
+
type: :development
|
38
|
+
version_requirement:
|
39
|
+
version_requirements: !ruby/object:Gem::Requirement
|
40
|
+
requirements:
|
41
|
+
- - ">="
|
42
|
+
- !ruby/object:Gem::Version
|
43
|
+
version: 1.7.0
|
44
|
+
version:
|
45
|
+
description: A web-scraping interface to freebase.com, the open and shared database of the world's knowledge.
|
46
|
+
email:
|
47
|
+
- postmodern.mod3@gmail.com
|
48
|
+
executables: []
|
49
|
+
|
50
|
+
extensions: []
|
51
|
+
|
52
|
+
extra_rdoc_files:
|
53
|
+
- History.txt
|
54
|
+
- Manifest.txt
|
55
|
+
- README.txt
|
56
|
+
files:
|
57
|
+
- History.txt
|
58
|
+
- Manifest.txt
|
59
|
+
- README.txt
|
60
|
+
- Rakefile
|
61
|
+
- lib/free_scrape.rb
|
62
|
+
- lib/free_scrape/item_link.rb
|
63
|
+
- lib/free_scrape/category.rb
|
64
|
+
- lib/free_scrape/item.rb
|
65
|
+
- lib/free_scrape/free_scrape.rb
|
66
|
+
- lib/free_scrape/version.rb
|
67
|
+
- tasks/spec.rb
|
68
|
+
- spec/item_spec.rb
|
69
|
+
- spec/free_scrape_spec.rb
|
70
|
+
- spec/spec_helper.rb
|
71
|
+
has_rdoc: true
|
72
|
+
homepage: http://freebase.rubyforge.org/
|
73
|
+
post_install_message:
|
74
|
+
rdoc_options:
|
75
|
+
- --main
|
76
|
+
- README.txt
|
77
|
+
require_paths:
|
78
|
+
- lib
|
79
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
80
|
+
requirements:
|
81
|
+
- - ">="
|
82
|
+
- !ruby/object:Gem::Version
|
83
|
+
version: "0"
|
84
|
+
version:
|
85
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
86
|
+
requirements:
|
87
|
+
- - ">="
|
88
|
+
- !ruby/object:Gem::Version
|
89
|
+
version: "0"
|
90
|
+
version:
|
91
|
+
requirements: []
|
92
|
+
|
93
|
+
rubyforge_project: freebase
|
94
|
+
rubygems_version: 1.2.0
|
95
|
+
signing_key:
|
96
|
+
specification_version: 2
|
97
|
+
summary: A web-scraping interface to freebase.com, the open and shared database of the world's knowledge.
|
98
|
+
test_files: []
|
99
|
+
|