muddyit_fu 0.2.11 → 0.2.12
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +51 -16
- data/VERSION +1 -1
- data/lib/muddyit/base.rb +24 -14
- data/muddyit_fu.gemspec +4 -3
- data/test/test_muddyit_fu.rb +128 -114
- data/test/thing.rb +13 -0
- metadata +3 -2
data/README.rdoc
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
Muddy is an information extraction platform. For further
|
4
4
|
details see the '{Getting Started with Muddy}[http://blog.muddy.it/2009/11/getting-started-with-muddy]'
|
5
|
-
article. This gem provides access to the Muddy platform via it's API
|
6
|
-
|
7
|
-
{Muddy Developer Guide}[http://muddy.it/developers/]
|
5
|
+
article. This gem provides access to the Muddy platform via its API (see {Muddy Developer Guide}[http://muddy.it/developers/]).
|
8
6
|
|
9
7
|
== Installation
|
10
8
|
|
@@ -16,7 +14,7 @@ article. This gem provides access to the Muddy platform via it's API :
|
|
16
14
|
|
17
15
|
Muddy supports OAuth and HTTP Basic auth for authentication and authorisation.
|
18
16
|
We recommend you use OAuth wherever possible when accessing Muddy. An example
|
19
|
-
of using OAuth with the
|
17
|
+
of using OAuth with the Muddy platform is described in the
|
20
18
|
{Building with Muddy and OAuth}[http://blog.muddy.it/2010/01/building-with-muddy-and-oauth]
|
21
19
|
article.
|
22
20
|
|
@@ -59,7 +57,7 @@ URL rather than text, just specify a URL instead :
|
|
59
57
|
|
60
58
|
Muddy allows you to store the entity extraction results so aggregate operations
|
61
59
|
can be performed over a collection of content (a 'collection' has many analysed 'pages').
|
62
|
-
A basic
|
60
|
+
A basic Muddy account provides a single 'collection' where extraction results
|
63
61
|
can be stored.
|
64
62
|
|
65
63
|
To store a page against a collection, the collection must first be found :
|
@@ -70,7 +68,16 @@ Once a collection has been found, entity extraction results can be stored in it:
|
|
70
68
|
|
71
69
|
collection.pages.create('http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm', {:minium_confidence => 0.2})
|
72
70
|
|
73
|
-
==
|
71
|
+
== Working with a collection
|
72
|
+
|
73
|
+
A collection allows aggregate operations to be performed on itself and on its
|
74
|
+
members. A collection is identified by its 'collection token'. This is an
|
75
|
+
alphanumeric six character string (e.g. 'a0ret4'). A collection can be found if
|
76
|
+
its token is known :
|
77
|
+
|
78
|
+
collection = muddyit.collections.find('a0ret4')
|
79
|
+
|
80
|
+
=== Viewing all analysed pages
|
74
81
|
|
75
82
|
You can iterate through all the analysed pages in a collection, be aware that
|
76
83
|
the Muddy API provides the pages as paginated sets, so it may take some time to
|
@@ -87,25 +94,42 @@ for each new paginated set of results).
|
|
87
94
|
end
|
88
95
|
end
|
89
96
|
|
90
|
-
|
97
|
+
=== Finding a particular page or pages
|
91
98
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
it's token is known :
|
99
|
+
Each page in a collection is assigned a unique alphanumeric identifier. Whilst
|
100
|
+
this can be used to find a given page in a collection, it is possible to search
|
101
|
+
for the page using other attributes :
|
96
102
|
|
97
|
-
|
103
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
104
|
+
page = collection.pages.find(:all, :uri => 'http://news.bbc.co.uk/1/hi/business/8186840.stm').first
|
105
|
+
page = collection.pages.find(:all, :title => 'BBC NEWS | Business | ITV in 25m Friends Reunited sale').first
|
106
|
+
|
107
|
+
=== Refreshing a page's results
|
108
|
+
|
109
|
+
A page can be 'refreshed' (the entity extraction is run again) by calling the
|
110
|
+
refresh method on a page object :
|
111
|
+
|
112
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
113
|
+
updated_page = page.update
|
114
|
+
|
115
|
+
=== Deleting a page from a collection
|
98
116
|
|
99
|
-
|
117
|
+
A page can be removed from a collection by calling the 'destroy' method on a
|
118
|
+
page object :
|
100
119
|
|
101
|
-
|
120
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
121
|
+
page.destroy
|
122
|
+
|
123
|
+
=== View all pages containing entity 'Gordon Brown'
|
124
|
+
|
125
|
+
If we want to find all pages that reference the grounded entity for 'Gordon Brown' then
|
102
126
|
it can be searched for using its DBpedia URI :
|
103
127
|
|
104
128
|
require 'muddyit_fu'
|
105
129
|
muddyit = Muddyit.new('./config.yml')
|
106
130
|
collection = muddyit.collections.find('a0ret4')
|
107
131
|
collection.pages.find_by_entity('http://dbpedia.org/resource/Gordon_Brown') do |page|
|
108
|
-
puts page.identifier
|
132
|
+
puts "#{page.identifier} - #{page.title}"
|
109
133
|
end
|
110
134
|
|
111
135
|
=== Find related entities for 'Gordon Brown'
|
@@ -118,7 +142,7 @@ collection :
|
|
118
142
|
collection = muddyit.collections.find('a0ret4')
|
119
143
|
puts "Related entity\tOccurance
|
120
144
|
collection.entities.find_related('http://dbpedia.org/resource/Gordon_Brown').each do |entry|
|
121
|
-
puts "#{entry[:
|
145
|
+
puts "#{entry[:entity].uri}\t#{entry[:count]}"
|
122
146
|
end
|
123
147
|
|
124
148
|
=== Find related content for : http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
|
@@ -135,6 +159,17 @@ analysed page that has a uri 'http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
|
|
135
159
|
puts "#{results[:page].title} #{results[:count]}"
|
136
160
|
end
|
137
161
|
|
162
|
+
== Batch processing content and the Muddy queue
|
163
|
+
|
164
|
+
The Muddy platform runs a background job queue that allows many requests to be
|
165
|
+
made in quick succession (rather than waiting for the full extraction request to
|
166
|
+
complete), with analysis of the pages happening asynchronously via the queue
|
167
|
+
and being stored in the collection at a later time. This can be useful when trying
|
168
|
+
to analyse large content collections. To send a request to the queue use :
|
169
|
+
|
170
|
+
collection = muddyit.collections.find('a0ret4')
|
171
|
+
collection.pages.create('http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm', {:realtime => false})
|
172
|
+
|
138
173
|
== Contact
|
139
174
|
|
140
175
|
Author: Rob Lee
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.12
|
data/lib/muddyit/base.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
module Muddyit
|
2
2
|
|
3
|
+
class_attr_accessor :REST_ENDPOINT
|
4
|
+
|
5
|
+
@@REST_ENDPOINT = 'http://muddy.it'
|
6
|
+
|
3
7
|
def self.new(*params)
|
4
8
|
Muddyit::Base.new(*params)
|
5
9
|
end
|
6
10
|
|
11
|
+
# Shortcut class method for extract
|
12
|
+
def self.extract(doc, options={})
|
13
|
+
@muddyit = Muddyit.new()
|
14
|
+
@muddyit.extract(doc, options)
|
15
|
+
end
|
16
|
+
|
7
17
|
class Base
|
8
18
|
class_attr_accessor :http_open_timeout
|
9
19
|
class_attr_accessor :http_read_timeout
|
@@ -13,8 +23,6 @@ module Muddyit
|
|
13
23
|
@@http_open_timeout = 120
|
14
24
|
@@http_read_timeout = 120
|
15
25
|
|
16
|
-
REST_ENDPOINT = 'http://www.muddy.it'
|
17
|
-
|
18
26
|
# Set the request signing method
|
19
27
|
@@digest1 = OpenSSL::Digest::Digest.new("sha1")
|
20
28
|
@@digest256 = nil
|
@@ -47,7 +55,8 @@ module Muddyit
|
|
47
55
|
# access_token: CCC
|
48
56
|
# access_token_secret: DDD
|
49
57
|
#
|
50
|
-
def initialize(config_hash_or_file)
|
58
|
+
def initialize(config_hash_or_file = {})
|
59
|
+
|
51
60
|
if config_hash_or_file.is_a? Hash
|
52
61
|
config_hash_or_file.nested_symbolize_keys!
|
53
62
|
@username = config_hash_or_file[:username]
|
@@ -56,7 +65,7 @@ module Muddyit
|
|
56
65
|
@consumer_secret = config_hash_or_file[:consumer_secret]
|
57
66
|
@access_token = config_hash_or_file[:access_token]
|
58
67
|
@access_token_secret = config_hash_or_file[:access_token_secret]
|
59
|
-
@rest_endpoint = config_hash_or_file.
|
68
|
+
@rest_endpoint = config_hash_or_file.key?(:rest_endpoint) ? config_hash_or_file[:rest_endpoint] : Muddyit.REST_ENDPOINT
|
60
69
|
else
|
61
70
|
config = YAML.load_file(config_hash_or_file)
|
62
71
|
config.nested_symbolize_keys!
|
@@ -66,7 +75,7 @@ module Muddyit
|
|
66
75
|
@consumer_secret = config[:consumer_secret]
|
67
76
|
@access_token = config[:access_token]
|
68
77
|
@access_token_secret = config[:access_token_secret]
|
69
|
-
@rest_endpoint = config.
|
78
|
+
@rest_endpoint = config.key?(:rest_endpoint) ? config[:rest_endpoint] : Muddyit.REST_ENDPOINT
|
70
79
|
end
|
71
80
|
|
72
81
|
if !@consumer_key.nil?
|
@@ -75,10 +84,7 @@ module Muddyit
|
|
75
84
|
@accesstoken = ::OAuth::AccessToken.new(@consumer, @access_token, @access_token_secret)
|
76
85
|
elsif !@username.nil?
|
77
86
|
@auth_type = :basic
|
78
|
-
else
|
79
|
-
raise "unable to find authentication credentials"
|
80
87
|
end
|
81
|
-
|
82
88
|
end
|
83
89
|
|
84
90
|
# sends a request to the muddyit REST api
|
@@ -99,7 +105,7 @@ module Muddyit
|
|
99
105
|
case @auth_type
|
100
106
|
when :oauth
|
101
107
|
res = oauth_request_over_http(api_url, http_method, opts, body)
|
102
|
-
when :basic
|
108
|
+
when :basic, nil
|
103
109
|
res = basic_request_over_http(api_url, http_method, opts, body)
|
104
110
|
end
|
105
111
|
|
@@ -149,7 +155,7 @@ module Muddyit
|
|
149
155
|
response = self.send_request(api_url, :post, {}, body.to_json)
|
150
156
|
return Muddyit::Collections::Collection::Pages::Page.new(self, response)
|
151
157
|
end
|
152
|
-
|
158
|
+
|
153
159
|
protected
|
154
160
|
|
155
161
|
# For easier testing. You can mock this method with an XML file you're expecting to receive
|
@@ -175,6 +181,12 @@ module Muddyit
|
|
175
181
|
|
176
182
|
def basic_request_over_http(path, http_method, opts, data)
|
177
183
|
|
184
|
+
# We only allow access to /extract as an unauthenticated user
|
185
|
+
# all other paths should raise an error
|
186
|
+
if @auth_type == nil && path != '/extract'
|
187
|
+
raise "invalid authentication credentials supplied, are the details correct ?"
|
188
|
+
end
|
189
|
+
|
178
190
|
http_opts = { "Accept" => "application/json", "Content-Type" => "application/json", "User-Agent" => "muddyit_fu" }
|
179
191
|
query_string = opts.to_a.map {|x| x.join("=")}.join("&")
|
180
192
|
|
@@ -196,14 +208,12 @@ module Muddyit
|
|
196
208
|
request.basic_auth @username, @password
|
197
209
|
request["Content-Length"] = 0 # Default to 0
|
198
210
|
when :get
|
199
|
-
|
211
|
+
path_with_query_string = opts.empty? ? path : "#{path}?#{query_string}"
|
212
|
+
request = Net::HTTP::Get.new(path_with_query_string, headers)
|
200
213
|
request.basic_auth @username, @password
|
201
214
|
when :delete
|
202
215
|
request = Net::HTTP::Delete.new(path,headers)
|
203
216
|
request.basic_auth @username, @password
|
204
|
-
when :head
|
205
|
-
request = Net::HTTP::Head.new(path,headers)
|
206
|
-
request.basic_auth @username, @password
|
207
217
|
else
|
208
218
|
raise ArgumentError, "Don't know how to handle http_method: :#{http_method.to_s}"
|
209
219
|
end
|
data/muddyit_fu.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{muddyit_fu}
|
5
|
-
s.version = "0.2.
|
5
|
+
s.version = "0.2.12"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["rattle"]
|
9
|
-
s.date = %q{2010-01-
|
9
|
+
s.date = %q{2010-01-18}
|
10
10
|
s.email = %q{support[at]muddy.it}
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
@@ -45,7 +45,8 @@ Gem::Specification.new do |s|
|
|
45
45
|
s.rubygems_version = %q{1.3.5}
|
46
46
|
s.summary = %q{Provides a ruby interface to muddy.it}
|
47
47
|
s.test_files = [
|
48
|
-
"test/
|
48
|
+
"test/thing.rb",
|
49
|
+
"test/test_muddyit_fu.rb",
|
49
50
|
"test/test_helper.rb",
|
50
51
|
"examples/newsindexer.rb",
|
51
52
|
"examples/oauth.rb"
|
data/test/test_muddyit_fu.rb
CHANGED
@@ -6,143 +6,157 @@ class TestMuddyitFu < Test::Unit::TestCase
|
|
6
6
|
@@COLLECTION_LABEL = Time.now.to_s
|
7
7
|
@@STORY = 'http://news.bbc.co.uk/1/hi/business/8186840.stm'
|
8
8
|
|
9
|
-
context 'A muddy account' do
|
9
|
+
context 'A user without a muddy account' do
|
10
10
|
|
11
11
|
setup do
|
12
12
|
c = load_config
|
13
|
-
|
14
|
-
@muddyit = Muddyit.new(:consumer_key => c['consumer_key'],
|
15
|
-
:consumer_secret => c['consumer_secret'],
|
16
|
-
:access_token => c['access_token'],
|
17
|
-
:access_token_secret => c['access_token_secret'],
|
18
|
-
:rest_endpoint => c['rest_endpoint'],
|
19
|
-
:username => c['username'],
|
20
|
-
:password => c['password'])
|
21
|
-
rescue
|
22
|
-
puts "Failed to connect to muddy, are the details correct ?"
|
23
|
-
end
|
13
|
+
Muddyit.REST_ENDPOINT = c['rest_endpoint'] if c.key?('rest_endpoint')
|
24
14
|
end
|
25
15
|
|
26
|
-
should "analyse a page without a collection" do
|
27
|
-
page =
|
16
|
+
should "be able to analyse a page without a collection" do
|
17
|
+
page = Muddyit.extract(@@STORY)
|
28
18
|
assert page.entities.length > 0
|
29
19
|
end
|
30
20
|
|
31
|
-
|
32
|
-
collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
33
|
-
assert !collection.token.nil?
|
34
|
-
end
|
35
|
-
|
36
|
-
should 'be able to find a collection' do
|
37
|
-
# This is a bit rubbish
|
38
|
-
@muddyit.collections.find(:all).each do |collection|
|
39
|
-
if collection.label == @@COLLECTION_LABEL
|
40
|
-
assert true
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
should 'be able to destroy a collection' do
|
46
|
-
# This is also a bit rubbish
|
47
|
-
collections = @muddyit.collections.find(:all)
|
48
|
-
collections.each do |collection|
|
49
|
-
if collection.label == @@COLLECTION_LABEL
|
50
|
-
res = collection.destroy
|
51
|
-
assert_equal res.code, "200"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
context "with a collection" do
|
21
|
+
end
|
57
22
|
|
23
|
+
context 'A user with a muddy account' do
|
24
|
+
|
58
25
|
setup do
|
59
|
-
|
26
|
+
c = load_config
|
27
|
+
begin
|
28
|
+
@muddyit = Muddyit.new(:consumer_key => c['consumer_key'],
|
29
|
+
:consumer_secret => c['consumer_secret'],
|
30
|
+
:access_token => c['access_token'],
|
31
|
+
:access_token_secret => c['access_token_secret'],
|
32
|
+
:rest_endpoint => c['rest_endpoint'],
|
33
|
+
:username => c['username'],
|
34
|
+
:password => c['password'])
|
35
|
+
rescue
|
36
|
+
puts "Failed to connect to muddy, are the details correct ?"
|
37
|
+
end
|
60
38
|
end
|
61
|
-
|
62
|
-
should "
|
63
|
-
page = @
|
39
|
+
|
40
|
+
should "be able to analyse a page without a collection" do
|
41
|
+
page = @muddyit.extract(@@STORY)
|
64
42
|
assert page.entities.length > 0
|
65
|
-
pages = @collection.pages.find(:all)
|
66
|
-
assert pages[:pages].length == 0
|
67
43
|
end
|
68
|
-
|
69
|
-
should
|
70
|
-
|
71
|
-
assert
|
72
|
-
pages = @collection.pages.find(:all)
|
73
|
-
assert_equal pages[:pages].length, 1
|
44
|
+
|
45
|
+
should 'be able to create a collection' do
|
46
|
+
collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
47
|
+
assert !collection.token.nil?
|
74
48
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
49
|
+
|
50
|
+
should 'be able to find a collection' do
|
51
|
+
# This is a bit rubbish
|
52
|
+
@muddyit.collections.find(:all).each do |collection|
|
53
|
+
if collection.label == @@COLLECTION_LABEL
|
54
|
+
assert true
|
55
|
+
end
|
80
56
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
assert !@page.uri.nil?
|
92
|
-
#assert !@page.token.nil?
|
93
|
-
# More attributes here ?
|
57
|
+
end
|
58
|
+
|
59
|
+
should 'be able to destroy a collection' do
|
60
|
+
# This is also a bit rubbish
|
61
|
+
collections = @muddyit.collections.find(:all)
|
62
|
+
collections.each do |collection|
|
63
|
+
if collection.label == @@COLLECTION_LABEL
|
64
|
+
res = collection.destroy
|
65
|
+
assert_equal res.code, "200"
|
66
|
+
end
|
94
67
|
end
|
95
|
-
|
96
|
-
|
97
|
-
|
68
|
+
end
|
69
|
+
|
70
|
+
context "with a collection" do
|
71
|
+
|
72
|
+
setup do
|
73
|
+
@collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
98
74
|
end
|
99
|
-
|
100
|
-
should "
|
101
|
-
|
102
|
-
assert
|
103
|
-
|
75
|
+
|
76
|
+
should "categorise a page in realtime and not store it" do
|
77
|
+
page = @collection.pages.create({:uri => @@STORY}, :realtime => true, :store => false)
|
78
|
+
assert page.entities.length > 0
|
79
|
+
pages = @collection.pages.find(:all)
|
80
|
+
assert pages[:pages].length == 0
|
104
81
|
end
|
105
|
-
|
106
|
-
should "
|
107
|
-
|
108
|
-
assert
|
109
|
-
|
110
|
-
|
82
|
+
|
83
|
+
should "categorise a page in realtime and store it" do
|
84
|
+
page = @collection.pages.create({:uri => @@STORY}, :realtime => true, :store => true)
|
85
|
+
assert page.entities.length > 0
|
86
|
+
pages = @collection.pages.find(:all)
|
87
|
+
assert_equal pages[:pages].length, 1
|
111
88
|
end
|
112
|
-
|
113
|
-
|
114
|
-
|
89
|
+
|
90
|
+
context "with a page" do
|
91
|
+
|
92
|
+
setup do
|
93
|
+
@page = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
94
|
+
end
|
95
|
+
|
96
|
+
should "find a page" do
|
97
|
+
assert_equal @collection.pages.find(@page.identifier).identifier, @page.identifier
|
98
|
+
end
|
99
|
+
|
100
|
+
should "have page attributes" do
|
101
|
+
assert !@page.identifier.nil?
|
102
|
+
assert !@page.title.nil?
|
103
|
+
assert !@page.created_at.nil?
|
104
|
+
assert !@page.content.nil?
|
105
|
+
assert !@page.uri.nil?
|
106
|
+
#assert !@page.token.nil?
|
107
|
+
# More attributes here ?
|
108
|
+
end
|
109
|
+
|
110
|
+
should "have many entities" do
|
111
|
+
assert @page.entities.length > 0
|
112
|
+
end
|
113
|
+
|
114
|
+
should "have an entity with a term and label" do
|
115
|
+
entity = @page.entities.first
|
116
|
+
assert !entity.term.nil?
|
117
|
+
assert !entity.uri.nil?
|
118
|
+
end
|
119
|
+
|
120
|
+
should "have extracted content" do
|
121
|
+
assert !@page.extracted_content.content.nil?
|
122
|
+
assert @page.extracted_content.terms.length > 0
|
123
|
+
assert @page.extracted_content.start_position > 0
|
124
|
+
assert @page.extracted_content.end_position > 0
|
125
|
+
end
|
126
|
+
|
127
|
+
should "delete a page" do
|
128
|
+
assert @page.destroy, "200"
|
129
|
+
end
|
130
|
+
|
115
131
|
end
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
132
|
+
|
133
|
+
context "with two pages" do
|
134
|
+
|
135
|
+
setup do
|
136
|
+
@page1 = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
137
|
+
@page2 = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
138
|
+
end
|
139
|
+
|
140
|
+
should "find all pages" do
|
141
|
+
assert_equal @collection.pages.find(:all).length, 2
|
142
|
+
end
|
143
|
+
|
144
|
+
should "find related pages" do
|
145
|
+
assert_equal @page1.related_content.length, 1
|
146
|
+
end
|
147
|
+
|
124
148
|
end
|
125
|
-
|
126
|
-
|
127
|
-
|
149
|
+
|
150
|
+
teardown do
|
151
|
+
#token = @collection.token
|
152
|
+
@collection.destroy
|
153
|
+
#res = @muddyit.collections.find(token)
|
154
|
+
# This should be a 404 (!)
|
155
|
+
#assert_equal res.code, "404"
|
128
156
|
end
|
129
|
-
|
130
|
-
should "find related pages" do
|
131
|
-
assert_equal @page1.related_content.length, 1
|
132
|
-
end
|
133
|
-
|
157
|
+
|
134
158
|
end
|
135
|
-
|
136
|
-
teardown do
|
137
|
-
#token = @collection.token
|
138
|
-
@collection.destroy
|
139
|
-
#res = @muddyit.collections.find(token)
|
140
|
-
# This should be a 404 (!)
|
141
|
-
#assert_equal res.code, "404"
|
142
|
-
end
|
143
|
-
|
159
|
+
|
144
160
|
end
|
145
|
-
|
146
|
-
end
|
147
161
|
end
|
148
162
|
|
data/test/thing.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
|
6
|
+
require 'muddyit_fu'
|
7
|
+
Muddyit.REST_ENDPOINT = 'http://staging.muddy.it'
|
8
|
+
#muddyit = Muddyit.new('./config.yml')
|
9
|
+
page = Muddyit.extract(ARGV[0], :disambiguate => true, :include_unclassified => true, :include_content => true)
|
10
|
+
pp page.extracted_content.terms
|
11
|
+
page.entities.each do |entity|
|
12
|
+
puts "\t#{entity.term}, #{entity.uri}, #{entity.classification}"
|
13
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: muddyit_fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rattle
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-18 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -96,6 +96,7 @@ signing_key:
|
|
96
96
|
specification_version: 3
|
97
97
|
summary: Provides a ruby interface to muddy.it
|
98
98
|
test_files:
|
99
|
+
- test/thing.rb
|
99
100
|
- test/test_muddyit_fu.rb
|
100
101
|
- test/test_helper.rb
|
101
102
|
- examples/newsindexer.rb
|