muddyit_fu 0.2.11 → 0.2.12
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.rdoc +51 -16
- data/VERSION +1 -1
- data/lib/muddyit/base.rb +24 -14
- data/muddyit_fu.gemspec +4 -3
- data/test/test_muddyit_fu.rb +128 -114
- data/test/thing.rb +13 -0
- metadata +3 -2
data/README.rdoc
CHANGED
@@ -2,9 +2,7 @@
|
|
2
2
|
|
3
3
|
Muddy is an information extraction platform. For further
|
4
4
|
details see the '{Getting Started with Muddy}[http://blog.muddy.it/2009/11/getting-started-with-muddy]'
|
5
|
-
article. This gem provides access to the Muddy platform via it's API
|
6
|
-
|
7
|
-
{Muddy Developer Guide}[http://muddy.it/developers/]
|
5
|
+
article. This gem provides access to the Muddy platform via its API (see {Muddy Developer Guide}[http://muddy.it/developers/]).
|
8
6
|
|
9
7
|
== Installation
|
10
8
|
|
@@ -16,7 +14,7 @@ article. This gem provides access to the Muddy platform via it's API :
|
|
16
14
|
|
17
15
|
Muddy supports OAuth and HTTP Basic auth for authentication and authorisation.
|
18
16
|
We recommend you use OAuth wherever possible when accessing Muddy. An example
|
19
|
-
of using OAuth with the
|
17
|
+
of using OAuth with the Muddy platform is described in the
|
20
18
|
{Building with Muddy and OAuth}[http://blog.muddy.it/2010/01/building-with-muddy-and-oauth]
|
21
19
|
article.
|
22
20
|
|
@@ -59,7 +57,7 @@ URL rather than text, just specify a URL instead :
|
|
59
57
|
|
60
58
|
Muddy allows you to store the entity extraction results so aggregate operations
|
61
59
|
can be performed over a collection of content (a 'collection' has many analysed 'pages').
|
62
|
-
A basic
|
60
|
+
A basic Muddy account provides a single 'collection' where extraction results
|
63
61
|
can be stored.
|
64
62
|
|
65
63
|
To store a page against a collection, the collection must first be found :
|
@@ -70,7 +68,16 @@ Once a collection has been found, entity extraction results can be stored in it:
|
|
70
68
|
|
71
69
|
collection.pages.create('http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm', {:minium_confidence => 0.2})
|
72
70
|
|
73
|
-
==
|
71
|
+
== Working with a collection
|
72
|
+
|
73
|
+
A collection allows aggregate operations to be performed on itself and on its
|
74
|
+
members. A collection is identified by its 'collection token'. This is an
|
75
|
+
alphanumeric six character string (e.g. 'a0ret4'). A collection can be found if
|
76
|
+
its token is known :
|
77
|
+
|
78
|
+
collection = muddyit.collections.find('a0ret4')
|
79
|
+
|
80
|
+
=== Viewing all analysed pages
|
74
81
|
|
75
82
|
You can iterate through all the analysed pages in a collection, be aware that
|
76
83
|
the Muddy API provides the pages as paginated sets, so it may take some time to
|
@@ -87,25 +94,42 @@ for each new paginated set of results).
|
|
87
94
|
end
|
88
95
|
end
|
89
96
|
|
90
|
-
|
97
|
+
=== Finding a particular page or pages
|
91
98
|
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
it's token is known :
|
99
|
+
Each page in a collection is assigned a unique alphanumeric identifier. Whilst
|
100
|
+
this can be used to find a given page in a collection, it is possible to search
|
101
|
+
for the page using other attributes :
|
96
102
|
|
97
|
-
|
103
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
104
|
+
page = collection.pages.find(:all, :uri => 'http://news.bbc.co.uk/1/hi/business/8186840.stm').first
|
105
|
+
page = collection.pages.find(:all, :title => 'BBC NEWS | Business | ITV in 25m Friends Reunited sale').first
|
106
|
+
|
107
|
+
=== Refreshing a page's results
|
108
|
+
|
109
|
+
A page can be 'refreshed' (the entity extraction is run again) by calling the
|
110
|
+
refresh method on a page object :
|
111
|
+
|
112
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
113
|
+
updated_page = page.update
|
114
|
+
|
115
|
+
=== Deleting a page from a collection
|
98
116
|
|
99
|
-
|
117
|
+
A page can be removed from a collection by calling the 'destroy' method on a
|
118
|
+
page object :
|
100
119
|
|
101
|
-
|
120
|
+
page = collection.pages.find('5d0e32b6-fd0b-400a-ac49-dae965a292df')
|
121
|
+
page.destroy
|
122
|
+
|
123
|
+
=== View all pages containing entity 'Gordon Brown'
|
124
|
+
|
125
|
+
If we want to find all pages that reference the grounded entity for 'Gordon Brown' then
|
102
126
|
it can be searched for using its DBpedia URI :
|
103
127
|
|
104
128
|
require 'muddyit_fu'
|
105
129
|
muddyit = Muddyit.new('./config.yml')
|
106
130
|
collection = muddyit.collections.find('a0ret4')
|
107
131
|
collection.pages.find_by_entity('http://dbpedia.org/resource/Gordon_Brown') do |page|
|
108
|
-
puts page.identifier
|
132
|
+
puts "#{page.identifier} - #{page.title}"
|
109
133
|
end
|
110
134
|
|
111
135
|
=== Find related entities for 'Gordon Brown'
|
@@ -118,7 +142,7 @@ collection :
|
|
118
142
|
collection = muddyit.collections.find('a0ret4')
|
119
143
|
puts "Related entity\tOccurance
|
120
144
|
collection.entities.find_related('http://dbpedia.org/resource/Gordon_Brown').each do |entry|
|
121
|
-
puts "#{entry[:
|
145
|
+
puts "#{entry[:entity].uri}\t#{entry[:count]}"
|
122
146
|
end
|
123
147
|
|
124
148
|
=== Find related content for : http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
|
@@ -135,6 +159,17 @@ analysed page that has a uri 'http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
|
|
135
159
|
puts "#{results[:page].title} #{results[:count]}"
|
136
160
|
end
|
137
161
|
|
162
|
+
== Batch processing content and the Muddy queue
|
163
|
+
|
164
|
+
The Muddy platform runs a background job queue that allows many requests to be
|
165
|
+
made in quick succession (rather than waiting for the full extraction request to
|
166
|
+
complete), with analysis of the pages happening asynchronously via the queue
|
167
|
+
and being stored in the collection at a later time. This can be useful when trying
|
168
|
+
to analyse large content collections. To send a request to the queue use :
|
169
|
+
|
170
|
+
collection = muddyit.collections.find('a0ret4')
|
171
|
+
collection.pages.create('http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm', {:realtime => false})
|
172
|
+
|
138
173
|
== Contact
|
139
174
|
|
140
175
|
Author: Rob Lee
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.2.
|
1
|
+
0.2.12
|
data/lib/muddyit/base.rb
CHANGED
@@ -1,9 +1,19 @@
|
|
1
1
|
module Muddyit
|
2
2
|
|
3
|
+
class_attr_accessor :REST_ENDPOINT
|
4
|
+
|
5
|
+
@@REST_ENDPOINT = 'http://muddy.it'
|
6
|
+
|
3
7
|
def self.new(*params)
|
4
8
|
Muddyit::Base.new(*params)
|
5
9
|
end
|
6
10
|
|
11
|
+
# Shortcut class method for extract
|
12
|
+
def self.extract(doc, options={})
|
13
|
+
@muddyit = Muddyit.new()
|
14
|
+
@muddyit.extract(doc, options)
|
15
|
+
end
|
16
|
+
|
7
17
|
class Base
|
8
18
|
class_attr_accessor :http_open_timeout
|
9
19
|
class_attr_accessor :http_read_timeout
|
@@ -13,8 +23,6 @@ module Muddyit
|
|
13
23
|
@@http_open_timeout = 120
|
14
24
|
@@http_read_timeout = 120
|
15
25
|
|
16
|
-
REST_ENDPOINT = 'http://www.muddy.it'
|
17
|
-
|
18
26
|
# Set the request signing method
|
19
27
|
@@digest1 = OpenSSL::Digest::Digest.new("sha1")
|
20
28
|
@@digest256 = nil
|
@@ -47,7 +55,8 @@ module Muddyit
|
|
47
55
|
# access_token: CCC
|
48
56
|
# access_token_secret: DDD
|
49
57
|
#
|
50
|
-
def initialize(config_hash_or_file)
|
58
|
+
def initialize(config_hash_or_file = {})
|
59
|
+
|
51
60
|
if config_hash_or_file.is_a? Hash
|
52
61
|
config_hash_or_file.nested_symbolize_keys!
|
53
62
|
@username = config_hash_or_file[:username]
|
@@ -56,7 +65,7 @@ module Muddyit
|
|
56
65
|
@consumer_secret = config_hash_or_file[:consumer_secret]
|
57
66
|
@access_token = config_hash_or_file[:access_token]
|
58
67
|
@access_token_secret = config_hash_or_file[:access_token_secret]
|
59
|
-
@rest_endpoint = config_hash_or_file.
|
68
|
+
@rest_endpoint = config_hash_or_file.key?(:rest_endpoint) ? config_hash_or_file[:rest_endpoint] : Muddyit.REST_ENDPOINT
|
60
69
|
else
|
61
70
|
config = YAML.load_file(config_hash_or_file)
|
62
71
|
config.nested_symbolize_keys!
|
@@ -66,7 +75,7 @@ module Muddyit
|
|
66
75
|
@consumer_secret = config[:consumer_secret]
|
67
76
|
@access_token = config[:access_token]
|
68
77
|
@access_token_secret = config[:access_token_secret]
|
69
|
-
@rest_endpoint = config.
|
78
|
+
@rest_endpoint = config.key?(:rest_endpoint) ? config[:rest_endpoint] : Muddyit.REST_ENDPOINT
|
70
79
|
end
|
71
80
|
|
72
81
|
if !@consumer_key.nil?
|
@@ -75,10 +84,7 @@ module Muddyit
|
|
75
84
|
@accesstoken = ::OAuth::AccessToken.new(@consumer, @access_token, @access_token_secret)
|
76
85
|
elsif !@username.nil?
|
77
86
|
@auth_type = :basic
|
78
|
-
else
|
79
|
-
raise "unable to find authentication credentials"
|
80
87
|
end
|
81
|
-
|
82
88
|
end
|
83
89
|
|
84
90
|
# sends a request to the muddyit REST api
|
@@ -99,7 +105,7 @@ module Muddyit
|
|
99
105
|
case @auth_type
|
100
106
|
when :oauth
|
101
107
|
res = oauth_request_over_http(api_url, http_method, opts, body)
|
102
|
-
when :basic
|
108
|
+
when :basic, nil
|
103
109
|
res = basic_request_over_http(api_url, http_method, opts, body)
|
104
110
|
end
|
105
111
|
|
@@ -149,7 +155,7 @@ module Muddyit
|
|
149
155
|
response = self.send_request(api_url, :post, {}, body.to_json)
|
150
156
|
return Muddyit::Collections::Collection::Pages::Page.new(self, response)
|
151
157
|
end
|
152
|
-
|
158
|
+
|
153
159
|
protected
|
154
160
|
|
155
161
|
# For easier testing. You can mock this method with a XML file you re expecting to receive
|
@@ -175,6 +181,12 @@ module Muddyit
|
|
175
181
|
|
176
182
|
def basic_request_over_http(path, http_method, opts, data)
|
177
183
|
|
184
|
+
# We only allow access to /extract as an unauthenticated user
|
185
|
+
# all other paths should raise an error
|
186
|
+
if @auth_type == nil && path != '/extract'
|
187
|
+
raise "invalid authentication credentials supplied, are the details correct ?"
|
188
|
+
end
|
189
|
+
|
178
190
|
http_opts = { "Accept" => "application/json", "Content-Type" => "application/json", "User-Agent" => "muddyit_fu" }
|
179
191
|
query_string = opts.to_a.map {|x| x.join("=")}.join("&")
|
180
192
|
|
@@ -196,14 +208,12 @@ module Muddyit
|
|
196
208
|
request.basic_auth @username, @password
|
197
209
|
request["Content-Length"] = 0 # Default to 0
|
198
210
|
when :get
|
199
|
-
|
211
|
+
path_with_query_string = opts.empty? ? path : "#{path}?#{query_string}"
|
212
|
+
request = Net::HTTP::Get.new(path_with_query_string, headers)
|
200
213
|
request.basic_auth @username, @password
|
201
214
|
when :delete
|
202
215
|
request = Net::HTTP::Delete.new(path,headers)
|
203
216
|
request.basic_auth @username, @password
|
204
|
-
when :head
|
205
|
-
request = Net::HTTP::Head.new(path,headers)
|
206
|
-
request.basic_auth @username, @password
|
207
217
|
else
|
208
218
|
raise ArgumentError, "Don't know how to handle http_method: :#{http_method.to_s}"
|
209
219
|
end
|
data/muddyit_fu.gemspec
CHANGED
@@ -2,11 +2,11 @@
|
|
2
2
|
|
3
3
|
Gem::Specification.new do |s|
|
4
4
|
s.name = %q{muddyit_fu}
|
5
|
-
s.version = "0.2.
|
5
|
+
s.version = "0.2.12"
|
6
6
|
|
7
7
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
8
|
s.authors = ["rattle"]
|
9
|
-
s.date = %q{2010-01-
|
9
|
+
s.date = %q{2010-01-18}
|
10
10
|
s.email = %q{support[at]muddy.it}
|
11
11
|
s.extra_rdoc_files = [
|
12
12
|
"LICENSE",
|
@@ -45,7 +45,8 @@ Gem::Specification.new do |s|
|
|
45
45
|
s.rubygems_version = %q{1.3.5}
|
46
46
|
s.summary = %q{Provides a ruby interface to muddy.it}
|
47
47
|
s.test_files = [
|
48
|
-
"test/
|
48
|
+
"test/thing.rb",
|
49
|
+
"test/test_muddyit_fu.rb",
|
49
50
|
"test/test_helper.rb",
|
50
51
|
"examples/newsindexer.rb",
|
51
52
|
"examples/oauth.rb"
|
data/test/test_muddyit_fu.rb
CHANGED
@@ -6,143 +6,157 @@ class TestMuddyitFu < Test::Unit::TestCase
|
|
6
6
|
@@COLLECTION_LABEL = Time.now.to_s
|
7
7
|
@@STORY = 'http://news.bbc.co.uk/1/hi/business/8186840.stm'
|
8
8
|
|
9
|
-
context 'A muddy account' do
|
9
|
+
context 'A user without a muddy account' do
|
10
10
|
|
11
11
|
setup do
|
12
12
|
c = load_config
|
13
|
-
|
14
|
-
@muddyit = Muddyit.new(:consumer_key => c['consumer_key'],
|
15
|
-
:consumer_secret => c['consumer_secret'],
|
16
|
-
:access_token => c['access_token'],
|
17
|
-
:access_token_secret => c['access_token_secret'],
|
18
|
-
:rest_endpoint => c['rest_endpoint'],
|
19
|
-
:username => c['username'],
|
20
|
-
:password => c['password'])
|
21
|
-
rescue
|
22
|
-
puts "Failed to connect to muddy, are the details correct ?"
|
23
|
-
end
|
13
|
+
Muddyit.REST_ENDPOINT = c['rest_endpoint'] if c.key?('rest_endpoint')
|
24
14
|
end
|
25
15
|
|
26
|
-
should "analyse a page without a collection" do
|
27
|
-
page =
|
16
|
+
should "be able to analyse a page without a collection" do
|
17
|
+
page = Muddyit.extract(@@STORY)
|
28
18
|
assert page.entities.length > 0
|
29
19
|
end
|
30
20
|
|
31
|
-
|
32
|
-
collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
33
|
-
assert !collection.token.nil?
|
34
|
-
end
|
35
|
-
|
36
|
-
should 'be able to find a collection' do
|
37
|
-
# This is a bit rubbish
|
38
|
-
@muddyit.collections.find(:all).each do |collection|
|
39
|
-
if collection.label == @@COLLECTION_LABEL
|
40
|
-
assert true
|
41
|
-
end
|
42
|
-
end
|
43
|
-
end
|
44
|
-
|
45
|
-
should 'be able to destroy a collection' do
|
46
|
-
# This is also a bit rubbish
|
47
|
-
collections = @muddyit.collections.find(:all)
|
48
|
-
collections.each do |collection|
|
49
|
-
if collection.label == @@COLLECTION_LABEL
|
50
|
-
res = collection.destroy
|
51
|
-
assert_equal res.code, "200"
|
52
|
-
end
|
53
|
-
end
|
54
|
-
end
|
55
|
-
|
56
|
-
context "with a collection" do
|
21
|
+
end
|
57
22
|
|
23
|
+
context 'A user with a muddy account' do
|
24
|
+
|
58
25
|
setup do
|
59
|
-
|
26
|
+
c = load_config
|
27
|
+
begin
|
28
|
+
@muddyit = Muddyit.new(:consumer_key => c['consumer_key'],
|
29
|
+
:consumer_secret => c['consumer_secret'],
|
30
|
+
:access_token => c['access_token'],
|
31
|
+
:access_token_secret => c['access_token_secret'],
|
32
|
+
:rest_endpoint => c['rest_endpoint'],
|
33
|
+
:username => c['username'],
|
34
|
+
:password => c['password'])
|
35
|
+
rescue
|
36
|
+
puts "Failed to connect to muddy, are the details correct ?"
|
37
|
+
end
|
60
38
|
end
|
61
|
-
|
62
|
-
should "
|
63
|
-
page = @
|
39
|
+
|
40
|
+
should "be able to analyse a page without a collection" do
|
41
|
+
page = @muddyit.extract(@@STORY)
|
64
42
|
assert page.entities.length > 0
|
65
|
-
pages = @collection.pages.find(:all)
|
66
|
-
assert pages[:pages].length == 0
|
67
43
|
end
|
68
|
-
|
69
|
-
should
|
70
|
-
|
71
|
-
assert
|
72
|
-
pages = @collection.pages.find(:all)
|
73
|
-
assert_equal pages[:pages].length, 1
|
44
|
+
|
45
|
+
should 'be able to create a collection' do
|
46
|
+
collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
47
|
+
assert !collection.token.nil?
|
74
48
|
end
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
49
|
+
|
50
|
+
should 'be able to find a collection' do
|
51
|
+
# This is a bit rubbish
|
52
|
+
@muddyit.collections.find(:all).each do |collection|
|
53
|
+
if collection.label == @@COLLECTION_LABEL
|
54
|
+
assert true
|
55
|
+
end
|
80
56
|
end
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
assert !@page.uri.nil?
|
92
|
-
#assert !@page.token.nil?
|
93
|
-
# More attributes here ?
|
57
|
+
end
|
58
|
+
|
59
|
+
should 'be able to destroy a collection' do
|
60
|
+
# This is also a bit rubbish
|
61
|
+
collections = @muddyit.collections.find(:all)
|
62
|
+
collections.each do |collection|
|
63
|
+
if collection.label == @@COLLECTION_LABEL
|
64
|
+
res = collection.destroy
|
65
|
+
assert_equal res.code, "200"
|
66
|
+
end
|
94
67
|
end
|
95
|
-
|
96
|
-
|
97
|
-
|
68
|
+
end
|
69
|
+
|
70
|
+
context "with a collection" do
|
71
|
+
|
72
|
+
setup do
|
73
|
+
@collection = @muddyit.collections.create(@@COLLECTION_LABEL, 'http://www.test.com')
|
98
74
|
end
|
99
|
-
|
100
|
-
should "
|
101
|
-
|
102
|
-
assert
|
103
|
-
|
75
|
+
|
76
|
+
should "categorise a page in realtime and not store it" do
|
77
|
+
page = @collection.pages.create({:uri => @@STORY}, :realtime => true, :store => false)
|
78
|
+
assert page.entities.length > 0
|
79
|
+
pages = @collection.pages.find(:all)
|
80
|
+
assert pages[:pages].length == 0
|
104
81
|
end
|
105
|
-
|
106
|
-
should "
|
107
|
-
|
108
|
-
assert
|
109
|
-
|
110
|
-
|
82
|
+
|
83
|
+
should "categorise a page in realtime and store it" do
|
84
|
+
page = @collection.pages.create({:uri => @@STORY}, :realtime => true, :store => true)
|
85
|
+
assert page.entities.length > 0
|
86
|
+
pages = @collection.pages.find(:all)
|
87
|
+
assert_equal pages[:pages].length, 1
|
111
88
|
end
|
112
|
-
|
113
|
-
|
114
|
-
|
89
|
+
|
90
|
+
context "with a page" do
|
91
|
+
|
92
|
+
setup do
|
93
|
+
@page = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
94
|
+
end
|
95
|
+
|
96
|
+
should "find a page" do
|
97
|
+
assert_equal @collection.pages.find(@page.identifier).identifier, @page.identifier
|
98
|
+
end
|
99
|
+
|
100
|
+
should "have page attributes" do
|
101
|
+
assert !@page.identifier.nil?
|
102
|
+
assert !@page.title.nil?
|
103
|
+
assert !@page.created_at.nil?
|
104
|
+
assert !@page.content.nil?
|
105
|
+
assert !@page.uri.nil?
|
106
|
+
#assert !@page.token.nil?
|
107
|
+
# More attributes here ?
|
108
|
+
end
|
109
|
+
|
110
|
+
should "have many entities" do
|
111
|
+
assert @page.entities.length > 0
|
112
|
+
end
|
113
|
+
|
114
|
+
should "have an entity with a term and label" do
|
115
|
+
entity = @page.entities.first
|
116
|
+
assert !entity.term.nil?
|
117
|
+
assert !entity.uri.nil?
|
118
|
+
end
|
119
|
+
|
120
|
+
should "have extracted content" do
|
121
|
+
assert !@page.extracted_content.content.nil?
|
122
|
+
assert @page.extracted_content.terms.length > 0
|
123
|
+
assert @page.extracted_content.start_position > 0
|
124
|
+
assert @page.extracted_content.end_position > 0
|
125
|
+
end
|
126
|
+
|
127
|
+
should "delete a page" do
|
128
|
+
assert @page.destroy, "200"
|
129
|
+
end
|
130
|
+
|
115
131
|
end
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
132
|
+
|
133
|
+
context "with two pages" do
|
134
|
+
|
135
|
+
setup do
|
136
|
+
@page1 = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
137
|
+
@page2 = @collection.pages.create({:uri => @@STORY}, :realtime => true)
|
138
|
+
end
|
139
|
+
|
140
|
+
should "find all pages" do
|
141
|
+
assert_equal @collection.pages.find(:all).length, 2
|
142
|
+
end
|
143
|
+
|
144
|
+
should "find related pages" do
|
145
|
+
assert_equal @page1.related_content.length, 1
|
146
|
+
end
|
147
|
+
|
124
148
|
end
|
125
|
-
|
126
|
-
|
127
|
-
|
149
|
+
|
150
|
+
teardown do
|
151
|
+
#token = @collection.token
|
152
|
+
@collection.destroy
|
153
|
+
#res = @muddyit.collections.find(token)
|
154
|
+
# This should be a 404 (!)
|
155
|
+
#assert_equal res.code, "404"
|
128
156
|
end
|
129
|
-
|
130
|
-
should "find related pages" do
|
131
|
-
assert_equal @page1.related_content.length, 1
|
132
|
-
end
|
133
|
-
|
157
|
+
|
134
158
|
end
|
135
|
-
|
136
|
-
teardown do
|
137
|
-
#token = @collection.token
|
138
|
-
@collection.destroy
|
139
|
-
#res = @muddyit.collections.find(token)
|
140
|
-
# This should be a 404 (!)
|
141
|
-
#assert_equal res.code, "404"
|
142
|
-
end
|
143
|
-
|
159
|
+
|
144
160
|
end
|
145
|
-
|
146
|
-
end
|
147
161
|
end
|
148
162
|
|
data/test/thing.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
#!/usr/bin/ruby
|
2
|
+
|
3
|
+
require 'rubygems'
|
4
|
+
|
5
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__) + '/../lib')
|
6
|
+
require 'muddyit_fu'
|
7
|
+
Muddyit.REST_ENDPOINT = 'http://staging.muddy.it'
|
8
|
+
#muddyit = Muddyit.new('./config.yml')
|
9
|
+
page = Muddyit.extract(ARGV[0], :disambiguate => true, :include_unclassified => true, :include_content => true)
|
10
|
+
pp page.extracted_content.terms
|
11
|
+
page.entities.each do |entity|
|
12
|
+
puts "\t#{entity.term}, #{entity.uri}, #{entity.classification}"
|
13
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: muddyit_fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.2.
|
4
|
+
version: 0.2.12
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- rattle
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2010-01-
|
12
|
+
date: 2010-01-18 00:00:00 +00:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -96,6 +96,7 @@ signing_key:
|
|
96
96
|
specification_version: 3
|
97
97
|
summary: Provides a ruby interface to muddy.it
|
98
98
|
test_files:
|
99
|
+
- test/thing.rb
|
99
100
|
- test/test_muddyit_fu.rb
|
100
101
|
- test/test_helper.rb
|
101
102
|
- examples/newsindexer.rb
|