monkeyhelper-muddyit_fu 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +2 -0
- data/CHANGELOG +25 -0
- data/LICENSE +20 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/lib/muddyit/base.rb +184 -0
- data/lib/muddyit/content_data.rb +21 -0
- data/lib/muddyit/entity.rb +18 -0
- data/lib/muddyit/errors.rb +19 -0
- data/lib/muddyit/generic.rb +59 -0
- data/lib/muddyit/page.rb +95 -0
- data/lib/muddyit/pages.rb +193 -0
- data/lib/muddyit/site.rb +14 -0
- data/lib/muddyit/sites.rb +45 -0
- data/lib/muddyit_fu.rb +52 -0
- data/muddyit_fu.gemspec +50 -0
- metadata +30 -23
- /data/{README → README.rdoc} +0 -0
data/.gitignore
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
muddyit_fu Changelog
|
2
|
+
|
3
|
+
0.1.0
|
4
|
+
- Moved to using Jeweler for gem management, which should fix the previously empty gems
|
5
|
+
|
6
|
+
0.0.4
|
7
|
+
- Fixed content_data bug when attributes hadn't been requested
|
8
|
+
- Added proper dbpedia entity support for Entity class
|
9
|
+
|
10
|
+
0.0.3
|
11
|
+
- Updated License
|
12
|
+
- Modified related and related_content functionality for pages to match muddy server API changes
|
13
|
+
|
14
|
+
0.0.2
|
15
|
+
|
16
|
+
- Added License
|
17
|
+
- Added Changelog (!)
|
18
|
+
- Moved back to using JSON for JRuby people
|
19
|
+
- Improved content data terms data struct
|
20
|
+
- Cached content data struct for a page object
|
21
|
+
- Added a 'classification' method to entity to make up for type being an existing method
|
22
|
+
|
23
|
+
0.0.1
|
24
|
+
|
25
|
+
- First Release
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Rob Lee
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
require 'rake'
require 'yaml'

# Gem packaging tasks (provided by Jeweler when it is installed).
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "muddyit_fu"
    gem.summary = "Provides a ruby interface to muddy.it"
    gem.email = "robl[at]monkeyhelper.com"
    gem.homepage = "http://github.com/monkeyhelper/muddyit_fu"
    gem.authors = ["robl"]
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage task; replaced by a task that aborts with install instructions
# when rcov cannot be loaded.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  # Work out the version for the rdoc title. VERSION.yml (major/minor/patch
  # keys) still wins when present; otherwise fall back to the plain-text
  # VERSION file, and finally to an empty string.
  version =
    if File.exist?('VERSION.yml')
      config = YAML.load(File.read('VERSION.yml'))
      "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    elsif File.exist?('VERSION')
      File.read('VERSION').strip
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "muddyit_fu #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
56
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/muddyit/base.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
module Muddyit

  # Shortcut so that Muddyit.new(...) builds a Muddyit::Base.
  def self.new(*params)
    Muddyit::Base.new(*params)
  end

  # Holds the API credentials and endpoint, and performs signed HTTP
  # requests against the muddy.it REST API.
  class Base
    attr_accessor :rest_endpoint
    attr_reader :access_key_id, :secret_access_key

    # HTTP timeouts. They are class variables, so they are shared by Base
    # and all of its subclasses.
    @@http_open_timeout = 60
    @@http_read_timeout = 60

    # Class-level accessors for the timeouts, written out explicitly
    # rather than generated through a core-class extension.
    def self.http_open_timeout
      @@http_open_timeout
    end

    def self.http_open_timeout=(value)
      @@http_open_timeout = value
    end

    def self.http_read_timeout
      @@http_read_timeout
    end

    def self.http_read_timeout=(value)
      @@http_read_timeout = value
    end

    REST_ENDPOINT = 'http://www.muddy.it/api'

    # Set the request signing method.
    # The algorithm-specific digest classes are used instead of
    # OpenSSL::Digest::Digest, which current openssl releases no longer define.
    @@digest1 = OpenSSL::Digest::SHA1.new
    @@digest256 = nil
    if OpenSSL::OPENSSL_VERSION_NUMBER > 0x00908000
      @@digest256 = begin
        OpenSSL::Digest::SHA256.new
      rescue StandardError
        nil # Some installations may not support sha256
      end
    end

    # create a new muddyit object
    #
    # You can either pass a hash with the following attributes:
    #
    # * :access_key_id (Required)
    #     the access key id
    # * :secret_access_key (Required)
    #     the secret access key
    # * :rest_endpoint (Optional)
    #     the muddy.it rest service endpoint
    # or:
    # * config_file (Required)
    #     yaml file to load configuration from
    #
    # Config Example (yaml file)
    # ---
    # access_key_id: YOUR_ACCESS_KEY_ID
    # secret_access_key: YOUR_SECRET_ACCESS_KEY
    #
    # Raises a RuntimeError when either key is missing. Note that a hash
    # argument has its keys symbolized in place.
    def initialize(config_hash_or_file)
      if config_hash_or_file.is_a? Hash
        config = config_hash_or_file
        source = 'config_hash'
      else
        config = YAML.load_file(config_hash_or_file)
        source = 'config file'
      end

      config.nested_symbolize_keys!
      @access_key_id = config[:access_key_id]
      @secret_access_key = config[:secret_access_key]
      @rest_endpoint = config.has_key?(:rest_endpoint) ? config[:rest_endpoint] : REST_ENDPOINT
      raise "#{source} must contain access_key_id and secret_access_key" unless @access_key_id && @secret_access_key
    end

    # sends a request to the muddyit REST api
    #
    # Params
    # * api_url (Required)
    #     the request url (uri.path)
    # * http_method (Optional)
    #     choose between GET (default), POST, PUT, DELETE http request.
    # * options (Optional)
    #     hash of query parameters, you do not need to include access_key_id, secret_access_key because these are added automatically
    #
    # Returns the parsed JSON response body.
    def send_request(api_url, http_method = :get, options= {})

      raise 'no api_url supplied' unless api_url

      res = request_over_http(api_url, http_method, options)

      # Strip any js wrapping method, i.e. parse only the text between the
      # parentheses of a "name( ... )" style body.
      if res.body =~ /^.+\((.+)\)$/
        JSON.parse($1)
      else
        JSON.parse(res.body)
      end
    end


    # creates and/or returns the Muddyit::Sites object
    def sites() @sites ||= Muddyit::Sites.new(self) end

    protected

    # For easier testing. You can mock this method to return the response you are expecting to receive
    #
    # Builds the request for the given verb, signs the parameters and sends
    # them as form data. Returns the response on HTTP success; any other
    # status is reported through Muddyit::Errors.error_for.
    def request_over_http(api_url, http_method, options)

      http_opts = { "Accept" => "application/json", "User-Agent" => "muddyit_fu" }
      url = URI.parse(api_url)

      req = case http_method
            when :get
              # keep any query string already present on the url
              target = url.query.nil? ? url.path : url.path+"?"+url.query
              Net::HTTP::Get.new(target, http_opts)
            when :post
              Net::HTTP::Post.new(url.path, http_opts)
            when :put
              Net::HTTP::Put.new(url.path, http_opts)
            when :delete
              Net::HTTP::Delete.new(url.path, http_opts)
            else
              raise 'invalid http method specified'
            end

      options = calculate_signature(http_method, url.path, options)
      req.set_form_data(options) unless options.keys.empty?
      #req.basic_auth @username, @password

      http = Net::HTTP.new(url.host, url.port)
      http.open_timeout = @@http_open_timeout
      http.read_timeout = @@http_read_timeout
      http.start do |connection|
        res = connection.request(req)
        case res
        when Net::HTTPSuccess
          return res
        else
          raise Muddyit::Errors.error_for(res.code, 'HTTP Error')
        end
      end

    end

    # aws request signature methods, taken from http://rightscale.rubyforge.org/right_aws_gem_doc

    # Adds AccessKeyId and a fresh Signature to +options+ (modified in place,
    # keys converted to strings) and returns it. The host that is signed is
    # taken from the configured rest endpoint.
    def calculate_signature(http_verb, url, options)
      endpoint = URI.parse(@rest_endpoint)
      options.nested_stringify_keys!
      options.delete('Signature')
      options['AccessKeyId'] = @access_key_id
      options['Signature'] = sign_request_v2(@secret_access_key, options, http_verb.to_s, endpoint.host, url)

      return options
    end

    # Thin wrapper around sign_request_v2.
    def signed_service_params(aws_secret_access_key, service_hash, http_verb, host, uri)
      sign_request_v2(aws_secret_access_key, service_hash, http_verb, host, uri)
    end

    # Computes the version 2 signature for +service_hash+ and returns it
    # base64 encoded and escaped. Timestamp, SignatureVersion and
    # SignatureMethod entries are added to +service_hash+ as a side effect.
    def sign_request_v2(aws_secret_access_key, service_hash, http_verb, host, uri)
      fix_service_params(service_hash, '2')
      # select a signing method (make an old openssl working with sha1)
      # make 'HmacSHA256' to be a default one
      service_hash['SignatureMethod'] = 'HmacSHA256' unless ['HmacSHA256', 'HmacSHA1'].include?(service_hash['SignatureMethod'])
      service_hash['SignatureMethod'] = 'HmacSHA1' unless @@digest256
      # select a digest
      digest = (service_hash['SignatureMethod'] == 'HmacSHA256' ? @@digest256 : @@digest1)
      # form string to sign
      canonical_string = service_hash.keys.sort.map do |key|
        "#{amz_escape(key)}=#{amz_escape(service_hash[key])}"
      end.join('&')
      string_to_sign = "#{http_verb.to_s.upcase}\n#{host.downcase}\n#{uri}\n#{canonical_string}"

      # sign the string
      amz_escape(Base64.encode64(OpenSSL::HMAC.digest(digest, aws_secret_access_key, string_to_sign)).strip)
    end

    # Set a timestamp and a signature version
    # (the timestamp is only set when neither Timestamp nor Expires is present)
    def fix_service_params(service_hash, signature)
      service_hash["Timestamp"] ||= Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z") unless service_hash["Expires"]
      service_hash["SignatureVersion"] = signature
      service_hash
    end

    # Escape a string according to the Amazon rules
    # http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/index.html?REST_RESTAuth.html
    #
    # Every byte outside a-z A-Z 0-9 . _ ~ - becomes %XX (upper case hex).
    # The string is processed byte by byte so that multibyte characters are
    # escaped completely, whatever the string's encoding.
    def amz_escape(param)
      param.to_s.unpack('C*').map do |byte|
        char = byte.chr
        char =~ /[a-zA-Z0-9._~-]/ ? char : format('%%%02X', byte)
      end.join
    end


  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class Muddyit::Sites::Site::Page::ContentData < Muddyit::Generic

  # Build the content data object, then flatten its :terms attribute.
  def initialize(muddyit, attributes)
    super(muddyit, attributes)
    populate_terms
  end


  protected

  # Replace each entry of the :terms attribute with the value stored under
  # its 'term' key. Does nothing when no :terms attribute is present.
  def populate_terms
    return unless @attributes.has_key?(:terms)

    @attributes[:terms] = @attributes[:terms].map { |entry| entry['term'] }
  end

end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class Muddyit::Entity < Muddyit::Generic

  # The entity's type. Exposed under this name because +type+ is an
  # existing method. When the :type attribute is not set, the entity is
  # fetched from the service first.
  def classification
    unless @attributes[:type]
      # The fetched attributes are merged into the existing hash rather
      # than replacing it, so attributes that only exist locally (e.g. an
      # entity specific confidence score) are kept.
      @attributes.merge!(self.fetch)
    end
    @attributes[:type]
  end

  protected

  # Retrieve the entity identified by the :uri attribute and return the
  # response with symbolized keys.
  def fetch
    api_url = "#{@muddyit.rest_endpoint}/entities/#{escape_segment(@attributes[:uri])}"
    response = @muddyit.send_request(api_url, :get)
    response.nested_symbolize_keys!
  end

  # CGI-escape +value+ and additionally encode '.' as %2E. This produces the
  # same output as the former URI.escape(CGI.escape(value), '.') without
  # relying on URI.escape, which Ruby 3.0 removed.
  def escape_segment(value)
    CGI.escape(value).gsub('.', '%2E')
  end

end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Muddyit

  # Error raised for failed API calls; carries the error code.
  class Error < RuntimeError
    attr_accessor :code
  end

  class Errors
    class << self
      # Raises the appropriate error for a given error code.
      # Currently raises only Muddyit::Error, with the message
      # "<code>: <message>" and +code+ set on the exception. A plain
      # RuntimeError is raised instead when the code or message is missing,
      # or when the code converts to 0.
      def error_for(code, message)
        if code.nil? || message.nil? || message.empty?
          raise RuntimeError, "Internal error. Muddyit API error not identified or unknown error."
        end

        # We assume that error code 0 is never returned
        raise RuntimeError, "Internal error. Unknown error." if code.to_i == 0

        error = Muddyit::Error.new("#{code}: #{message}")
        error.code = code
        raise error
      end
    end
  end

end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class Muddyit::Generic < Muddyit::Base

  # superclass for data objects to inherit
  #
  # allows us to change the api with little code change via the magic of method
  # missing :)
  #

  attr_accessor :attributes

  # constructor
  #
  # Params
  # * muddyit (Required)
  #     a muddyit::base object
  # * attributes (Optional)
  #     hash of method => value entries used to simulate methods on a real object
  #     (its keys are symbolized in place)
  #
  def initialize(muddyit, attributes = {})
    @muddyit = muddyit
    @attributes = attributes.nested_symbolize_keys!
    @info_added = false
  end

  # request data from muddy.it if we haven't done so before and we don't have
  # the attribute requested (acts as getter + setter)
  #
  # Params
  # * method (Required)
  #     the object method to populate, from attributes or remotely
  # * args (Optional)
  #     the value to set the method to
  #
  # Returns the attribute value when reading and true when writing. Raises
  # a RuntimeError when the attribute is still unknown after fetching.
  def method_missing(method, args = nil)
    key = method.to_sym

    # fetch at most once per object, and only for attributes we don't hold yet
    if @info_added == false && !@attributes.has_key?(key)
      #puts "Searching for missing method #{method.to_s}"
      @attributes.merge!(self.fetch)
      @info_added = true
    end

    raise "No method named #{method.to_s}" unless @attributes.has_key?(key)

    return @attributes[key] if args.nil?

    @attributes[key] = args
    true
  end

  # Keep respond_to? in step with method_missing for attributes that are
  # already loaded. This deliberately never fetches, so asking whether an
  # object responds to something cannot trigger a remote request.
  def respond_to_missing?(method, include_private = false)
    (@attributes.respond_to?(:has_key?) && @attributes.has_key?(method.to_sym)) || super
  end

  protected

  # method used to retrieve data from muddy.it service, to be overridden
  #
  def fetch
    raise "not implemented"
  end

end
|
data/lib/muddyit/page.rb
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
class Muddyit::Sites::Site::Page < Muddyit::Generic

  # Create a set of entities from the categorisation results
  def initialize(muddyit, attributes = {})
    super(muddyit, attributes)
    create_entities
    @content_data_cache = nil
  end

  # submit a page or text for re-categorisation
  #
  # Params
  # * options (Required)
  #     request parameters; note that the hash is modified in place
  #
  # Returns a new Page built from the response.
  def refresh(options)

    # Ensure we get content_data as well
    options[:include_content] = true unless options.has_key?(:include_content)

    # Set the URI if not set
    options[:uri] = options[:identifier] if options.has_key?(:identifier) && !options.has_key?(:uri) && !options.has_key?(:text)

    # Ensure we have encoded the identifier and URI
    if options.has_key?(:uri)
      options[:uri] = escape_segment(options[:uri])
    elsif options.has_key?(:identifier)
      options[:identifier] = escape_segment(options[:identifier])
    end

    response = @muddyit.send_request("#{page_api_url}/refresh", :post, options)
    return Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site))
  end


  # get content_data for page
  #
  # The ContentData object is built once and cached. It comes from the
  # :content_data attribute when present, otherwise the page is fetched.
  def content_data
    if @content_data_cache.nil?
      data = @attributes[:content_data] || self.fetch[:content_data]
      @content_data_cache = Muddyit::Sites::Site::Page::ContentData.new(@muddyit, data)
    end
    @content_data_cache
  end


  # delete the page
  #
  def destroy
    @muddyit.send_request(page_api_url, :delete, {})
    # Is this the correct thing to return ?
    return true
  end

  # retrieve related pages
  #
  # Params
  # * options (Optional)
  #
  # Returns an array of { :page => Page, :count => count } hashes; each
  # page is looked up individually through the site's pages collection.
  def related_content(options = {})
    response = @muddyit.send_request("#{page_api_url}/related/content", :get, options)

    # The return format needs sorting out here .......
    response.map do |result|
      { :page => @attributes[:site].pages.find(result['identifier']), :count => result['count'] }
    end
  end

  protected

  # Retrieve this page (including its content data) and return the
  # response with symbolized keys.
  def fetch
    response = @muddyit.send_request(page_api_url, :get, {:include_content => true })
    response.nested_symbolize_keys!
  end

  # Convert results to entities
  def create_entities
    return unless @attributes.has_key?(:results)

    @attributes[:results] = @attributes[:results].map do |result|
      Muddyit::Entity.new(@muddyit, result)
    end
  end

  # API url of this page, built from the site token and the escaped
  # page identifier.
  def page_api_url
    "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{escape_segment(@attributes[:identifier])}"
  end

  # CGI-escape +value+ and additionally encode '.' as %2E. This produces the
  # same output as the former URI.escape(CGI.escape(value), '.') without
  # relying on URI.escape, which Ruby 3.0 removed.
  def escape_segment(value)
    CGI.escape(value).gsub('.', '%2E')
  end

end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
class Muddyit::Sites::Site::Pages < Muddyit::Generic

  # find a specific page from the site
  #
  # Params
  # * type (Required)
  #     one of :all or a page identifier
  #
  # With :all and a block, every page of every result set is yielded and
  # nil is returned. With :all and no block, a single result set is returned
  # as { :next_page => token, :pages => [Page, ...] }. With an identifier
  # string the matching Page is returned, or nil when the response has no
  # 'results' entry.
  def find(type, options = {})
    raise 'no type specified' if type.nil?

    if type.is_a? Symbol
      case type
      when :all
        if block_given?
          each_result_page(pages_api_url, options) { |page| yield page }
        else
          response = @muddyit.send_request(pages_api_url, :get, options)
          return { :next_page => response['next_page'], :pages => build_pages(response) }
        end
      else
        raise 'invalid type specified'
      end

    elsif type.is_a? String
      api_url = "#{pages_api_url}/#{escape_segment(type)}"
      response = @muddyit.send_request(api_url, :get, {})
      response.has_key?('results') ? Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site)) : nil
    end
  end

  # retrieve entities related to the specified entity within the site pages collection
  #
  # Params
  # * uri (Required)
  # * options (Optional)
  #
  # Returns an array of Muddyit::Entity objects.
  def related_entities(uri, options = {})
    raise "no uri supplied" if uri.nil?
    api_url = "#{pages_api_url}/related/entities/#{escape_segment(uri)}"
    response = @muddyit.send_request(api_url, :get, options)

    # The return format needs sorting out here .......
    response.map { |result| Muddyit::Entity.new(@muddyit, result) }
  end

  # submit a page or text for categorisation
  #
  # Params
  # * options (Required)
  #     request parameters; note that the hash is modified in place
  #
  # Returns a Page built from the response.
  def categorise(options)

    # Ensure we get content_data as well
    options[:include_content] = true

    # Set the URI if not set
    options[:uri] = options[:identifier] if options.has_key?(:identifier) && !options.has_key?(:uri) && !options.has_key?(:text)

    # Ensure we have encoded the identifier and URI
    if options.has_key?(:uri)
      raise 'uri must not be nil' if options[:uri].nil?
      options[:uri] = escape_segment(options[:uri])
    elsif options.has_key?(:identifier)
      raise 'identifier must not be nil' if options[:identifier].nil?
      options[:identifier] = escape_segment(options[:identifier])
    end

    response = @muddyit.send_request("#{pages_api_url}/categorise", :post, options)
    return Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site))
  end

  # find all pages with specified entity
  #
  # Params
  # * uri (Required)
  #     a dbpedia URI
  # * options (Optional)
  #
  #
  def find_by_entity(uri, options = {}, &block)
    queryAllWithURI(uri, options, &block)
  end

  # find all pages with specified entities
  #
  # Params
  # * uris (Required)
  #     an array of dbpedia URIs
  # * options (Optional)
  #
  #
  def find_by_entities(uris, options = {}, &block)
    queryAllWithURI(uris.join(','), options, &block)
  end

  # find all pages with specified term
  #
  # Params
  # * term (Required)
  #     a string e.g. 'Gordon Brown'
  # * options (Optional)
  #
  #
  def find_by_term(term, options = {}, &block)
    queryAllWithTerm(term, options, &block)
  end

  # find all pages with specified terms
  #
  # Params
  # * terms (Required)
  #     an array of strings e.g. ['Gordon Brown', 'Tony Blair']
  # * options (Optional)
  #
  #
  def find_by_terms(terms, options = {}, &block)
    queryAllWithTerm(terms.join(','), options, &block)
  end

  protected

  # find all pages with specified entit(y|ies)
  #
  # multiple uris may be specified using commas
  #
  # Params
  # * uri (Required)
  #     dbpedia uri(s)
  # * options (Required)
  #
  def queryAllWithURI(uri, options, &block)
    api_url = "#{pages_api_url}/withentities/#{escape_segment(uri)}"
    query_page(api_url, options, &block)
  end

  # find all pages with specified term(s)
  #
  # multiple terms may be specified using commas
  #
  # Params
  # * term (Required)
  # * options (Required)
  #
  #
  def queryAllWithTerm(term, options, &block)
    api_url = "#{pages_api_url}/withterms/#{escape_segment(term)}"
    query_page(api_url, options, &block)
  end

  # utility method for term and uri query calls
  #
  # Params
  # * api_url (Required)
  #     must contain uri to make request to
  # * options (Required)
  #     query parameters; they are sent with the request in both the block
  #     and the non-block form
  #
  # With a block every page of every result set is yielded; without one a
  # single result set is returned as { :next_page => token, :pages => [...] }.
  def query_page(api_url, options)
    if block_given?
      each_result_page(api_url, options) { |page| yield page }
    else
      response = @muddyit.send_request(api_url, :get, options)
      # the parsed response has string keys, so 'next_page' must be a string
      return { :next_page => response['next_page'], :pages => build_pages(response) }
    end
  end

  # Request +api_url+ repeatedly, following the 'next_page' token, and
  # yield a Page for every entry of every result set. Stops as soon as the
  # response carries no token (nil or empty). Returns nil.
  def each_result_page(api_url, options)
    token = nil
    loop do
      # each request gets its own copy of the options; the :page parameter
      # is only added once a token is known
      request_options = token.nil? ? options.dup : options.merge(:page => token)
      response = @muddyit.send_request(api_url, :get, request_options)
      build_pages(response).each { |page| yield page }
      token = response['next_page']
      break if token.nil? || token.to_s.empty?
    end
    nil
  end

  # Turn the 'resultsets' entries of a response into Page objects that
  # belong to this collection's site.
  def build_pages(response)
    response['resultsets'].map do |page|
      Muddyit::Sites::Site::Page.new(@muddyit, page.merge!(:site => self.site))
    end
  end

  # API url of this site's pages collection.
  def pages_api_url
    "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages"
  end

  # CGI-escape +value+ and additionally encode '.' as %2E. This produces the
  # same output as the former URI.escape(CGI.escape(value), '.') without
  # relying on URI.escape, which Ruby 3.0 removed.
  def escape_segment(value)
    CGI.escape(value).gsub('.', '%2E')
  end

end
|
data/lib/muddyit/site.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class Muddyit::Sites::Site < Muddyit::Generic

  # get pages object for site
  # (created on first use, then reused)
  def pages
    @pages ||= Muddyit::Sites::Site::Pages.new(@muddyit, :site => self)
  end

  protected

  # Retrieve the site identified by the :token attribute and return the
  # 'site' entry of the response with symbolized keys.
  def fetch
    site_url = "#{@muddyit.rest_endpoint}/sites/#{@attributes[:token]}"
    @muddyit.send_request(site_url, :get, {})['site'].nested_symbolize_keys!
  end

end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class Muddyit::Sites < Muddyit::Base

  # create a new sites object
  # not a muddyit:generic as it doesn't need the method missing loader
  #
  # Params :
  #
  # * muddyit (Required)
  #     a muddyit::base instance
  #
  def initialize(muddyit)
    @muddyit = muddyit
  end

  # find a specific site
  #
  # Params
  # * type (Required)
  #     one of :all or a site token
  #
  # Returns an array of Site objects for :all and a single Site for a
  # token. Any other type raises.
  def find(type, options = {})
    raise 'no type specified' unless type

    case type
    when Symbol
      raise 'invalid type specified' unless type == :all

      listing = @muddyit.send_request("#{@muddyit.rest_endpoint}/sites/", :get, options)
      listing.map { |entry| Muddyit::Sites::Site.new(@muddyit, entry['site']) }
    when String
      found = @muddyit.send_request("#{@muddyit.rest_endpoint}/sites/#{type}", :get, options)
      Muddyit::Sites::Site.new(@muddyit, found['site'])
    else
      raise 'invalid type specified'
    end
  end

end
|
data/lib/muddyit_fu.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'net/http'
|
3
|
+
require 'cgi'
|
4
|
+
require 'json'
|
5
|
+
#require 'json/ext'
|
6
|
+
require 'openssl'
|
7
|
+
require 'base64'
|
8
|
+
|
9
|
+
require 'pp'
|
10
|
+
|
11
|
+
class Module
  # Define a class-level reader and writer named +attribute_name+, backed
  # by the class variable of the same name. The reader initialises the
  # variable to nil when it has no value yet.
  def class_attr_accessor(attribute_name)
    reader = "def self.#{attribute_name}\n  @@#{attribute_name} ||= nil\nend\n"
    writer = "def self.#{attribute_name}=(value)\n  @@#{attribute_name} = value\nend\n"
    class_eval(reader + writer)
  end
end
|
23
|
+
|
24
|
+
|
25
|
+
class Hash
  # File merb/core_ext/hash.rb, line 166
  #
  # Convert every key that responds to to_sym into a symbol, recursing into
  # values that are hashes. Modifies the receiver and returns it.
  #
  # The keys are snapshotted before the loop: adding keys to a hash while
  # iterating over it with each raises on Ruby 1.9 and later.
  def nested_symbolize_keys!
    keys.each do |key|
      value = delete(key)
      value.nested_symbolize_keys! if Hash === value
      self[key.respond_to?(:to_sym) ? key.to_sym : key] = value
    end
    self
  end

  # Convert every key to a string, recursing into values that are hashes.
  # Modifies the receiver and returns it.
  def nested_stringify_keys!
    keys.each do |key|
      value = delete(key)
      value.nested_stringify_keys! if Hash === value
      self[key.respond_to?(:to_s) ? key.to_s : key] = value
    end
    self
  end

end
|
46
|
+
|
47
|
+
# base must load first
|
48
|
+
%w(base errors generic sites entity site pages page content_data).each do |file|
|
49
|
+
require File.join(File.dirname(__FILE__), 'muddyit', file)
|
50
|
+
end
|
51
|
+
|
52
|
+
|
data/muddyit_fu.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
# Gem specification for muddyit_fu 0.1.0.
Gem::Specification.new do |s|
  s.name = %q{muddyit_fu}
  s.version = "0.1.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["robl"]
  s.date = %q{2009-06-03}
  s.email = %q{robl[at]monkeyhelper.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/muddyit/base.rb",
    "lib/muddyit/content_data.rb",
    "lib/muddyit/entity.rb",
    "lib/muddyit/errors.rb",
    "lib/muddyit/generic.rb",
    "lib/muddyit/page.rb",
    "lib/muddyit/pages.rb",
    "lib/muddyit/site.rb",
    "lib/muddyit/sites.rb",
    "lib/muddyit_fu.rb",
    "muddyit_fu.gemspec"
  ]
  # has_rdoc= is guarded in the same way as the other optional writers in
  # this file, so the specification still loads where it is not defined.
  s.has_rdoc = true if s.respond_to? :has_rdoc=
  s.homepage = %q{http://github.com/monkeyhelper/muddyit_fu}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.1}
  s.summary = %q{Provides a ruby interface to muddy.it}

  # The gem declares no dependencies, so the former (empty) branch on
  # Gem::RubyGemsVersion is gone; that constant is not defined by current
  # RubyGems and referencing it made the file fail to load.
  if s.respond_to? :specification_version then
    s.specification_version = 2
  end
end
|
metadata
CHANGED
@@ -1,43 +1,50 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: monkeyhelper-muddyit_fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- robl
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-03 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
version_requirement:
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
25
|
-
description: Provides a ruby interface to muddy.it via the REST api
|
26
|
-
email: robl at monkeyhelper.com
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: robl[at]monkeyhelper.com
|
27
18
|
executables: []
|
28
19
|
|
29
20
|
extensions: []
|
30
21
|
|
31
22
|
extra_rdoc_files:
|
32
|
-
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
- LICENSE
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- .gitignore
|
27
|
+
- CHANGELOG
|
28
|
+
- LICENSE
|
29
|
+
- README.rdoc
|
30
|
+
- Rakefile
|
31
|
+
- VERSION
|
32
|
+
- lib/muddyit/base.rb
|
33
|
+
- lib/muddyit/content_data.rb
|
34
|
+
- lib/muddyit/entity.rb
|
35
|
+
- lib/muddyit/errors.rb
|
36
|
+
- lib/muddyit/generic.rb
|
37
|
+
- lib/muddyit/page.rb
|
38
|
+
- lib/muddyit/pages.rb
|
39
|
+
- lib/muddyit/site.rb
|
40
|
+
- lib/muddyit/sites.rb
|
41
|
+
- lib/muddyit_fu.rb
|
42
|
+
- muddyit_fu.gemspec
|
43
|
+
has_rdoc: true
|
36
44
|
homepage: http://github.com/monkeyhelper/muddyit_fu
|
37
45
|
post_install_message:
|
38
46
|
rdoc_options:
|
39
|
-
- --
|
40
|
-
- README
|
47
|
+
- --charset=UTF-8
|
41
48
|
require_paths:
|
42
49
|
- lib
|
43
50
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -58,6 +65,6 @@ rubyforge_project:
|
|
58
65
|
rubygems_version: 1.2.0
|
59
66
|
signing_key:
|
60
67
|
specification_version: 2
|
61
|
-
summary: Provides a ruby interface to muddy.it
|
68
|
+
summary: Provides a ruby interface to muddy.it
|
62
69
|
test_files: []
|
63
70
|
|
/data/{README → README.rdoc}
RENAMED
File without changes
|