monkeyhelper-muddyit_fu 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +2 -0
- data/CHANGELOG +25 -0
- data/LICENSE +20 -0
- data/Rakefile +56 -0
- data/VERSION +1 -0
- data/lib/muddyit/base.rb +184 -0
- data/lib/muddyit/content_data.rb +21 -0
- data/lib/muddyit/entity.rb +18 -0
- data/lib/muddyit/errors.rb +19 -0
- data/lib/muddyit/generic.rb +59 -0
- data/lib/muddyit/page.rb +95 -0
- data/lib/muddyit/pages.rb +193 -0
- data/lib/muddyit/site.rb +14 -0
- data/lib/muddyit/sites.rb +45 -0
- data/lib/muddyit_fu.rb +52 -0
- data/muddyit_fu.gemspec +50 -0
- metadata +30 -23
- /data/{README → README.rdoc} +0 -0
data/.gitignore
ADDED
data/CHANGELOG
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
muddyit_fu Changelog
|
2
|
+
|
3
|
+
0.1.0
|
4
|
+
- Moved to using Jeweler for gem management, should fix previously empty gems
|
5
|
+
|
6
|
+
0.0.4
|
7
|
+
- Fixed content_data bug when attributes hadn't been requested
|
8
|
+
- Added proper dbpedia entity support for Entity class
|
9
|
+
|
10
|
+
0.0.3
|
11
|
+
- Updated License
|
12
|
+
- Modified related and related_content functionality for pages to match muddy server API changes
|
13
|
+
|
14
|
+
0.0.2
|
15
|
+
|
16
|
+
- Added License
|
17
|
+
- Added Changelog (!)
|
18
|
+
- Moved back to using JSON for JRuby people
|
19
|
+
- Improved content data terms data struct
|
20
|
+
- Cached content data struct for a page object
|
21
|
+
- Added a 'classification' method to entity to make up for type being an existing method
|
22
|
+
|
23
|
+
0.0.1
|
24
|
+
|
25
|
+
- First Release
|
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 Rob Lee
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/Rakefile
ADDED
@@ -0,0 +1,56 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
begin
|
5
|
+
require 'jeweler'
|
6
|
+
Jeweler::Tasks.new do |gem|
|
7
|
+
gem.name = "muddyit_fu"
|
8
|
+
gem.summary = "Provides a ruby interface to muddy.it"
|
9
|
+
gem.email = "robl[at]monkeyhelper.com"
|
10
|
+
gem.homepage = "http://github.com/monkeyhelper/muddyit_fu"
|
11
|
+
gem.authors = ["robl"]
|
12
|
+
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
13
|
+
end
|
14
|
+
|
15
|
+
rescue LoadError
|
16
|
+
puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
|
17
|
+
end
|
18
|
+
|
19
|
+
require 'rake/testtask'
|
20
|
+
Rake::TestTask.new(:test) do |test|
|
21
|
+
test.libs << 'lib' << 'test'
|
22
|
+
test.pattern = 'test/**/*_test.rb'
|
23
|
+
test.verbose = true
|
24
|
+
end
|
25
|
+
|
26
|
+
begin
|
27
|
+
require 'rcov/rcovtask'
|
28
|
+
Rcov::RcovTask.new do |test|
|
29
|
+
test.libs << 'test'
|
30
|
+
test.pattern = 'test/**/*_test.rb'
|
31
|
+
test.verbose = true
|
32
|
+
end
|
33
|
+
rescue LoadError
|
34
|
+
task :rcov do
|
35
|
+
abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
|
36
|
+
end
|
37
|
+
end
|
38
|
+
|
39
|
+
|
40
|
+
task :default => :test
|
41
|
+
|
42
|
+
require 'rake/rdoctask'
|
43
|
+
Rake::RDocTask.new do |rdoc|
  # Work out the version shown in the rdoc title. This gem ships a plain-text
  # VERSION file; VERSION.yml (major/minor/patch keys) is still honoured as a
  # fallback so an older checkout keeps working.
  version =
    if File.exist?('VERSION')
      File.read('VERSION').strip
    elsif File.exist?('VERSION.yml')
      require 'yaml' # only needed for the legacy file format
      config = YAML.load(File.read('VERSION.yml'))
      "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
    else
      ""
    end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "muddyit_fu #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
56
|
+
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.1.0
|
data/lib/muddyit/base.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
module Muddyit
|
2
|
+
|
3
|
+
def self.new(*params)
|
4
|
+
Muddyit::Base.new(*params)
|
5
|
+
end
|
6
|
+
|
7
|
+
class Base
|
8
|
+
class_attr_accessor :http_open_timeout
|
9
|
+
class_attr_accessor :http_read_timeout
|
10
|
+
attr_accessor :rest_endpoint
|
11
|
+
attr_reader :access_key_id, :secret_access_key
|
12
|
+
|
13
|
+
@@http_open_timeout = 60
|
14
|
+
@@http_read_timeout = 60
|
15
|
+
|
16
|
+
REST_ENDPOINT = 'http://www.muddy.it/api'
|
17
|
+
|
18
|
+
# Set the request signing method.
# The concrete OpenSSL::Digest::SHA1 / ::SHA256 classes are used rather than
# the OpenSSL::Digest::Digest alias, which newer versions of Ruby's openssl
# library no longer provide.
@@digest1 = OpenSSL::Digest::SHA1.new
@@digest256 = nil
if OpenSSL::OPENSSL_VERSION_NUMBER > 0x00908000
  # Deliberately best-effort: fall back to SHA1 signing when SHA256 is missing
  @@digest256 = OpenSSL::Digest::SHA256.new rescue nil # Some installation may not support sha256
end
|
24
|
+
|
25
|
+
# create a new muddyit object
|
26
|
+
#
|
27
|
+
# You can either pass a hash with the following attributes:
|
28
|
+
#
|
29
|
+
# * :access_key_id (Required)
|
30
|
+
# the access key id
|
31
|
+
# * :secret_access_key (Required)
|
32
|
+
# the secret access key
|
33
|
+
# * :rest_endpoint (Optional)
|
34
|
+
# the muddy.it rest service endpoint
|
35
|
+
# or:
|
36
|
+
# * config_file (Required)
|
37
|
+
# yaml file to load configuration from
|
38
|
+
#
|
39
|
+
# Config Example (yaml file)
|
40
|
+
# ---
|
41
|
+
# access_key_id: YOUR_ACCESS_KEY_ID
|
42
|
+
# secret_access_key: YOUR_SECRET_ACCESS_KEY
|
43
|
+
#
|
44
|
+
def initialize(config_hash_or_file)
|
45
|
+
if config_hash_or_file.is_a? Hash
|
46
|
+
config_hash_or_file.nested_symbolize_keys!
|
47
|
+
@access_key_id = config_hash_or_file[:access_key_id]
|
48
|
+
@secret_access_key = config_hash_or_file[:secret_access_key]
|
49
|
+
@rest_endpoint = config_hash_or_file.has_key?(:rest_endpoint) ? config_hash_or_file[:rest_endpoint] : REST_ENDPOINT
|
50
|
+
raise 'config_hash must contain access_key_id and secret_access_key' unless @access_key_id and @secret_access_key
|
51
|
+
else
|
52
|
+
config = YAML.load_file(config_hash_or_file)
|
53
|
+
config.nested_symbolize_keys!
|
54
|
+
@access_key_id = config[:access_key_id]
|
55
|
+
@secret_access_key = config[:secret_access_key]
|
56
|
+
@rest_endpoint = config.has_key?(:rest_endpoint) ? config[:rest_endpoint] : REST_ENDPOINT
|
57
|
+
raise 'config file must contain access_key_id and secret_access_key' unless @access_key_id and @secret_access_key
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# sends a request to the muddyit REST api and returns the decoded JSON
#
# Params
# * api_url (Required)
#     the full request url (it is parsed for host, port, path and query)
# * http_method (Optional)
#     choose between GET (default), POST, PUT, DELETE http request.
# * options (Optional)
#     hash of query parameters, you do not need to include access_key_id, secret_access_key because these are added automatically
#
# Returns the parsed JSON document.
# Raises a RuntimeError when no api_url is given; errors raised by
# request_over_http and JSON.parse are passed through unchanged.
#
def send_request(api_url, http_method = :get, options = {})
  raise 'no api_url supplied' unless api_url

  res = request_over_http(api_url, http_method, options)
  body = res.body.to_s

  # Strip any js wrapping method (JSONP), e.g. callback({...}).
  # The wrapper name cannot contain "(" and the closing paren has to end the
  # body, so parentheses inside the JSON payload itself are never mistaken
  # for the wrapper (the previous greedy pattern cut such payloads short).
  body = $1 if body =~ /\A[^(]+\((.+)\)\s*;?\s*\z/m

  JSON.parse(body)
end
|
86
|
+
|
87
|
+
|
88
|
+
# creates and/or returns the Muddyit::Sites object
|
89
|
+
def sites() @sites ||= Muddyit::Sites.new(self) end
|
90
|
+
|
91
|
+
protected
|
92
|
+
|
93
|
+
# For easier testing. You can mock this method with the JSON response you're expecting to receive
|
94
|
+
def request_over_http(api_url, http_method, options)
|
95
|
+
|
96
|
+
req = nil
|
97
|
+
http_opts = { "Accept" => "application/json", "User-Agent" => "muddyit_fu" }
|
98
|
+
url = URI.parse(api_url)
|
99
|
+
|
100
|
+
case http_method
|
101
|
+
when :get
|
102
|
+
u = url.query.nil? ? url.path : url.path+"?"+url.query
|
103
|
+
req = Net::HTTP::Get.new(u, http_opts)
|
104
|
+
when :post
|
105
|
+
req = Net::HTTP::Post.new(url.path, http_opts)
|
106
|
+
when :put
|
107
|
+
req = Net::HTTP::Put.new(url.path, http_opts)
|
108
|
+
when :delete
|
109
|
+
req = Net::HTTP::Delete.new(url.path, http_opts)
|
110
|
+
else
|
111
|
+
raise 'invalid http method specified'
|
112
|
+
end
|
113
|
+
|
114
|
+
options = calculate_signature(http_method, url.path, options)
|
115
|
+
req.set_form_data(options) unless options.keys.empty?
|
116
|
+
#req.basic_auth @username, @password
|
117
|
+
|
118
|
+
http = Net::HTTP.new(url.host, url.port)
|
119
|
+
http.open_timeout = @@http_open_timeout
|
120
|
+
http.read_timeout = @@http_read_timeout
|
121
|
+
http.start do |http|
|
122
|
+
res = http.request(req)
|
123
|
+
case res
|
124
|
+
when Net::HTTPSuccess
|
125
|
+
return res
|
126
|
+
else
|
127
|
+
raise Muddyit::Errors.error_for(res.code, 'HTTP Error')
|
128
|
+
end
|
129
|
+
end
|
130
|
+
|
131
|
+
end
|
132
|
+
|
133
|
+
# aws request signature methods, taken from http://rightscale.rubyforge.org/right_aws_gem_doc
|
134
|
+
|
135
|
+
def calculate_signature(http_verb, url, options)
|
136
|
+
endpoint = URI.parse(@rest_endpoint)
|
137
|
+
options.nested_stringify_keys!
|
138
|
+
options.delete('Signature')
|
139
|
+
options['AccessKeyId'] = @access_key_id
|
140
|
+
options['Signature'] = sign_request_v2(@secret_access_key, options, http_verb.to_s, endpoint.host, url)
|
141
|
+
|
142
|
+
return options
|
143
|
+
end
|
144
|
+
|
145
|
+
def signed_service_params(aws_secret_access_key, service_hash, http_verb, host, uri)
|
146
|
+
sign_request_v2(aws_secret_access_key, service_hash, http_verb, host, uri)
|
147
|
+
end
|
148
|
+
|
149
|
+
def sign_request_v2(aws_secret_access_key, service_hash, http_verb, host, uri)
|
150
|
+
fix_service_params(service_hash, '2')
|
151
|
+
# select a signing method (make an old openssl working with sha1)
|
152
|
+
# make 'HmacSHA256' to be a default one
|
153
|
+
service_hash['SignatureMethod'] = 'HmacSHA256' unless ['HmacSHA256', 'HmacSHA1'].include?(service_hash['SignatureMethod'])
|
154
|
+
service_hash['SignatureMethod'] = 'HmacSHA1' unless @@digest256
|
155
|
+
# select a digest
|
156
|
+
digest = (service_hash['SignatureMethod'] == 'HmacSHA256' ? @@digest256 : @@digest1)
|
157
|
+
# form string to sign
|
158
|
+
canonical_string = service_hash.keys.sort.map do |key|
|
159
|
+
"#{amz_escape(key)}=#{amz_escape(service_hash[key])}"
|
160
|
+
end.join('&')
|
161
|
+
string_to_sign = "#{http_verb.to_s.upcase}\n#{host.downcase}\n#{uri}\n#{canonical_string}"
|
162
|
+
|
163
|
+
# sign the string
|
164
|
+
amz_escape(Base64.encode64(OpenSSL::HMAC.digest(digest, aws_secret_access_key, string_to_sign)).strip)
|
165
|
+
end
|
166
|
+
|
167
|
+
# Set a timestamp and a signature version
|
168
|
+
def fix_service_params(service_hash, signature)
|
169
|
+
service_hash["Timestamp"] ||= Time.now.utc.strftime("%Y-%m-%dT%H:%M:%S.000Z") unless service_hash["Expires"]
|
170
|
+
service_hash["SignatureVersion"] = signature
|
171
|
+
service_hash
|
172
|
+
end
|
173
|
+
|
174
|
+
# Escape a string according to Amazon's rules: every byte outside the
# unreserved set (A-Z a-z 0-9 . _ ~ -) is replaced by an upper-case %XX triplet.
# http://docs.amazonwebservices.com/AmazonSimpleDB/2007-11-07/DeveloperGuide/index.html?REST_RESTAuth.html
#
# param is converted with to_s first; the escaped String is returned.
def amz_escape(param)
  param.to_s.gsub(/[^a-zA-Z0-9._~-]+/) do |unsafe|
    # Encode byte by byte: counting characters instead of bytes would drop
    # the trailing bytes of multi-byte (e.g. UTF-8) characters.
    unsafe.unpack('C*').map { |byte| '%%%02X' % byte }.join
  end
end
|
181
|
+
|
182
|
+
|
183
|
+
end
|
184
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
class Muddyit::Sites::Site::Page::ContentData < Muddyit::Generic
|
2
|
+
|
3
|
+
def initialize(muddyit, attributes)
|
4
|
+
super(muddyit, attributes)
|
5
|
+
populate_terms
|
6
|
+
end
|
7
|
+
|
8
|
+
|
9
|
+
protected
|
10
|
+
|
11
|
+
def populate_terms
|
12
|
+
terms = []
|
13
|
+
if @attributes.has_key?(:terms)
|
14
|
+
@attributes[:terms].each do |term|
|
15
|
+
terms.push term['term']
|
16
|
+
end
|
17
|
+
@attributes[:terms] = terms
|
18
|
+
end
|
19
|
+
end
|
20
|
+
|
21
|
+
end
|
@@ -0,0 +1,18 @@
|
|
1
|
+
class Muddyit::Entity < Muddyit::Generic
|
2
|
+
|
3
|
+
def classification
|
4
|
+
unless @attributes[:type]
|
5
|
+
# We merge here as we don't want to overwrite a entity specific confidence score
|
6
|
+
@attributes.merge!(self.fetch)
|
7
|
+
end
|
8
|
+
@attributes[:type]
|
9
|
+
end
|
10
|
+
|
11
|
+
protected
|
12
|
+
# Retrieve this entity's details from muddy.it and return them as a hash
# with symbolized keys.
def fetch
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  escaped_uri = CGI.escape(@attributes[:uri]).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/entities/#{escaped_uri}"
  response = @muddyit.send_request(api_url, :get)
  response.nested_symbolize_keys!
end
|
17
|
+
|
18
|
+
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
module Muddyit
|
2
|
+
|
3
|
+
class Error < RuntimeError
|
4
|
+
attr_accessor :code
|
5
|
+
end
|
6
|
+
|
7
|
+
class Errors
|
8
|
+
# Method used for raising the appropriate error class for a given error code.
|
9
|
+
# Currently raises only Muddyit::Error
|
10
|
+
def self.error_for(code, message)
|
11
|
+
raise RuntimeError.new("Internal error. Muddyit API error not identified or unknown error.") if (code.nil? || message.nil? || message.empty?)
|
12
|
+
raise RuntimeError.new("Internal error. Unknown error.") if code.to_i == 0 # We assume that error code 0 is never returned
|
13
|
+
e = Muddyit::Error.new("#{code}: #{message}")
|
14
|
+
e.code = code
|
15
|
+
raise e
|
16
|
+
end
|
17
|
+
end
|
18
|
+
|
19
|
+
end
|
@@ -0,0 +1,59 @@
|
|
1
|
+
class Muddyit::Generic < Muddyit::Base
|
2
|
+
|
3
|
+
# superclass for data objects to inherit
|
4
|
+
#
|
5
|
+
# allows us to change the api with little code change via the magic of method
|
6
|
+
# missing :)
|
7
|
+
#
|
8
|
+
|
9
|
+
attr_accessor :attributes
|
10
|
+
|
11
|
+
# constructor
|
12
|
+
#
|
13
|
+
# Params
|
14
|
+
# * muddyit (Required)
|
15
|
+
# a muddyit::base object
|
16
|
+
# * attributes (Optional)
|
17
|
+
# hash of method => value entries used to simulate methods on a real object
|
18
|
+
#
|
19
|
+
def initialize(muddyit, attributes = {})
|
20
|
+
@muddyit = muddyit
|
21
|
+
@attributes = attributes.nested_symbolize_keys!
|
22
|
+
@info_added = false
|
23
|
+
end
|
24
|
+
|
25
|
+
# request data from muddy.it if we haven't done so before and we don't have
|
26
|
+
# the attribute requested (acts as getter + setter)
|
27
|
+
#
|
28
|
+
# Params
|
29
|
+
# * method (Required)
|
30
|
+
# the object method to populate, from attributes or remotely
|
31
|
+
# * args (Optional)
|
32
|
+
# the value to set the method to
|
33
|
+
#
|
34
|
+
def method_missing(method, args = nil)
|
35
|
+
if @info_added == false and !@attributes.has_key?(method.to_sym)
|
36
|
+
#puts "Searching for missing method #{method.to_s}"
|
37
|
+
@attributes.merge!(self.fetch)
|
38
|
+
@info_added = true
|
39
|
+
end
|
40
|
+
unless @attributes.has_key?(method.to_sym)
|
41
|
+
raise "No method named #{method.to_s}"
|
42
|
+
end
|
43
|
+
if args.nil?
|
44
|
+
@attributes[method.to_sym]
|
45
|
+
else
|
46
|
+
@attributes[method.to_sym] = args
|
47
|
+
return true
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
protected
|
52
|
+
|
53
|
+
# method used to retrieve data from muddy.it service, to be overridden
|
54
|
+
#
|
55
|
+
def fetch
|
56
|
+
raise "not implemented"
|
57
|
+
end
|
58
|
+
|
59
|
+
end
|
data/lib/muddyit/page.rb
ADDED
@@ -0,0 +1,95 @@
|
|
1
|
+
class Muddyit::Sites::Site::Page < Muddyit::Generic
|
2
|
+
|
3
|
+
# Create a set of entities from the categorisation results
|
4
|
+
def initialize(muddyit, attributes = {})
|
5
|
+
super(muddyit, attributes)
|
6
|
+
create_entities
|
7
|
+
@content_data_cache = nil
|
8
|
+
end
|
9
|
+
|
10
|
+
# submit a page or text for re-categorisation
#
# Params
# * options (Required)
#     hash of request parameters. :include_content defaults to true, and
#     :uri is filled from :identifier when neither :uri nor :text is given.
#     The hash is modified in place.
#
# Returns a new Muddyit::Sites::Site::Page built from the response.
#
def refresh(options)
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  encode = lambda { |value| CGI.escape(value).gsub('.', '%2E') }

  # Ensure we get content_data as well
  options[:include_content] = true unless options.has_key?(:include_content)

  # Set the URI if not set
  if options.has_key?(:identifier) && !options.has_key?(:uri) && !options.has_key?(:text)
    options[:uri] = options[:identifier]
  end

  # Ensure we have encoded the identifier and URI
  if options.has_key?(:uri)
    options[:uri] = encode.call(options[:uri])
  elsif options.has_key?(:identifier)
    options[:identifier] = encode.call(options[:identifier])
  end

  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{encode.call(@attributes[:identifier])}/refresh"
  response = @muddyit.send_request(api_url, :post, options)
  Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site))
end
|
34
|
+
|
35
|
+
|
36
|
+
# get content_data for page
|
37
|
+
#
|
38
|
+
def content_data
|
39
|
+
if @content_data_cache.nil?
|
40
|
+
if @attributes[:content_data]
|
41
|
+
@content_data_cache = Muddyit::Sites::Site::Page::ContentData.new(@muddyit, @attributes[:content_data])
|
42
|
+
else
|
43
|
+
r = self.fetch
|
44
|
+
@content_data_cache = Muddyit::Sites::Site::Page::ContentData.new(@muddyit, r[:content_data])
|
45
|
+
end
|
46
|
+
end
|
47
|
+
@content_data_cache
|
48
|
+
end
|
49
|
+
|
50
|
+
|
51
|
+
# delete the page
#
# Sends a DELETE request for this page's identifier. Always returns true
# when the request does not raise.
#
def destroy
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  identifier = CGI.escape(@attributes[:identifier]).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{identifier}"
  @muddyit.send_request(api_url, :delete, {})
  # Is this the correct thing to return ?
  true
end
|
59
|
+
|
60
|
+
# retrieve related pages
#
# Params
# * options (Optional)
#     hash of request parameters
#
# Returns an array of { :page => page, :count => count } hashes, one per
# result; each page is looked up through the site's pages collection.
#
def related_content(options = {})
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  identifier = CGI.escape(@attributes[:identifier]).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{identifier}/related/content"
  response = @muddyit.send_request(api_url, :get, options)

  # The return format needs sorting out here .......
  response.map do |result|
    { :page => @attributes[:site].pages.find(result['identifier']), :count => result['count'] }
  end
end
|
76
|
+
|
77
|
+
protected
|
78
|
+
# Retrieve this page (content data included) from muddy.it and return the
# response as a hash with symbolized keys.
def fetch
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  identifier = CGI.escape(@attributes[:identifier]).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{identifier}"
  response = @muddyit.send_request(api_url, :get, {:include_content => true })
  response.nested_symbolize_keys!
end
|
83
|
+
|
84
|
+
# Convert results to entities
|
85
|
+
def create_entities
|
86
|
+
results = []
|
87
|
+
if @attributes.has_key?(:results)
|
88
|
+
@attributes[:results].each do |result|
|
89
|
+
results.push Muddyit::Entity.new(@muddyit, result)
|
90
|
+
end
|
91
|
+
@attributes[:results] = results
|
92
|
+
end
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
@@ -0,0 +1,193 @@
|
|
1
|
+
class Muddyit::Sites::Site::Pages < Muddyit::Generic
|
2
|
+
|
3
|
+
# find a specific page from the site
#
# Params
# * type (Required)
#     one of :all or a page identifier
# * options (Optional)
#     hash of request parameters, only used with :all
#
# With :all and a block, every page is yielded, following the 'next_page'
# token from one request to the next, and nil is returned. With :all and no
# block a single request is made and
# { :next_page => token, :pages => [pages] } is returned.
# With a String, the matching page is returned, or nil when the response
# has no 'results' key. Any other kind of type returns nil.
#
# Raises a RuntimeError for a nil type or a Symbol other than :all.
#
def find(type, options = {})
  raise 'no type specified' if type.nil?

  if type.is_a? Symbol
    raise 'invalid type specified' unless type == :all

    api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages"
    if block_given?
      token = nil
      loop do
        # Only send a page token once the service has handed one out
        params = token ? options.merge(:page => token) : options
        response = @muddyit.send_request(api_url, :get, params)
        response['resultsets'].each do |page|
          yield Muddyit::Sites::Site::Page.new(@muddyit, page.merge!(:site => self.site))
        end
        token = response['next_page']
        # A missing or empty token marks the last page
        break if token.nil? || token == ''
      end
      nil
    else
      response = @muddyit.send_request(api_url, :get, options)
      pages = response['resultsets'].map do |page|
        Muddyit::Sites::Site::Page.new(@muddyit, page.merge!(:site => self.site))
      end
      { :next_page => response['next_page'], :pages => pages }
    end
  elsif type.is_a? String
    # CGI.escape leaves "." untouched, so encode it by hand as well; this is
    # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
    identifier = CGI.escape(type).gsub('.', '%2E')
    api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/#{identifier}"
    response = @muddyit.send_request(api_url, :get, {})
    response.has_key?('results') ? Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site)) : nil
  end
end
|
44
|
+
|
45
|
+
# retrieve entities related to the specified entity within the site pages collection
#
# Params
# * uri (Required)
#     uri of the entity to find related entities for
# * options (Optional)
#     hash of request parameters
#
# Returns an array of Muddyit::Entity objects, one per result.
# Raises a RuntimeError when uri is nil.
#
def related_entities(uri, options = {})
  raise "no uri supplied" if uri.nil?

  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  escaped_uri = CGI.escape(uri).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/related/entities/#{escaped_uri}"
  response = @muddyit.send_request(api_url, :get, options)

  # The return format needs sorting out here .......
  response.map { |result| Muddyit::Entity.new(@muddyit, result) }
end
|
62
|
+
|
63
|
+
# submit a page or text for categorisation
#
# Params
# * options (Required)
#     hash of request parameters. :include_content is always set to true,
#     and :uri is filled from :identifier when neither :uri nor :text is
#     given. The hash is modified in place.
#
# Returns a new Muddyit::Sites::Site::Page built from the response.
# Raises a RuntimeError when the :uri (or :identifier) to encode is nil.
#
def categorise(options)
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  encode = lambda { |value| CGI.escape(value).gsub('.', '%2E') }

  # Ensure we get content_data as well
  options[:include_content] = true

  # Set the URI if not set
  if options.has_key?(:identifier) && !options.has_key?(:uri) && !options.has_key?(:text)
    options[:uri] = options[:identifier]
  end

  # Ensure we have encoded the identifier and URI
  if options.has_key?(:uri)
    raise 'uri must not be nil' if options[:uri].nil?
    options[:uri] = encode.call(options[:uri])
  elsif options.has_key?(:identifier)
    raise 'identifier must not be nil' if options[:identifier].nil?
    options[:identifier] = encode.call(options[:identifier])
  end

  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/categorise"
  response = @muddyit.send_request(api_url, :post, options)
  Muddyit::Sites::Site::Page.new(@muddyit, response.merge!(:site => self.site))
end
|
89
|
+
|
90
|
+
# find all pages with specified entity
|
91
|
+
#
|
92
|
+
# Params
|
93
|
+
# * uri (Required)
|
94
|
+
# a dbpedia URI
|
95
|
+
# * options (Optional)
|
96
|
+
#
|
97
|
+
#
|
98
|
+
def find_by_entity(uri, options = {}, &block)
|
99
|
+
queryAllWithURI(uri, options, &block)
|
100
|
+
end
|
101
|
+
|
102
|
+
# find all pages with specified entities
|
103
|
+
#
|
104
|
+
# Params
|
105
|
+
# * uris (Required)
|
106
|
+
# an array of dbpedia URIs
|
107
|
+
# * options (Optional)
|
108
|
+
#
|
109
|
+
#
|
110
|
+
def find_by_entities(uris, options = {}, &block)
|
111
|
+
queryAllWithURI(uris.join(','), options, &block)
|
112
|
+
end
|
113
|
+
|
114
|
+
# find all pages with specified term
|
115
|
+
#
|
116
|
+
# Params
|
117
|
+
# * term (Required)
|
118
|
+
# a string e.g. 'Gordon Brown'
|
119
|
+
# * options (Optional)
|
120
|
+
#
|
121
|
+
#
|
122
|
+
def find_by_term(term, options = {}, &block)
|
123
|
+
queryAllWithTerm(term, options, &block)
|
124
|
+
end
|
125
|
+
|
126
|
+
# find all pages with specified terms
|
127
|
+
#
|
128
|
+
# Params
|
129
|
+
# * terms (Required)
|
130
|
+
# an array of strings e.g. ['Gordon Brown', 'Tony Blair']
|
131
|
+
# * options (Optional)
|
132
|
+
#
|
133
|
+
#
|
134
|
+
def find_by_terms(terms, options = {}, &block)
|
135
|
+
queryAllWithTerm(terms.join(','), options, &block)
|
136
|
+
end
|
137
|
+
|
138
|
+
protected
|
139
|
+
|
140
|
+
# find all pages with specified entit(y|ies)
|
141
|
+
#
|
142
|
+
# multiple uris may be specified using commas
|
143
|
+
#
|
144
|
+
# Params
|
145
|
+
# * options (Required)
|
146
|
+
# must contain uri parameter which corresponds to dbpedia uri
|
147
|
+
#
|
148
|
+
def queryAllWithURI(uri, options, &block)
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  escaped_uri = CGI.escape(uri).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/withentities/#{escaped_uri}"
  query_page(api_url, options, &block)
end
|
152
|
+
|
153
|
+
# find all pages with specified term(s)
|
154
|
+
#
|
155
|
+
# multiple terms may be specified using commas
|
156
|
+
#
|
157
|
+
# Params
|
158
|
+
# * options (Required)
|
159
|
+
#
|
160
|
+
#
|
161
|
+
def queryAllWithTerm(term, options, &block)
  # CGI.escape leaves "." untouched, so encode it by hand as well; this is
  # what URI.escape(..., '.') did before it was removed in Ruby 3.0.
  escaped_term = CGI.escape(term).gsub('.', '%2E')
  api_url = "#{@muddyit.rest_endpoint}/sites/#{self.site.attributes[:token]}/pages/withterms/#{escaped_term}"
  query_page(api_url, options, &block)
end
|
165
|
+
|
166
|
+
# utility method for term and uri query calls
#
# Params
# * api_url (Required)
#     must contain uri to make request to
# * options (Required)
#     hash of request parameters
#
# With a block, every matching page is yielded, following the 'next_page'
# token from one request to the next, and nil is returned. Without a block
# a single request is made and
# { :next_page => token, :pages => [pages] } is returned.
#
def query_page(api_url, options)
  if block_given?
    token = nil
    loop do
      # Only send a page token once the service has handed one out
      params = token ? options.merge(:page => token) : options
      response = @muddyit.send_request(api_url, :get, params)
      response['resultsets'].each do |page|
        yield Muddyit::Sites::Site::Page.new(@muddyit, page.merge!(:site => self.site))
      end
      token = response['next_page']
      # A missing or empty token marks the last page
      break if token.nil? || token == ''
    end
    nil
  else
    # The caller's options are passed on, and the token is read with the
    # string key used by the decoded JSON response
    response = @muddyit.send_request(api_url, :get, options)
    pages = response['resultsets'].map do |page|
      Muddyit::Sites::Site::Page.new(@muddyit, page.merge!(:site => self.site))
    end
    { :next_page => response['next_page'], :pages => pages }
  end
end
|
192
|
+
|
193
|
+
end
|
data/lib/muddyit/site.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
class Muddyit::Sites::Site < Muddyit::Generic
|
2
|
+
|
3
|
+
# get pages object for site
|
4
|
+
#
|
5
|
+
def pages() @pages ||= Muddyit::Sites::Site::Pages.new(@muddyit, :site => self) end
|
6
|
+
|
7
|
+
protected
|
8
|
+
def fetch
|
9
|
+
api_url = "#{@muddyit.rest_endpoint}/sites/#{@attributes[:token]}"
|
10
|
+
response = @muddyit.send_request(api_url, :get, {})
|
11
|
+
response['site'].nested_symbolize_keys!
|
12
|
+
end
|
13
|
+
|
14
|
+
end
|
@@ -0,0 +1,45 @@
|
|
1
|
+
class Muddyit::Sites < Muddyit::Base
|
2
|
+
|
3
|
+
# create a new sites object
|
4
|
+
# not a muddyit:generic as it doesn't need the method missing loader
|
5
|
+
#
|
6
|
+
# Params :
|
7
|
+
#
|
8
|
+
# * muddyit (Required)
|
9
|
+
# a muddyit::base instance
|
10
|
+
#
|
11
|
+
def initialize(muddyit)
|
12
|
+
@muddyit = muddyit
|
13
|
+
end
|
14
|
+
|
15
|
+
# find a specific site
|
16
|
+
#
|
17
|
+
# Params
|
18
|
+
# * type (Required)
|
19
|
+
# one of :all or a site token
|
20
|
+
#
|
21
|
+
def find(type, options = {})
|
22
|
+
raise 'no type specified' unless type
|
23
|
+
|
24
|
+
if type.is_a? Symbol
|
25
|
+
case type
|
26
|
+
when :all
|
27
|
+
api_url = "#{@muddyit.rest_endpoint}/sites/"
|
28
|
+
response = @muddyit.send_request(api_url, :get, options)
|
29
|
+
sites = []
|
30
|
+
response.each { |site| sites.push Muddyit::Sites::Site.new(@muddyit, site['site']) }
|
31
|
+
return sites
|
32
|
+
else
|
33
|
+
raise 'invalid type specified'
|
34
|
+
end
|
35
|
+
elsif type.is_a? String
|
36
|
+
api_url = "#{@muddyit.rest_endpoint}/sites/#{type}"
|
37
|
+
response = @muddyit.send_request(api_url, :get, options)
|
38
|
+
return Muddyit::Sites::Site.new(@muddyit, response['site'])
|
39
|
+
else
|
40
|
+
raise 'invalid type specified'
|
41
|
+
end
|
42
|
+
|
43
|
+
end
|
44
|
+
|
45
|
+
end
|
data/lib/muddyit_fu.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'net/http'
|
3
|
+
require 'cgi'
|
4
|
+
require 'json'
|
5
|
+
#require 'json/ext'
|
6
|
+
require 'openssl'
|
7
|
+
require 'base64'
|
8
|
+
|
9
|
+
require 'pp'
|
10
|
+
|
11
|
+
class Module
|
12
|
+
def class_attr_accessor(attribute_name)
|
13
|
+
class_eval <<-CODE
|
14
|
+
def self.#{attribute_name}
|
15
|
+
@@#{attribute_name} ||= nil
|
16
|
+
end
|
17
|
+
def self.#{attribute_name}=(value)
|
18
|
+
@@#{attribute_name} = value
|
19
|
+
end
|
20
|
+
CODE
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
|
25
|
+
class Hash
  # File merb/core_ext/hash.rb, line 166
  #
  # Converts every key that responds to to_sym into a symbol, in place,
  # recursing into values that are hashes themselves (hashes held inside
  # arrays are left alone). Returns self.
  def nested_symbolize_keys!
    # Walk over a snapshot of the keys: adding keys to a hash while iterating
    # it with each raises on Ruby 1.9 and later.
    keys.each do |key|
      value = delete(key)
      value.nested_symbolize_keys! if Hash === value
      self[key.respond_to?(:to_sym) ? key.to_sym : key] = value
    end
    self
  end

  # Converts every key to a string (via to_s), in place, recursing into
  # values that are hashes themselves. Returns self.
  def nested_stringify_keys!
    # See nested_symbolize_keys! for why a snapshot of the keys is used
    keys.each do |key|
      value = delete(key)
      value.nested_stringify_keys! if Hash === value
      self[key.respond_to?(:to_s) ? key.to_s : key] = value
    end
    self
  end

end
|
46
|
+
|
47
|
+
# base must load first
|
48
|
+
%w(base errors generic sites entity site pages page content_data).each do |file|
|
49
|
+
require File.join(File.dirname(__FILE__), 'muddyit', file)
|
50
|
+
end
|
51
|
+
|
52
|
+
|
data/muddyit_fu.gemspec
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
# -*- encoding: utf-8 -*-
|
2
|
+
|
3
|
+
Gem::Specification.new do |s|
|
4
|
+
s.name = %q{muddyit_fu}
|
5
|
+
s.version = "0.1.0"
|
6
|
+
|
7
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
8
|
+
s.authors = ["robl"]
|
9
|
+
s.date = %q{2009-06-03}
|
10
|
+
s.email = %q{robl[at]monkeyhelper.com}
|
11
|
+
s.extra_rdoc_files = [
|
12
|
+
"LICENSE",
|
13
|
+
"README.rdoc"
|
14
|
+
]
|
15
|
+
s.files = [
|
16
|
+
".gitignore",
|
17
|
+
"CHANGELOG",
|
18
|
+
"LICENSE",
|
19
|
+
"README.rdoc",
|
20
|
+
"Rakefile",
|
21
|
+
"VERSION",
|
22
|
+
"lib/muddyit/base.rb",
|
23
|
+
"lib/muddyit/content_data.rb",
|
24
|
+
"lib/muddyit/entity.rb",
|
25
|
+
"lib/muddyit/errors.rb",
|
26
|
+
"lib/muddyit/generic.rb",
|
27
|
+
"lib/muddyit/page.rb",
|
28
|
+
"lib/muddyit/pages.rb",
|
29
|
+
"lib/muddyit/site.rb",
|
30
|
+
"lib/muddyit/sites.rb",
|
31
|
+
"lib/muddyit_fu.rb",
|
32
|
+
"muddyit_fu.gemspec"
|
33
|
+
]
|
34
|
+
s.has_rdoc = true
|
35
|
+
s.homepage = %q{http://github.com/monkeyhelper/muddyit_fu}
|
36
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
37
|
+
s.require_paths = ["lib"]
|
38
|
+
s.rubygems_version = %q{1.3.1}
|
39
|
+
s.summary = %q{Provides a ruby interface to muddy.it}
|
40
|
+
|
41
|
+
if s.respond_to? :specification_version then
|
42
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
43
|
+
s.specification_version = 2
|
44
|
+
|
45
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
46
|
+
else
|
47
|
+
end
|
48
|
+
else
|
49
|
+
end
|
50
|
+
end
|
metadata
CHANGED
@@ -1,43 +1,50 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: monkeyhelper-muddyit_fu
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
|
-
-
|
7
|
+
- robl
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-
|
12
|
+
date: 2009-06-03 00:00:00 -07:00
|
13
13
|
default_executable:
|
14
|
-
dependencies:
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
version_requirement:
|
19
|
-
version_requirements: !ruby/object:Gem::Requirement
|
20
|
-
requirements:
|
21
|
-
- - ">"
|
22
|
-
- !ruby/object:Gem::Version
|
23
|
-
version: 0.0.0
|
24
|
-
version:
|
25
|
-
description: Provides a ruby interface to muddy.it via the REST api
|
26
|
-
email: robl at monkeyhelper.com
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description:
|
17
|
+
email: robl[at]monkeyhelper.com
|
27
18
|
executables: []
|
28
19
|
|
29
20
|
extensions: []
|
30
21
|
|
31
22
|
extra_rdoc_files:
|
32
|
-
-
|
33
|
-
|
34
|
-
|
35
|
-
|
23
|
+
- LICENSE
|
24
|
+
- README.rdoc
|
25
|
+
files:
|
26
|
+
- .gitignore
|
27
|
+
- CHANGELOG
|
28
|
+
- LICENSE
|
29
|
+
- README.rdoc
|
30
|
+
- Rakefile
|
31
|
+
- VERSION
|
32
|
+
- lib/muddyit/base.rb
|
33
|
+
- lib/muddyit/content_data.rb
|
34
|
+
- lib/muddyit/entity.rb
|
35
|
+
- lib/muddyit/errors.rb
|
36
|
+
- lib/muddyit/generic.rb
|
37
|
+
- lib/muddyit/page.rb
|
38
|
+
- lib/muddyit/pages.rb
|
39
|
+
- lib/muddyit/site.rb
|
40
|
+
- lib/muddyit/sites.rb
|
41
|
+
- lib/muddyit_fu.rb
|
42
|
+
- muddyit_fu.gemspec
|
43
|
+
has_rdoc: true
|
36
44
|
homepage: http://github.com/monkeyhelper/muddyit_fu
|
37
45
|
post_install_message:
|
38
46
|
rdoc_options:
|
39
|
-
- --
|
40
|
-
- README
|
47
|
+
- --charset=UTF-8
|
41
48
|
require_paths:
|
42
49
|
- lib
|
43
50
|
required_ruby_version: !ruby/object:Gem::Requirement
|
@@ -58,6 +65,6 @@ rubyforge_project:
|
|
58
65
|
rubygems_version: 1.2.0
|
59
66
|
signing_key:
|
60
67
|
specification_version: 2
|
61
|
-
summary: Provides a ruby interface to muddy.it
|
68
|
+
summary: Provides a ruby interface to muddy.it
|
62
69
|
test_files: []
|
63
70
|
|
/data/{README → README.rdoc}
RENAMED
File without changes
|