muddyit_fu 0.2.6

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ nbproject
data/CHANGELOG ADDED
@@ -0,0 +1,31 @@
1
+ muddyit_fu Changelog
2
+
3
+ 0.2.1
4
+ - Updated to reflect new api
5
+
6
+ 0.2.0
7
+ - Added new oauth style authentication method
8
+
9
+ 0.1.0
10
+ - Moved to using Jeweler for gem management, should fix previously empty gems
11
+
12
+ 0.0.4
13
+ - Fixed content_data bug when attributes hadn't been requested
14
+ - Added proper dbpedia entity support for Entity class
15
+
16
+ 0.0.3
17
+ - Updated License
18
+ - Modified related and related_content functionality for pages to match muddy server API changes
19
+
20
+ 0.0.2
21
+
22
+ - Added License
23
+ - Added Changelog (!)
24
+ - Moved back to using JSON for JRuby people
25
+ - Improved content data terms data struct
26
+ - Cached content data struct for a page object
27
+ - Added a 'classification' method to entity to make up for type being an existing method
28
+
29
+ 0.0.1
30
+
31
+ - First Release
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Rob Lee
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,111 @@
1
+ = muddyit_fu
2
+
3
+ == Installation
4
+
5
+ sudo gem install gemcutter
6
+ sudo gem tumble
7
+ sudo gem install muddyit_fu
8
+
9
+ == Getting started
10
+
11
+ muddy.it uses oauth to manage its api access. To access the muddy.it data
12
+ programmatically you will need to register an application. Login and visit :
13
+
14
+ http://www.muddy.it/oauth_clients/
15
+
16
+ You can register an application here, a callback URI isn't required.
17
+
18
+ The 'consumer token' and 'consumer secret' are used to generate a token for
19
+ accessing muddy.it. For further details and an example of how to programmatically
20
+ generate a new access token for muddy.it see here :
21
+
22
+ http://stakeventures.com/articles/2008/02/23/developing-oauth-clients-in-ruby
23
+
24
+ See the 'Authorising clients using irb' section for a sample irb session.
25
+
26
+ These details are then used to provide access to the service. The credentials
27
+ can be stored in a yml file, an example of which is provided below.
28
+
29
+ == Example muddyit.yml
30
+
31
+ ---
32
+ consumer_key: "YOUR_CONSUMER_KEY"
33
+ consumer_secret: "YOUR_CONSUMER_SECRET"
34
+ access_token: "YOUR_ACCESS_TOKEN"
35
+ access_token_secret: "YOUR_ACCESS_TOKEN_SECRET"
36
+
37
+ == Retrieving all sites
38
+
39
+ require 'muddyit_fu'
40
+ muddyit = Muddyit.new('muddyit.yml')
41
+ muddyit.sites.find(:all).each do |site|
42
+ puts "#{site.label} : #{site.token}"
43
+ end
44
+
45
+ == Retrieving a single site
46
+
47
+ require 'muddyit_fu'
48
+ muddyit = Muddyit.new('muddyit.yml')
49
+ puts muddyit.sites.find('a0ret4').label
50
+
51
+ == Categorisation request
52
+
53
+ require 'muddyit_fu'
54
+ muddyit = Muddyit.new('muddyit.yml')
55
+ site = muddyit.sites.find(:all).first
56
+ site.pages.create({:uri => 'http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm'}, {:minimum_confidence => 0.2})
57
+
58
+ == View categorised pages
59
+
60
+ require 'muddyit_fu'
61
+ muddyit = Muddyit.new(:consumer_key => 'aaa',
62
+ :consumer_secret => 'bbb',
63
+ :access_token => 'ccc',
64
+ :access_token_secret => 'ddd')
65
+ site = muddyit.sites.find(:all).first
66
+ site.pages.find(:all) do |page|
67
+ puts page.title
68
+ page.entities.each do |entity|
69
+ puts entity.uri
70
+ end
71
+ end
72
+
73
+ == View all pages containing 'Gordon Brown'
74
+
75
+ require 'muddyit_fu'
76
+ muddyit = Muddyit.new('muddyit.yml')
77
+ site = muddyit.sites.find(:all).first
78
+ site.pages.find_by_entity('http://dbpedia.org/resource/Gordon_Brown') do |page|
79
+ puts page.identifier
80
+ end
81
+
82
+ == Find related entities for 'Gordon Brown'
83
+
84
+ require 'muddyit_fu'
85
+ muddyit = Muddyit.new('muddyit.yml')
86
+ site = muddyit.sites.find(:all).first
87
+ puts "Related entity\tOccurrence"
88
+ site.entities.find_related('http://dbpedia.org/resource/Gordon_Brown').each do |entry|
89
+ puts "#{entry[:entity].uri}\t#{entry[:count]}"
90
+ end
91
+
92
+ == Find related content for : http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
93
+
94
+ require 'muddyit_fu'
95
+ muddyit = Muddyit.new('muddyit.yml')
96
+ site = muddyit.sites.find(:all).first
97
+ page = site.pages.find(:all, :uri => 'http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm')[:pages].first
98
+ puts "Our page : #{page.title}\n\n"
99
+ page.related_content.each do |results|
100
+ puts "#{results[:page].title} #{results[:count]}"
101
+ end
102
+
103
+ == Obtaining oauth access credentials
104
+
105
+ See http://gist.github.com/178993
106
+
107
+ == Contact
108
+
109
+ Author: Rob Lee
110
+ Email: support [at] muddy.it
111
+ Main Repository: http://github.com/rattle/muddyit_fu/tree/master
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
# Rakefile for muddyit_fu: gem packaging (via Jeweler), tests, coverage and
# rdoc generation.
require 'rubygems'
require 'rake'
# YAML is used below to read VERSION.yml; require it explicitly rather than
# relying on another library having loaded it.
require 'yaml'

# Gem packaging tasks. Jeweler is optional: without it only a hint is printed.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "muddyit_fu"
    gem.summary = "Provides a ruby interface to muddy.it"
    gem.email = "support[at]muddy.it"
    gem.homepage = "http://github.com/rattle/muddyit_fu"
    gem.authors = ["rattle"]
    gem.add_dependency('json', '>= 0.0.0')
    gem.add_dependency('oauth', '>= 0.3.6')

    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end

rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
end

# Unit tests: every test/**/*_test.rb file, with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/*_test.rb'
  test.verbose = true
end

# Coverage. When rcov is missing, the task exists but aborts with a hint.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/*_test.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end


task :default => :test

# Documentation. Prefer the task class shipped with rdoc and fall back to the
# legacy rake/rdoctask, which newer rake releases no longer provide.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # The version comes from VERSION.yml when present (major/minor/patch keys),
  # otherwise from the plain VERSION file, otherwise it is left blank.
  if File.exist?('VERSION.yml')
    config = YAML.load(File.read('VERSION.yml'))
    version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
  elsif File.exist?('VERSION')
    version = File.read('VERSION').strip
  else
    version = ""
  end

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "muddyit_fu #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
59
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.6
@@ -0,0 +1,129 @@
1
module Muddyit

  # Convenience constructor: Muddyit.new(...) forwards its arguments to
  # Muddyit::Base.new.
  def self.new(*params)
    Muddyit::Base.new(*params)
  end

  # Holds the oauth credentials and performs signed requests against the
  # muddy.it REST endpoint.
  class Base
    class_attr_accessor :http_open_timeout
    class_attr_accessor :http_read_timeout
    attr_accessor :rest_endpoint
    attr_reader :consumer_key, :consumer_secret, :access_token, :access_token_secret

    @@http_open_timeout = 120
    @@http_read_timeout = 120

    REST_ENDPOINT = 'http://www.muddy.it'

    # Set the request signing method.
    # The SHA1/SHA256 digest classes are used instead of the
    # OpenSSL::Digest::Digest alias, which is deprecated and not available in
    # newer openssl releases.
    @@digest1 = OpenSSL::Digest::SHA1.new
    @@digest256 = nil
    if OpenSSL::OPENSSL_VERSION_NUMBER > 0x00908000
      @@digest256 = OpenSSL::Digest::SHA256.new rescue nil # Some installation may not support sha256
    end

    # create a new muddyit object
    #
    # You can either pass a hash with the following attributes:
    #
    # * :consumer_key (Required)
    #                                 the consumer key
    # * :consumer_secret (Required)
    #                                 the consumer secret
    # * :access_token (Required)
    #                                 the token
    # * :access_token_secret (Required)
    #                                 the token secret
    # * :rest_endpoint (Optional)
    #                                 the muddy.it rest service endpoint
    # or:
    # * config_file (Required)
    #                                 yaml file to load configuration from
    #
    # Config Example (yaml file)
    # ---
    # consumer_key: AAA
    # consumer_secret: BBB
    # access_token: CCC
    # access_token_secret: DDD
    #
    # Only consumer_key and consumer_secret are validated here; a RuntimeError
    # is raised when either is missing. A hash argument has its keys
    # symbolized in place.
    #
    def initialize(config_hash_or_file)
      if config_hash_or_file.is_a? Hash
        config = config_hash_or_file
        source = 'config_hash'
      else
        config = YAML.load_file(config_hash_or_file)
        source = 'config file'
      end

      missing_keys_message = "#{source} must contain consumer_key and consumer_secret"
      # An empty or scalar YAML document does not load as a Hash; report it
      # with the same error as missing keys instead of a NoMethodError.
      raise missing_keys_message unless config.is_a? Hash

      config.nested_symbolize_keys!
      @consumer_key = config[:consumer_key]
      @consumer_secret = config[:consumer_secret]
      @access_token = config[:access_token]
      @access_token_secret = config[:access_token_secret]
      @rest_endpoint = config.has_key?(:rest_endpoint) ? config[:rest_endpoint] : REST_ENDPOINT
      raise missing_keys_message unless @consumer_key && @consumer_secret

      @consumer = OAuth::Consumer.new(@consumer_key, @consumer_secret, {:site=>@rest_endpoint})
      @accesstoken = OAuth::AccessToken.new(@consumer, @access_token, @access_token_secret)
    end

    # sends a request to the muddyit REST api and returns the parsed JSON
    #
    # Params
    # * api_url (Required)
    #     the request url (uri.path)
    # * http_method (Optional)
    #     choose between GET (default), POST, PUT, DELETE http request.
    # * opts (Optional)
    #     hash of query parameters (only used for GET requests)
    # * body (Optional)
    #     request body for POST and PUT requests
    #
    def send_request(api_url, http_method = :get, opts = {}, body = nil)
      raise 'no api_url supplied' unless api_url
      res = request_over_http(api_url, http_method, opts, body)

      # Strip any js wrapping methods, i.e. parse only the argument of a
      # callback(...) style response
      if res.body =~ /^.+\((.+)\)$/
        JSON.parse($1)
      else
        JSON.parse(res.body)
      end
    end


    # creates and/or returns the Muddyit::Sites object
    def sites() @sites ||= Muddyit::Sites.new(self) end

    protected

    # For easier testing. You can mock this method with the JSON response you
    # are expecting to receive
    def request_over_http(api_url, http_method, opts, body)
      http_opts = { "Accept" => "application/json", "Content-Type" => "application/json", "User-Agent" => "muddyit_fu" }

      case http_method
      when :get
        url = opts.empty? ? api_url : "#{api_url}?#{build_query_string(opts)}"
        @accesstoken.get(url, http_opts)
      when :post
        @accesstoken.post(api_url, body, http_opts)
      when :put
        @accesstoken.put(api_url, body, http_opts)
      when :delete
        @accesstoken.delete(api_url, http_opts)
      else
        raise 'invalid http method specified'
      end
    end

    # Builds a URL-escaped "key=value&key=value" query string so that values
    # containing spaces, '&', '=' or similar characters cannot corrupt the
    # request URL. A nil value produces "key=".
    def build_query_string(opts)
      opts.map { |key, value| "#{CGI.escape(key.to_s)}=#{CGI.escape(value.to_s)}" }.join("&")
    end

  end
end
@@ -0,0 +1,3 @@
1
# Placeholder: an empty top-level class, kept only so the constant exists.
class Entities; end
@@ -0,0 +1,19 @@
1
module Muddyit

  # Error raised for a failure reported by the muddy.it API; carries the
  # error code alongside the message.
  class Error < RuntimeError
    attr_accessor :code
  end

  class Errors
    class << self
      # Raises the appropriate error for the given error code and message.
      # Currently the only API error class raised is Muddyit::Error.
      #
      # A plain RuntimeError is raised instead when the code or message is
      # missing/empty, or when the code converts to 0 (which is assumed never
      # to be returned by the API).
      def error_for(code, message)
        if code.nil? || message.nil? || message.empty?
          raise RuntimeError, "Internal error. Muddyit API error not identified or unknown error."
        end
        # We assume that error code 0 is never returned
        raise RuntimeError, "Internal error. Unknown error." if code.to_i.zero?

        failure = Muddyit::Error.new("#{code}: #{message}")
        failure.code = code
        raise failure
      end
    end
  end

end
@@ -0,0 +1,59 @@
1
class Muddyit::Generic < Muddyit::Base

  # superclass for data objects to inherit
  #
  # allows us to change the api with little code change via the magic of method
  # missing :)
  #

  attr_accessor :attributes

  # constructor
  #
  # Params
  # * muddyit (Required)
  #     a muddyit::base object
  # * attributes (Optional)
  #     hash of method => value entries used to simulate methods on a real object
  #     (its keys are symbolized in place)
  #
  def initialize(muddyit, attributes = {})
    @muddyit = muddyit
    @attributes = attributes.nested_symbolize_keys!
    @info_added = false
  end

  # request data from muddy.it if we haven't done so before and we don't have
  # the attribute requested (acts as getter + setter)
  #
  # Params
  # * method (Required)
  #     the object method to populate, from attributes or remotely
  # * args (Optional)
  #     the value to set the method to
  #
  # Returns the attribute value when called without a value, true after
  # storing a value. Raises a RuntimeError when the attribute is unknown even
  # after fetching.
  #
  def method_missing(method, args = nil)
    name = method.to_sym

    # Fetch the remote representation at most once per object, and only when
    # the attribute is not already known locally.
    if @info_added == false && !@attributes.has_key?(name)
      @attributes.merge!(self.fetch)
      @info_added = true
    end

    raise "No method named #{method.to_s}" unless @attributes.has_key?(name)

    return @attributes[name] if args.nil?

    @attributes[name] = args
    true
  end

  # Keeps respond_to? in step with method_missing for attributes that are
  # already loaded. It deliberately never triggers a remote fetch, so an
  # attribute that would only appear after fetching is reported as absent.
  def respond_to_missing?(method, include_private = false)
    (@attributes.is_a?(Hash) && @attributes.has_key?(method.to_sym)) || super
  end

  protected

  # method used to retrieve data from muddy.it service, to be overridden
  #
  def fetch
    raise "not implemented"
  end

end
@@ -0,0 +1,35 @@
1
class Muddyit::Sites::Site::Entities::Entity < Muddyit::Generic

  # The entity's type. Exposed under this name because 'type' itself cannot
  # be used as an attribute method. Fetches the entity when the type is not
  # yet known.
  def classification
    # We merge here as we don't want to overwrite a entity specific confidence score
    @attributes.merge!(self.fetch) unless @attributes[:type]
    @attributes[:type]
  end

  # retrieve entities related to the specified entity within the site entities collection
  #
  # Params
  # * options (Optional)
  #     query parameters passed through to the request
  #
  # Returns an array of Entity objects, one per element of the response.
  #
  def related(options = {})
    api_url = "#{api_path_for(self.site)}/related"
    response = @muddyit.send_request(api_url, :get, options)

    # The return format needs sorting out here .......
    response.map { |result| Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result) }
  end

  protected

  # Retrieves this entity's own data and returns it with symbolized keys.
  def fetch
    response = @muddyit.send_request(api_path_for(@attributes[:site]), :get)
    response.nested_symbolize_keys!
  end

  # Builds "/sites/<token>/entities/<md5 of uri>" for this entity.
  #
  # The site may be held either as an object exposing #attributes or as a
  # plain hash with a :token key. `related` previously assumed the first form
  # and `fetch` the second; both forms are accepted here so the two methods
  # agree.
  def api_path_for(site)
    token = site.respond_to?(:attributes) ? site.attributes[:token] : site[:token]
    "/sites/#{token}/entities/#{Digest::MD5.hexdigest(@attributes[:uri])}"
  end

end
@@ -0,0 +1,22 @@
1
class Muddyit::Sites::Site::Entities < Muddyit::Generic

  # retrieve entities related to the specified entity within the site entities collection
  #
  # Params
  # * uri (Required)
  #     uri of the entity to find related entities for
  # * options (Optional)
  #     query parameters passed through to the request
  #
  # Returns an array of hashes with the keys :count and :entity (an Entity
  # built from the rest of the result). Raises a RuntimeError when uri is nil.
  #
  def find_related(uri, options = {})
    # Same exception class as the previous bare raise, now with a message
    raise 'no uri specified' if uri.nil?

    api_url = "/sites/#{self.site.attributes[:token]}/entities/#{Digest::MD5.hexdigest(uri)}/related"
    response = @muddyit.send_request(api_url, :get, options)

    response.map do |result|
      # 'count' is removed first so it does not become an entity attribute
      count = result.delete('count')
      { :count => count, :entity => Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result) }
    end
  end

end
@@ -0,0 +1,21 @@
1
class Muddyit::Sites::Site::Pages::Page::ExtractedContent < Muddyit::Generic

  # Builds the object from the given attributes, then flattens the terms.
  def initialize(muddyit, attributes)
    super(muddyit, attributes)
    populate_terms
  end

  protected

  # Replaces each entry of the :terms attribute with its 'term' value.
  # Does nothing when there is no :terms attribute.
  def populate_terms
    return unless @attributes.has_key?(:terms)

    @attributes[:terms] = @attributes[:terms].map { |entry| entry['term'] }
  end

end
@@ -0,0 +1,88 @@
1
class Muddyit::Sites::Site::Pages::Page < Muddyit::Generic

  # Create a set of entities from the categorisation results
  def initialize(muddyit, attributes = {})
    super(muddyit, attributes)
    create_entities
    @extracted_content_cache = nil
  end

  # submit a page or text for re-categorisation
  #
  # Params
  # * options (Optional)
  #     categorisation options; :include_content is always sent as true
  #
  # Returns a new Page built from the 'page' element of the response.
  #
  def update(options = {})
    # Ensure we get extracted_content as well, without modifying the
    # caller's hash
    request_options = options.merge(:include_content => true)

    body = { :page => { :uri => self.uri, :options => request_options } }

    api_url = "/sites/#{self.site.attributes[:token]}/pages/#{self.identifier}"
    response = @muddyit.send_request(api_url, :put, {}, body.to_json)
    Muddyit::Sites::Site::Pages::Page.new(@muddyit, response['page'].merge!(:site => self.site))
  end


  # get extracted_content for page
  #
  # Uses the :extracted_content attribute when present, otherwise fetches the
  # page. The resulting object is cached for later calls.
  #
  def extracted_content
    @extracted_content_cache ||= begin
      content = @attributes[:extracted_content] || self.fetch[:extracted_content]
      Muddyit::Sites::Site::Pages::Page::ExtractedContent.new(@muddyit, content)
    end
  end


  # delete the page
  #
  # Always returns true; the response itself is not inspected.
  #
  def destroy
    api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}"
    @muddyit.send_request(api_url, :delete, {})
    # Is this the correct thing to return ?
    true
  end

  # retrieve related pages
  #
  # Params
  # * options (Optional)
  #     query parameters passed through to the request
  #
  # Returns an array of hashes with the keys :page and :count. Each related
  # page is looked up individually through the site's pages collection.
  #
  def related_content(options = {})
    api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}/related"
    response = @muddyit.send_request(api_url, :get, options, nil)

    # The return format needs sorting out here .......
    response.map do |result|
      { :page => @attributes[:site].pages.find(result['identifier']), :count => result['count'] }
    end
  end

  protected

  # Retrieves this page, including its content, and returns the response
  # with symbolized keys.
  def fetch
    api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}"
    response = @muddyit.send_request(api_url, :get, {:include_content => true}, nil)
    response.nested_symbolize_keys!
  end

  # Convert results to entities
  def create_entities
    return unless @attributes.has_key?(:entities)

    @attributes[:entities] = @attributes[:entities].map do |result|
      Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result)
    end
  end

end
@@ -0,0 +1,172 @@
1
class Muddyit::Sites::Site::Pages < Muddyit::Generic

  # find a specific page from the site
  #
  # Params
  # * type (Required)
  #     one of :all or a page identifier
  # * options (Optional)
  #     query parameters for the :all request
  #
  # With :all and a block, every page is yielded, following 'next_page'
  # tokens until there are none left. With :all and no block, a hash with the
  # keys :next_page and :pages is returned for a single request. With a page
  # identifier, the page is returned, or nil when the response carries no
  # 'identifier'.
  #
  def find(type, options = {}, &block)
    raise 'no type specified' if type.nil?

    if type.is_a? Symbol
      raise 'invalid type specified' unless type == :all

      query_page("/sites/#{self.site.attributes[:token]}/pages", options, &block)
    elsif type.is_a? String
      api_url = "/sites/#{self.site.attributes[:token]}/pages/#{type}"
      response = @muddyit.send_request(api_url, :get, {})
      response.has_key?('identifier') ? build_page(response) : nil
    end
  end

  # submit a page or text for categorisation
  #
  # Params
  # * doc (Required)
  #     hash describing the document; must contain :uri or :text
  # * options (Optional)
  #     categorisation options; :include_content is always sent as true
  #
  # Returns a Page built from the 'page' element of the response. Raises a
  # RuntimeError when doc has neither :uri nor :text.
  #
  def create(doc = {}, options = {})
    # Same exception class as the previous bare raise, now with a message
    raise 'doc must contain uri or text' unless doc[:uri] || doc[:text]

    # Ensure we get content_data as well; neither argument is modified
    request_options = options.merge(:include_content => true)
    body = { :page => doc.merge(:options => request_options) }

    api_url = "/sites/#{self.site.attributes[:token]}/pages/"
    response = @muddyit.send_request(api_url, :post, {}, body.to_json)
    build_page(response['page'])
  end

  # find all pages with specified entity
  #
  # Params
  # * uri (Required)
  #     a dbpedia URI
  # * options (Optional)
  #
  def find_by_entity(uri, options = {}, &block)
    queryAllWithURI(uri, options, &block)
  end

  # find all pages with specified entities
  #
  # Params
  # * uris (Required)
  #     an array of dbpedia URIs
  # * options (Optional)
  #
  def find_by_entities(uris, options = {}, &block)
    queryAllWithURI(uris.join(','), options, &block)
  end

  # find all pages with specified term
  #
  # Params
  # * term (Required)
  #     a string e.g. 'Gordon Brown'
  # * options (Optional)
  #
  def find_by_term(term, options = {}, &block)
    queryAllWithTerm(term, options, &block)
  end

  # find all pages with specified terms
  #
  # Params
  # * terms (Required)
  #     an array of strings e.g. ['Gordon Brown', 'Tony Blair']
  # * options (Optional)
  #
  def find_by_terms(terms, options = {}, &block)
    queryAllWithTerm(terms.join(','), options, &block)
  end

  protected

  # find all pages with specified entit(y|ies)
  #
  # multiple uris may be specified using commas
  #
  # Params
  # * uri (Required)
  #     dbpedia uri(s); the MD5 hex digest of this string is used in the path
  # * options (Required)
  #
  def queryAllWithURI(uri, options, &block)
    api_url = "/sites/#{self.site.attributes[:token]}/entities/#{Digest::MD5.hexdigest(uri)}"
    query_page(api_url, options, &block)
  end

  # find all pages with specified term(s)
  #
  # multiple terms may be specified using commas
  #
  # Params
  # * term (Required)
  # * options (Required)
  #
  def queryAllWithTerm(term, options, &block)
    # Terms such as 'Gordon Brown' contain characters that are not valid in a
    # URL path. Each term is escaped on its own so the separating commas are
    # kept, and spaces become %20 because '+' is not a space inside a path.
    escaped = term.split(',', -1).map { |part| CGI.escape(part).gsub('+', '%20') }.join(',')
    api_url = "/sites/#{self.site.attributes[:token]}/terms/#{escaped}"
    query_page(api_url, options, &block)
  end

  # utility method for the paged listing, term and uri query calls
  #
  # Params
  # * api_url (Required)
  #     must contain uri to make request to
  # * options (Required)
  #     query parameters; sent on every request and never modified
  #
  # With a block, yields every page and follows 'next_page' until the token
  # is nil or empty, then returns nil. Without a block, performs one request
  # and returns a hash with the keys :next_page and :pages.
  #
  def query_page(api_url, options)
    unless block_given?
      response = @muddyit.send_request(api_url, :get, options)
      pages = response['pages'].map { |page| build_page(page) }
      # The parsed response has string keys
      return { :next_page => response['next_page'], :pages => pages }
    end

    token = nil
    loop do
      params = token.nil? ? options : options.merge(:page => token)
      response = @muddyit.send_request(api_url, :get, params)
      response['pages'].each { |page| yield build_page(page) }
      token = response['next_page']
      # It is not known whether the last page is signalled by a missing or an
      # empty token, so stop on either
      break if token.nil? || token.to_s.empty?
    end
    nil
  end

  # Wraps one page hash from a response in a Page tied to this collection's
  # site.
  def build_page(page)
    Muddyit::Sites::Site::Pages::Page.new(@muddyit, page.merge!(:site => self.site))
  end

end
@@ -0,0 +1,15 @@
1
class Muddyit::Sites::Site < Muddyit::Generic

  # get pages object for site (created on first use)
  #
  def pages
    @pages ||= Muddyit::Sites::Site::Pages.new(@muddyit, :site => self)
  end

  # get entities object for site (created on first use)
  #
  def entities
    @entities ||= Muddyit::Sites::Site::Entities.new(@muddyit, :site => self)
  end

  protected

  # Retrieves this site by its token and returns the 'site' element of the
  # response with symbolized keys.
  def fetch
    site_data = @muddyit.send_request("/sites/#{@attributes[:token]}", :get, {})['site']
    site_data.nested_symbolize_keys!
  end

end
@@ -0,0 +1,45 @@
1
class Muddyit::Sites < Muddyit::Base

  # create a new sites object
  # not a muddyit:generic as it doesn't need the method missing loader
  #
  # Params :
  #
  # * muddyit (Required)
  #     a muddyit::base instance
  #
  def initialize(muddyit)
    @muddyit = muddyit
  end

  # find a specific site
  #
  # Params
  # * type (Required)
  #     one of :all or a site token
  # * options (Optional)
  #     query parameters passed through to the request
  #
  # Returns an array of Site objects for :all, or a single Site for a token.
  # Raises a RuntimeError for a missing or unsupported type.
  #
  def find(type, options = {})
    raise 'no type specified' unless type

    case type
    when Symbol
      raise 'invalid type specified' unless type == :all

      listing = @muddyit.send_request("/sites/", :get, options)
      listing.map { |entry| Muddyit::Sites::Site.new(@muddyit, entry['site']) }
    when String
      found = @muddyit.send_request("/sites/#{type}", :get, options)
      Muddyit::Sites::Site.new(@muddyit, found['site'])
    else
      raise 'invalid type specified'
    end
  end

end
data/lib/muddyit_fu.rb ADDED
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'net/http'
require 'cgi'
require 'yaml'
require 'openssl'
require 'json'
#require 'json/ext'
#gem 'monkeyhelper-oauth', :lib => 'lib/oauth'
require 'oauth/consumer'
require 'digest/md5'

require 'pp'
11
+
12
class Module
  # Defines a class-level reader and writer named attribute_name, backed by
  # the class variable of the same name. The reader initialises the variable
  # to nil when it has not been set yet.
  def class_attr_accessor(attribute_name)
    reader = "def self.#{attribute_name}\n  @@#{attribute_name} ||= nil\nend"
    writer = "def self.#{attribute_name}=(value)\n  @@#{attribute_name} = value\nend"
    class_eval [reader, writer].join("\n")
  end
end
24
+
25
+
26
class Hash
  # Converts every key that responds to to_sym into a symbol, in place,
  # descending into values that are themselves hashes. Hashes held inside
  # arrays are left untouched. Returns self.
  #
  # Adapted from merb/core_ext/hash.rb. The keys are snapshotted before the
  # loop because adding a key to a hash while iterating over it raises on
  # Ruby 1.9 and later.
  def nested_symbolize_keys!
    keys.each do |key|
      value = delete(key)
      sym = key.respond_to?(:to_sym) ? key.to_sym : key
      self[sym] = Hash === value ? value.nested_symbolize_keys! : value
    end
    self
  end

  # Converts every key into its to_s form, in place, descending into values
  # that are themselves hashes. Hashes held inside arrays are left untouched.
  # Returns self.
  def nested_stringify_keys!
    keys.each do |key|
      value = delete(key)
      s = key.respond_to?(:to_s) ? key.to_s : key
      self[s] = Hash === value ? value.nested_stringify_keys! : value
    end
    self
  end

end
47
+
48
# Load the library files in dependency order.
# base must load first
muddyit_files = %w(base errors generic sites entities sites/site sites/pages sites/pages/page sites/pages/page/extracted_content sites/entities sites/entities/entity)
muddyit_files.each { |file| require File.join(File.dirname(__FILE__), 'muddyit', file) }
52
+
53
+
@@ -0,0 +1,57 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
# Gem specification for muddyit_fu: name, version, packaged files and the
# runtime dependencies on json and oauth.
Gem::Specification.new do |s|
  s.name = %q{muddyit_fu}
  s.version = "0.2.6"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["rattle"]
  s.date = %q{2009-11-04}
  s.email = %q{support[at]muddy.it}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".gitignore",
    "CHANGELOG",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "lib/muddyit/base.rb",
    "lib/muddyit/entities.rb",
    "lib/muddyit/errors.rb",
    "lib/muddyit/generic.rb",
    "lib/muddyit/sites.rb",
    "lib/muddyit/sites/entities.rb",
    "lib/muddyit/sites/entities/entity.rb",
    "lib/muddyit/sites/pages.rb",
    "lib/muddyit/sites/pages/page.rb",
    "lib/muddyit/sites/pages/page/extracted_content.rb",
    "lib/muddyit/sites/site.rb",
    "lib/muddyit_fu.rb",
    "muddyit_fu.gemspec"
  ]
  s.homepage = %q{http://github.com/rattle/muddyit_fu}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Provides a ruby interface to muddy.it}

  s.specification_version = 3 if s.respond_to? :specification_version

  # Choose the dependency method by capability instead of comparing
  # Gem::RubyGemsVersion, a constant that is not defined on newer RubyGems
  # releases and made this file fail to load there.
  dependency_method = s.respond_to?(:add_runtime_dependency) ? :add_runtime_dependency : :add_dependency
  [
    ['json', '>= 0.0.0'],
    ['oauth', '>= 0.3.6']
  ].each do |gem_name, requirement|
    s.send(dependency_method, gem_name, [requirement])
  end
end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: muddyit_fu
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.6
5
+ platform: ruby
6
+ authors:
7
+ - rattle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-04 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: json
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: oauth
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.6
34
+ version:
35
+ description:
36
+ email: support[at]muddy.it
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .gitignore
46
+ - CHANGELOG
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/muddyit/base.rb
52
+ - lib/muddyit/entities.rb
53
+ - lib/muddyit/errors.rb
54
+ - lib/muddyit/generic.rb
55
+ - lib/muddyit/sites.rb
56
+ - lib/muddyit/sites/entities.rb
57
+ - lib/muddyit/sites/entities/entity.rb
58
+ - lib/muddyit/sites/pages.rb
59
+ - lib/muddyit/sites/pages/page.rb
60
+ - lib/muddyit/sites/pages/page/extracted_content.rb
61
+ - lib/muddyit/sites/site.rb
62
+ - lib/muddyit_fu.rb
63
+ - muddyit_fu.gemspec
64
+ has_rdoc: true
65
+ homepage: http://github.com/rattle/muddyit_fu
66
+ licenses: []
67
+
68
+ post_install_message:
69
+ rdoc_options:
70
+ - --charset=UTF-8
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: "0"
84
+ version:
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.3.5
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Provides a ruby interface to muddy.it
92
+ test_files: []
93
+