muddyit_fu 0.2.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,2 @@
1
+ *.gem
2
+ nbproject
data/CHANGELOG ADDED
@@ -0,0 +1,31 @@
1
+ muddyit_fu Changelog
2
+
3
+ 0.2.1
4
+ - Updated to reflect new api
5
+
6
+ 0.2.0
7
+ - Added new oauth style authentication method
8
+
9
+ 0.1.0
10
+ - Moved to using Jeweler for gem management, should fix previously empty gems
11
+
12
+ 0.0.4
13
+ - Fixed content_data bug when attributes hadn't been requested
14
+ - Added proper dbpedia entity support for Entity class
15
+
16
+ 0.0.3
17
+ - Updated License
18
+ - Modified related and related_content functionality for pages to match muddy server API changes
19
+
20
+ 0.0.2
21
+
22
+ - Added License
23
+ - Added Changelog (!)
24
+ - Moved back to using JSON for JRuby people
25
+ - Improved content data terms data struct
26
+ - Cached content data struct for a page object
27
+ - Added a 'classification' method to entity to make up for type being an existing method
28
+
29
+ 0.0.1
30
+
31
+ - First Release
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 Rob Lee
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,111 @@
1
+ = muddyit_fu
2
+
3
+ == Installation
4
+
5
+ sudo gem install gemcutter
6
+ sudo gem tumble
7
+ sudo gem install muddyit_fu
8
+
9
+ == Getting started
10
+
11
+ muddy.it uses oauth to manage its api access. To access the muddy.it data
12
+ programmatically you will need to register an application. Login and visit :
13
+
14
+ http://www.muddy.it/oauth_clients/
15
+
16
+ You can register an application here, a callback URI isn't required.
17
+
18
+ The 'consumer token' and 'consumer secret' are used to generate a token for
19
+ accessing muddy.it. For further details and an example of how to programmatically
20
+ generate a new access token for muddy.it see here :
21
+
22
+ http://stakeventures.com/articles/2008/02/23/developing-oauth-clients-in-ruby
23
+
24
+ See the 'Authorising clients using irb' section for a sample irb session.
25
+
26
+ These details are then used to provide access to the service. The credentials
27
+ can be stored in a yml file, an example of which is provided below.
28
+
29
+ == Example muddyit.yml
30
+
31
+ ---
32
+ consumer_key: "YOUR_CONSUMER_KEY"
33
+ consumer_secret: "YOUR_CONSUMER_SECRET"
34
+ access_token: "YOUR_ACCESS_TOKEN"
35
+ access_token_secret: "YOUR_ACCESS_TOKEN_SECRET"
36
+
37
+ == Retrieving all sites
38
+
39
+ require 'muddyit_fu'
40
+ muddyit = Muddyit.new('muddyit.yml')
41
+ muddyit.sites.find(:all).each do |site|
42
+ puts "#{site.label} : #{site.token}"
43
+ end
44
+
45
+ == Retrieving a single site
46
+
47
+ require 'muddyit_fu'
48
+ muddyit = Muddyit.new('muddyit.yml')
49
+ puts muddyit.sites.find('a0ret4').label
50
+
51
+ == Categorisation request
52
+
53
+ require 'muddyit_fu'
54
+ muddyit = Muddyit.new('muddyit.yml')
55
+ site = muddyit.sites.find(:all).first
56
+ site.pages.create({:uri => 'http://news.bbc.co.uk/1/hi/uk_politics/8011321.stm'}, {:minimum_confidence => 0.2})
57
+
58
+ == View categorised pages
59
+
60
+ require 'muddyit_fu'
61
+ muddyit = Muddyit.new(:consumer_key => 'aaa',
62
+ :consumer_secret => 'bbb',
63
+ :access_token => 'ccc',
64
+ :access_token_secret => 'ddd')
65
+ site = muddyit.sites.find(:all).first
66
+ site.pages.find(:all) do |page|
67
+ puts page.title
68
+ page.entities.each do |entity|
69
+ puts entity.uri
70
+ end
71
+ end
72
+
73
+ == View all pages containing 'Gordon Brown'
74
+
75
+ require 'muddyit_fu'
76
+ muddyit = Muddyit.new('muddyit.yml')
77
+ site = muddyit.sites.find(:all).first
78
+ site.pages.find_by_entity('http://dbpedia.org/resource/Gordon_Brown') do |page|
79
+ puts page.identifier
80
+ end
81
+
82
+ == Find related entities for 'Gordon Brown'
83
+
84
+ require 'muddyit_fu'
85
+ muddyit = Muddyit.new('muddyit.yml')
86
+ site = muddyit.sites.find(:all).first
87
+ puts "Related entity\tOccurrence"
88
+ site.entities.find_related('http://dbpedia.org/resource/Gordon_Brown').each do |entry|
89
+ puts "#{entry[:entity].uri}\t#{entry[:count]}"
90
+ end
91
+
92
+ == Find related content for : http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm
93
+
94
+ require 'muddyit_fu'
95
+ muddyit = Muddyit.new('muddyit.yml')
96
+ site = muddyit.sites.find(:all).first
97
+ page = site.pages.find(:all, :uri => 'http://news.bbc.co.uk/1/hi/uk_politics/7878418.stm')[:pages].first
98
+ puts "Our page : #{page.title}\n\n"
99
+ page.related_content.each do |results|
100
+ puts "#{results[:page].title} #{results[:count]}"
101
+ end
102
+
103
+ == Obtaining oauth access credentials
104
+
105
+ See http://gist.github.com/178993
106
+
107
+ == Contact
108
+
109
+ Author: Rob Lee
110
+ Email: support [at] muddy.it
111
+ Main Repository: http://github.com/rattle/muddyit_fu/tree/master
data/Rakefile ADDED
@@ -0,0 +1,59 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "muddyit_fu"
8
+ gem.summary = "Provides a ruby interface to muddy.it"
9
+ gem.email = "support[at]muddy.it"
10
+ gem.homepage = "http://github.com/rattle/muddyit_fu"
11
+ gem.authors = ["rattle"]
12
+ gem.add_dependency('json', '>= 0.0.0')
13
+ gem.add_dependency('oauth', '>= 0.3.6')
14
+
15
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
+ end
17
+
18
+ rescue LoadError
19
+ puts "Jeweler (or a dependency) not available. Install it with: sudo gem install jeweler"
20
+ end
21
+
22
+ require 'rake/testtask'
23
+ Rake::TestTask.new(:test) do |test|
24
+ test.libs << 'lib' << 'test'
25
+ test.pattern = 'test/**/*_test.rb'
26
+ test.verbose = true
27
+ end
28
+
29
+ begin
30
+ require 'rcov/rcovtask'
31
+ Rcov::RcovTask.new do |test|
32
+ test.libs << 'test'
33
+ test.pattern = 'test/**/*_test.rb'
34
+ test.verbose = true
35
+ end
36
+ rescue LoadError
37
+ task :rcov do
38
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
39
+ end
40
+ end
41
+
42
+
43
+ task :default => :test
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ if File.exist?('VERSION.yml')
48
+ config = YAML.load(File.read('VERSION.yml'))
49
+ version = "#{config[:major]}.#{config[:minor]}.#{config[:patch]}"
50
+ else
51
+ version = ""
52
+ end
53
+
54
+ rdoc.rdoc_dir = 'rdoc'
55
+ rdoc.title = "muddyit_fu #{version}"
56
+ rdoc.rdoc_files.include('README*')
57
+ rdoc.rdoc_files.include('lib/**/*.rb')
58
+ end
59
+
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.2.6
@@ -0,0 +1,129 @@
1
+ module Muddyit
2
+
3
+ def self.new(*params)
4
+ Muddyit::Base.new(*params)
5
+ end
6
+
7
+ class Base
8
+ class_attr_accessor :http_open_timeout
9
+ class_attr_accessor :http_read_timeout
10
+ attr_accessor :rest_endpoint
11
+ attr_reader :consumer_key, :consumer_secret, :access_token, :access_token_secret
12
+
13
+ @@http_open_timeout = 120
14
+ @@http_read_timeout = 120
15
+
16
+ REST_ENDPOINT = 'http://www.muddy.it'
17
+
18
+ # Set the request signing method
19
+ @@digest1 = OpenSSL::Digest::Digest.new("sha1")
20
+ @@digest256 = nil
21
+ if OpenSSL::OPENSSL_VERSION_NUMBER > 0x00908000
22
+ @@digest256 = OpenSSL::Digest::Digest.new("sha256") rescue nil # Some installation may not support sha256
23
+ end
24
+
25
+ # create a new muddyit object
26
+ #
27
+ # You can either pass a hash with the following attributes:
28
+ #
29
+ # * :consumer_key (Required)
30
+ # the consumer key
31
+ # * :consumer_secret (Required)
32
+ # the consumer secret
33
+ # * :access_token (Required)
34
+ # the token
35
+ # * :access_token_secret (Required)
36
+ # the token secret
37
+ # * :rest_endpoint (Optional)
38
+ # the muddy.it rest service endpoint
39
+ # or:
40
+ # * config_file (Required)
41
+ # yaml file to load configuration from
42
+ #
43
+ # Config Example (yaml file)
44
+ # ---
45
+ # consumer_key: AAA
46
+ # consumer_secret: BBB
47
+ # access_token: CCC
48
+ # access_token_secret: DDD
49
+ #
50
+ def initialize(config_hash_or_file)
51
+ if config_hash_or_file.is_a? Hash
52
+ config_hash_or_file.nested_symbolize_keys!
53
+ @consumer_key = config_hash_or_file[:consumer_key]
54
+ @consumer_secret = config_hash_or_file[:consumer_secret]
55
+ @access_token = config_hash_or_file[:access_token]
56
+ @access_token_secret = config_hash_or_file[:access_token_secret]
57
+ @rest_endpoint = config_hash_or_file.has_key?(:rest_endpoint) ? config_hash_or_file[:rest_endpoint] : REST_ENDPOINT
58
+ raise 'config_hash must contain consumer_key and consumer_secret' unless @consumer_key and @consumer_secret
59
+ else
60
+ config = YAML.load_file(config_hash_or_file)
61
+ config.nested_symbolize_keys!
62
+ @consumer_key = config[:consumer_key]
63
+ @consumer_secret = config[:consumer_secret]
64
+ @access_token = config[:access_token]
65
+ @access_token_secret = config[:access_token_secret]
66
+ @rest_endpoint = config.has_key?(:rest_endpoint) ? config[:rest_endpoint] : REST_ENDPOINT
67
+ raise 'config file must contain consumer_key and consumer_secret' unless @consumer_key and @consumer_secret
68
+ end
69
+
70
+ @consumer = OAuth::Consumer.new(@consumer_key, @consumer_secret, {:site=>@rest_endpoint})
71
+ @accesstoken = OAuth::AccessToken.new(@consumer, @access_token, @access_token_secret)
72
+
73
+ end
74
+
75
+ # Sends a request to the muddy.it REST api and returns the parsed JSON response
76
+ #
77
+ # Params
78
+ # * api_url (Required)
79
+ # the request url (uri.path)
80
+ # * http_method (Optional)
81
+ # one of :get (default), :post, :put or :delete; anything else raises in request_over_http
82
+ # * opts (Optional)
83
+ # hash of query parameters (only appended to the url for :get requests); the OAuth credentials come from the access token and need not be included
84
+ # * body (Optional) request body passed through for :post and :put requests
85
+ def send_request(api_url, http_method = :get, opts = {}, body = nil)
86
+ # a nil or false api_url is rejected before any request is made
87
+ raise 'no api_url supplied' unless api_url
88
+ res = request_over_http(api_url, http_method, opts, body)
89
+ # Strip any js wrapping methods, i.e. a callback( ... ) style wrapper around the JSON payload
90
+
91
+ if res.body =~ /^.+\((.+)\)$/
92
+ r = JSON.parse($1)
93
+ else
94
+ r = JSON.parse(res.body)
95
+ end
96
+
97
+ return r
98
+ end
99
+
100
+
101
+ # creates and/or returns the Muddyit::Sites object
102
+ def sites() @sites ||= Muddyit::Sites.new(self) end
103
+
104
+ protected
105
+
106
+ # For easier testing. You can mock this method with a XML file you re expecting to receive
107
+ def request_over_http(api_url, http_method, opts, body)
108
+
109
+ http_opts = { "Accept" => "application/json", "Content-Type" => "application/json", "User-Agent" => "muddyit_fu" }
110
+ query_string = opts.to_a.map {|x| x.join("=")}.join("&")
111
+
112
+ case http_method
113
+ when :get
114
+ url = opts.empty? ? api_url : "#{api_url}?#{query_string}"
115
+ @accesstoken.get(url, http_opts)
116
+ when :post
117
+ @accesstoken.post(api_url, body, http_opts)
118
+ when :put
119
+ @accesstoken.put(api_url, body, http_opts)
120
+ when :delete
121
+ @accesstoken.delete(api_url, http_opts)
122
+ else
123
+ raise 'invalid http method specified'
124
+ end
125
+
126
+ end
127
+
128
+ end
129
+ end
@@ -0,0 +1,3 @@
1
+ class Entities
2
+ # Placeholder
3
+ end
@@ -0,0 +1,19 @@
1
+ module Muddyit
2
+
3
+ class Error < RuntimeError
4
+ attr_accessor :code
5
+ end
6
+
7
+ class Errors
8
+ # Method used for raising the appropriate error class for a given error code.
9
+ # Currently raises only Muddyit::Error
10
+ def self.error_for(code, message)
11
+ raise RuntimeError.new("Internal error. Muddyit API error not identified or unknown error.") if (code.nil? || message.nil? || message.empty?)
12
+ raise RuntimeError.new("Internal error. Unknown error.") if code.to_i == 0 # We assume that error code 0 is never returned
13
+ e = Muddyit::Error.new("#{code}: #{message}")
14
+ e.code = code
15
+ raise e
16
+ end
17
+ end
18
+
19
+ end
@@ -0,0 +1,59 @@
1
+ class Muddyit::Generic < Muddyit::Base
2
+
3
+ # superclass for data objects to inherit
4
+ #
5
+ # allows us to change the api with little code change via the magic of method
6
+ # missing :)
7
+ #
8
+
9
+ attr_accessor :attributes
10
+
11
+ # constructor
12
+ #
13
+ # Params
14
+ # * muddyit (Required)
15
+ # a muddyit::base object
16
+ # * attributes (Optional)
17
+ # hash of method => value entries used to simulate methods on a real object
18
+ #
19
+ def initialize(muddyit, attributes = {})
20
+ @muddyit = muddyit
21
+ @attributes = attributes.nested_symbolize_keys!
22
+ @info_added = false
23
+ end
24
+
25
+ # request data from muddy.it if we haven't done so before and we don't have
26
+ # the attribute requested (acts as getter + setter)
27
+ #
28
+ # Params
29
+ # * method (Required)
30
+ # the object method to populate, from attributes or remotely
31
+ # * args (Optional)
32
+ # the value to set the method to
33
+ #
34
+ def method_missing(method, args = nil)
35
+ if @info_added == false and !@attributes.has_key?(method.to_sym)
36
+ #puts "Searching for missing method #{method.to_s}"
37
+ @attributes.merge!(self.fetch)
38
+ @info_added = true
39
+ end
40
+ unless @attributes.has_key?(method.to_sym)
41
+ raise "No method named #{method.to_s}"
42
+ end
43
+ if args.nil?
44
+ @attributes[method.to_sym]
45
+ else
46
+ @attributes[method.to_sym] = args
47
+ return true
48
+ end
49
+ end
50
+
51
+ protected
52
+
53
+ # method used to retrieve data from muddy.it service, to be overridden
54
+ #
55
+ def fetch
56
+ raise "not implemented"
57
+ end
58
+
59
+ end
@@ -0,0 +1,35 @@
1
+ class Muddyit::Sites::Site::Entities::Entity < Muddyit::Generic
2
+
3
+ def classification
4
+ unless @attributes[:type]
5
+ # We merge here as we don't want to overwrite a entity specific confidence score
6
+ @attributes.merge!(self.fetch)
7
+ end
8
+ @attributes[:type]
9
+ end
10
+
11
+ # retrieve entities related to the specified entity within the site entities collection
12
+ #
13
+ # Params
14
+ # * options (Optional)
15
+ #
16
+ def related(options = {})
17
+ api_url = "/sites/#{self.site.attributes[:token]}/entities/#{Digest::MD5.hexdigest(@attributes[:uri])}/related"
18
+ response = @muddyit.send_request(api_url, :get, options)
19
+
20
+ results = []
21
+ response.each { |result|
22
+ # The return format needs sorting out here .......
23
+ results.push Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result)
24
+ }
25
+ return results
26
+ end
27
+
28
+ protected
29
+ def fetch
30
+ api_url = "/sites/#{@attributes[:site][:token]}/entities/#{Digest::MD5.hexdigest(@attributes[:uri])}"
31
+ response = @muddyit.send_request(api_url, :get)
32
+ response.nested_symbolize_keys!
33
+ end
34
+
35
+ end
@@ -0,0 +1,22 @@
1
+ class Muddyit::Sites::Site::Entities < Muddyit::Generic
2
+ # Placeholder
3
+
4
+ # retrieve entities related to the specified entity within the site entities collection
5
+ #
6
+ # Params
7
+ # * options (Optional)
8
+ #
9
+ def find_related(uri, options = {})
10
+
11
+ raise if uri.nil?
12
+ api_url = "/sites/#{self.site.attributes[:token]}/entities/#{Digest::MD5.hexdigest(uri)}/related"
13
+ response = @muddyit.send_request(api_url, :get, options)
14
+
15
+ results = []
16
+ response.each { |result|
17
+ results.push :count => result.delete('count'), :entity => Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result)
18
+ }
19
+ return results
20
+ end
21
+
22
+ end
@@ -0,0 +1,21 @@
1
+ class Muddyit::Sites::Site::Pages::Page::ExtractedContent < Muddyit::Generic
2
+
3
+ def initialize(muddyit, attributes)
4
+ super(muddyit, attributes)
5
+ populate_terms
6
+ end
7
+
8
+
9
+ protected
10
+
11
+ def populate_terms
12
+ terms = []
13
+ if @attributes.has_key?(:terms)
14
+ @attributes[:terms].each do |term|
15
+ terms.push term['term']
16
+ end
17
+ @attributes[:terms] = terms
18
+ end
19
+ end
20
+
21
+ end
@@ -0,0 +1,88 @@
1
+ class Muddyit::Sites::Site::Pages::Page < Muddyit::Generic
2
+
3
+ # Create a set of entities from the categorisation results
4
+ def initialize(muddyit, attributes = {})
5
+ super(muddyit, attributes)
6
+ create_entities
7
+ @extracted_content_cache = nil
8
+ end
9
+
10
+ # submit a page or text for re-categorisation
11
+ #
12
+ # Params
13
+ # * options (Required)
14
+ #
15
+ def update(options = {})
16
+
17
+ # Ensure we get extracted_content as well
18
+ options[:include_content] = true
19
+
20
+ body = { :page => { :uri => self.uri, :options => options } }
21
+
22
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/#{self.identifier}"
23
+ response = @muddyit.send_request(api_url, :put, {}, body.to_json)
24
+ return Muddyit::Sites::Site::Pages::Page.new(@muddyit, response['page'].merge!(:site => self.site))
25
+ end
26
+
27
+
28
+ # get extracted_content for page
29
+ #
30
+ def extracted_content
31
+ if @extracted_content_cache.nil?
32
+ if @attributes[:extracted_content]
33
+ @extracted_content_cache = Muddyit::Sites::Site::Pages::Page::ExtractedContent.new(@muddyit, @attributes[:extracted_content])
34
+ else
35
+ r = self.fetch
36
+ @extracted_content_cache = Muddyit::Sites::Site::Pages::Page::ExtractedContent.new(@muddyit, r[:extracted_content])
37
+ end
38
+ end
39
+ @extracted_content_cache
40
+ end
41
+
42
+
43
+ # delete the page
44
+ #
45
+ def destroy
46
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}"
47
+ response = @muddyit.send_request(api_url, :delete, {})
48
+ # Is this the correct thing to return ?
49
+ return true
50
+ end
51
+
52
+ # retrieve related pages
53
+ #
54
+ # Params
55
+ # * options (Optional)
56
+ #
57
+ def related_content(options = {})
58
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}/related"
59
+ response = @muddyit.send_request(api_url, :get, options, nil)
60
+ results = []
61
+ response.each { |result|
62
+ # The return format needs sorting out here .......
63
+ results.push :page => @attributes[:site].pages.find(result['identifier']), :count => result['count']
64
+ }
65
+ return results
66
+ end
67
+
68
+ protected
69
+ def fetch
70
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/#{@attributes[:identifier]}"
71
+
72
+ response = @muddyit.send_request(api_url, :get, {:include_content => true}, nil)
73
+
74
+ response.nested_symbolize_keys!
75
+ end
76
+
77
+ # Convert results to entities
78
+ def create_entities
79
+ results = []
80
+ if @attributes.has_key?(:entities)
81
+ @attributes[:entities].each do |result|
82
+ results.push Muddyit::Sites::Site::Entities::Entity.new(@muddyit, result)
83
+ end
84
+ @attributes[:entities] = results
85
+ end
86
+ end
87
+
88
+ end
@@ -0,0 +1,172 @@
1
+ class Muddyit::Sites::Site::Pages < Muddyit::Generic
2
+
3
+ # find a specific page from the site
4
+ #
5
+ # Params
6
+ # * type (Required)
7
+ # one of :all or a page identifier
8
+ #
9
+ def find(type, options = {})
10
+ raise 'no type specified' if type.nil?
11
+
12
+ if type.is_a? Symbol
13
+ case type
14
+ when :all
15
+ api_url = "/sites/#{self.site.attributes[:token]}/pages"
16
+ if block_given?
17
+ token = nil
18
+ begin
19
+ response = @muddyit.send_request(api_url, :get, options.merge!(:page => token))
20
+ response['pages'].each { |page|
21
+ yield Muddyit::Sites::Site::Pages::Page.new(@muddyit, page.merge!(:site => self.site))
22
+ }
23
+ token = response['next_page']
24
+ # Need to figure out which of the below actually occurs
25
+ end while !token.nil? || !token == ''
26
+ else
27
+ api_url = "/sites/#{self.site.attributes[:token]}/pages"
28
+ response = @muddyit.send_request(api_url, :get, options)
29
+
30
+ pages = []
31
+ response['pages'].each { |page| pages.push Muddyit::Sites::Site::Pages::Page.new(@muddyit, page.merge!(:site => self.site)) }
32
+ return { :next_page => response['next_page'], :pages => pages }
33
+ end
34
+ else
35
+ raise 'invalid type specified'
36
+ end
37
+
38
+ elsif type.is_a? String
39
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/#{type}"
40
+ response = @muddyit.send_request(api_url, :get, {})
41
+ response.has_key?('identifier') ? Muddyit::Sites::Site::Pages::Page.new(@muddyit, response.merge!(:site => self.site)) : nil
42
+ end
43
+ end
44
+
45
+
46
+
47
+ # submit a page or text for categorisation
48
+ #
49
+ # Params
50
+ # * options (Required)
51
+ #
52
+ def create(doc = {}, options = {})
53
+
54
+ # Ensure we get content_data as well
55
+ options[:include_content] = true
56
+
57
+ # Ensure we have encoded the identifier and URI
58
+ unless doc[:uri] || doc[:text]
59
+ raise
60
+ end
61
+
62
+ body = { :page => doc.merge!(:options => options) }
63
+
64
+ api_url = "/sites/#{self.site.attributes[:token]}/pages/"
65
+ response = @muddyit.send_request(api_url, :post, {}, body.to_json)
66
+ return Muddyit::Sites::Site::Pages::Page.new(@muddyit, response['page'].merge!(:site => self.site))
67
+ end
68
+
69
+ # find all pages with specified entity
70
+ #
71
+ # Params
72
+ # * uri (Required)
73
+ # a dbpedia URI
74
+ # * options (Optional)
75
+ #
76
+ #
77
+ def find_by_entity(uri, options = {}, &block)
78
+ queryAllWithURI(uri, options, &block)
79
+ end
80
+
81
+ # find all pages with specified entities
82
+ #
83
+ # Params
84
+ # * uris (Required)
85
+ # an array of dbpedia URIs
86
+ # * options (Optional)
87
+ #
88
+ #
89
+ def find_by_entities(uris, options = {}, &block)
90
+ queryAllWithURI(uris.join(','), options, &block)
91
+ end
92
+
93
+ # find all pages with specified term
94
+ #
95
+ # Params
96
+ # * term (Required)
97
+ # a string e.g. 'Gordon Brown'
98
+ # * options (Optional)
99
+ #
100
+ #
101
+ def find_by_term(term, options = {}, &block)
102
+ queryAllWithTerm(term, options, &block)
103
+ end
104
+
105
+ # find all pages with specified terms
106
+ #
107
+ # Params
108
+ # * terms (Required)
109
+ # an array of strings e.g. ['Gordon Brown', 'Tony Blair']
110
+ # * options (Optional)
111
+ #
112
+ #
113
+ def find_by_terms(terms, options = {}, &block)
114
+ queryAllWithTerm(terms.join(','), options, &block)
115
+ end
116
+
117
+ protected
118
+
119
+ # find all pages with specified entit(y|ies)
120
+ #
121
+ # the whole uri string is MD5 hashed into the request path; NOTE(review): find_by_entities passes several
122
+ # uris joined with commas, which are hashed here as one string - confirm the server expects that
123
+ # Params
124
+ # * uri (Required) dbpedia uri (or comma separated uris) identifying the entit(y|ies)
125
+ # * options (Required) query parameters, forwarded unchanged to query_page
126
+ #
127
+ def queryAllWithURI(uri, options, &block)
128
+ api_url = "/sites/#{self.site.attributes[:token]}/entities/#{Digest::MD5.hexdigest(uri)}"
129
+ query_page(api_url, options, &block)
130
+ end
131
+
132
+ # find all pages with specified term(s)
133
+ #
134
+ # multiple terms may be specified using commas
135
+ #
136
+ # Params
137
+ # * options (Required)
138
+ #
139
+ #
140
+ def queryAllWithTerm(term, options, &block)
141
+ api_url = "/sites/#{self.site.attributes[:token]}/terms/#{term}"
142
+ query_page(api_url, options, &block)
143
+ end
144
+
145
+ # utility method for term and uri query calls
146
+ #
147
+ # Params
148
+ # * api_url (Required)
149
+ # must contain uri to make request to
150
+ #
151
+ def query_page(api_url, options)
152
+ if block_given?
153
+ token = nil
154
+ begin
155
+ options.merge!(:page => token) unless token.nil?
156
+ response = @muddyit.send_request(api_url, :get, options.merge!(:page => token))
157
+ response['pages'].each { |page|
158
+ yield Muddyit::Sites::Site::Pages::Page.new(@muddyit, page.merge!(:site => self.site))
159
+ }
160
+ token = response['next_page']
161
+ # Need to figure out which of the below actually occurs
162
+ end while !token.nil? || !token == ''
163
+ else
164
+ response = @muddyit.send_request(api_url, :get, {})
165
+
166
+ pages = []
167
+ response['pages'].each { |page| pages.push Muddyit::Sites::Site::Pages::Page.new(@muddyit, page.merge!(:site => self.site)) }
168
+ return { :next_page => response[:next_page], :pages => pages }
169
+ end
170
+ end
171
+
172
+ end
@@ -0,0 +1,15 @@
1
+ class Muddyit::Sites::Site < Muddyit::Generic
2
+
3
+ # get pages object for site
4
+ #
5
+ def pages() @pages ||= Muddyit::Sites::Site::Pages.new(@muddyit, :site => self) end
6
+ def entities() @entities ||= Muddyit::Sites::Site::Entities.new(@muddyit, :site => self) end
7
+
8
+ protected
9
+ def fetch
10
+ api_url = "/sites/#{@attributes[:token]}"
11
+ response = @muddyit.send_request(api_url, :get, {})
12
+ response['site'].nested_symbolize_keys!
13
+ end
14
+
15
+ end
@@ -0,0 +1,45 @@
1
+ class Muddyit::Sites < Muddyit::Base
2
+
3
+ # create a new sites object
4
+ # not a muddyit:generic as it doesn't need the method missing loader
5
+ #
6
+ # Params :
7
+ #
8
+ # * muddyit (Required)
9
+ # a muddyit::base instance
10
+ #
11
+ def initialize(muddyit)
12
+ @muddyit = muddyit
13
+ end
14
+
15
+ # find a specific site
16
+ #
17
+ # Params
18
+ # * type (Required)
19
+ # one of :all or a site token
20
+ #
21
+ def find(type, options = {})
22
+ raise 'no type specified' unless type
23
+
24
+ if type.is_a? Symbol
25
+ case type
26
+ when :all
27
+ api_url = "/sites/"
28
+ response = @muddyit.send_request(api_url, :get, options)
29
+ sites = []
30
+ response.each { |site| sites.push Muddyit::Sites::Site.new(@muddyit, site['site']) }
31
+ return sites
32
+ else
33
+ raise 'invalid type specified'
34
+ end
35
+ elsif type.is_a? String
36
+ api_url = "/sites/#{type}"
37
+ response = @muddyit.send_request(api_url, :get, options)
38
+ return Muddyit::Sites::Site.new(@muddyit, response['site'])
39
+ else
40
+ raise 'invalid type specified'
41
+ end
42
+
43
+ end
44
+
45
+ end
data/lib/muddyit_fu.rb ADDED
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'net/http'
3
+ require 'cgi'
4
+ require 'json'
5
+ #require 'json/ext'
6
+ #gem 'monkeyhelper-oauth', :lib => 'lib/oauth'
7
+ require 'oauth/consumer'
8
+ require 'digest/md5'
9
+
10
+ require 'pp'
11
+
12
+ class Module
13
+ def class_attr_accessor(attribute_name)
14
+ class_eval <<-CODE
15
+ def self.#{attribute_name}
16
+ @@#{attribute_name} ||= nil
17
+ end
18
+ def self.#{attribute_name}=(value)
19
+ @@#{attribute_name} = value
20
+ end
21
+ CODE
22
+ end
23
+ end
24
+
25
+
26
+ class Hash
27
+ # File merb/core_ext/hash.rb, line 166
28
+ def nested_symbolize_keys!
29
+ each do |k,v|
30
+ sym = k.respond_to?(:to_sym) ? k.to_sym : k
31
+ self[sym] = Hash === v ? v.nested_symbolize_keys! : v
32
+ delete(k) unless k == sym
33
+ end
34
+ self
35
+ end
36
+
37
+ def nested_stringify_keys!
38
+ each do |k,v|
39
+ s = k.respond_to?(:to_s) ? k.to_s : k
40
+ self[s] = Hash === v ? v.nested_stringify_keys! : v
41
+ delete(k) unless k == s
42
+ end
43
+ self
44
+ end
45
+
46
+ end
47
+
48
+ # base must load first
49
+ %w(base errors generic sites entities sites/site sites/pages sites/pages/page sites/pages/page/extracted_content sites/entities sites/entities/entity).each do |file|
50
+ require File.join(File.dirname(__FILE__), 'muddyit', file)
51
+ end
52
+
53
+
@@ -0,0 +1,57 @@
1
+ # -*- encoding: utf-8 -*-
2
+
3
+ Gem::Specification.new do |s|
4
+ s.name = %q{muddyit_fu}
5
+ s.version = "0.2.6"
6
+
7
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
8
+ s.authors = ["rattle"]
9
+ s.date = %q{2009-11-04}
10
+ s.email = %q{support[at]muddy.it}
11
+ s.extra_rdoc_files = [
12
+ "LICENSE",
13
+ "README.rdoc"
14
+ ]
15
+ s.files = [
16
+ ".gitignore",
17
+ "CHANGELOG",
18
+ "LICENSE",
19
+ "README.rdoc",
20
+ "Rakefile",
21
+ "VERSION",
22
+ "lib/muddyit/base.rb",
23
+ "lib/muddyit/entities.rb",
24
+ "lib/muddyit/errors.rb",
25
+ "lib/muddyit/generic.rb",
26
+ "lib/muddyit/sites.rb",
27
+ "lib/muddyit/sites/entities.rb",
28
+ "lib/muddyit/sites/entities/entity.rb",
29
+ "lib/muddyit/sites/pages.rb",
30
+ "lib/muddyit/sites/pages/page.rb",
31
+ "lib/muddyit/sites/pages/page/extracted_content.rb",
32
+ "lib/muddyit/sites/site.rb",
33
+ "lib/muddyit_fu.rb",
34
+ "muddyit_fu.gemspec"
35
+ ]
36
+ s.homepage = %q{http://github.com/rattle/muddyit_fu}
37
+ s.rdoc_options = ["--charset=UTF-8"]
38
+ s.require_paths = ["lib"]
39
+ s.rubygems_version = %q{1.3.5}
40
+ s.summary = %q{Provides a ruby interface to muddy.it}
41
+
42
+ if s.respond_to? :specification_version then
43
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
44
+ s.specification_version = 3
45
+
46
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
47
+ s.add_runtime_dependency(%q<json>, [">= 0.0.0"])
48
+ s.add_runtime_dependency(%q<oauth>, [">= 0.3.6"])
49
+ else
50
+ s.add_dependency(%q<json>, [">= 0.0.0"])
51
+ s.add_dependency(%q<oauth>, [">= 0.3.6"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<json>, [">= 0.0.0"])
55
+ s.add_dependency(%q<oauth>, [">= 0.3.6"])
56
+ end
57
+ end
metadata ADDED
@@ -0,0 +1,93 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: muddyit_fu
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.6
5
+ platform: ruby
6
+ authors:
7
+ - rattle
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-11-04 00:00:00 +00:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: json
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 0.0.0
24
+ version:
25
+ - !ruby/object:Gem::Dependency
26
+ name: oauth
27
+ type: :runtime
28
+ version_requirement:
29
+ version_requirements: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - ">="
32
+ - !ruby/object:Gem::Version
33
+ version: 0.3.6
34
+ version:
35
+ description:
36
+ email: support[at]muddy.it
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - .gitignore
46
+ - CHANGELOG
47
+ - LICENSE
48
+ - README.rdoc
49
+ - Rakefile
50
+ - VERSION
51
+ - lib/muddyit/base.rb
52
+ - lib/muddyit/entities.rb
53
+ - lib/muddyit/errors.rb
54
+ - lib/muddyit/generic.rb
55
+ - lib/muddyit/sites.rb
56
+ - lib/muddyit/sites/entities.rb
57
+ - lib/muddyit/sites/entities/entity.rb
58
+ - lib/muddyit/sites/pages.rb
59
+ - lib/muddyit/sites/pages/page.rb
60
+ - lib/muddyit/sites/pages/page/extracted_content.rb
61
+ - lib/muddyit/sites/site.rb
62
+ - lib/muddyit_fu.rb
63
+ - muddyit_fu.gemspec
64
+ has_rdoc: true
65
+ homepage: http://github.com/rattle/muddyit_fu
66
+ licenses: []
67
+
68
+ post_install_message:
69
+ rdoc_options:
70
+ - --charset=UTF-8
71
+ require_paths:
72
+ - lib
73
+ required_ruby_version: !ruby/object:Gem::Requirement
74
+ requirements:
75
+ - - ">="
76
+ - !ruby/object:Gem::Version
77
+ version: "0"
78
+ version:
79
+ required_rubygems_version: !ruby/object:Gem::Requirement
80
+ requirements:
81
+ - - ">="
82
+ - !ruby/object:Gem::Version
83
+ version: "0"
84
+ version:
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.3.5
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Provides a ruby interface to muddy.it
92
+ test_files: []
93
+