RubyGems - yahoo_term_extraction - Versions diffs - 0.2.3 - Mend

yahoo_term_extraction 0.2.3

Files changed (4) hide show

data/LICENSE +26 -0
data/lib/yahoo_term_extractor.rb +52 -0
data/spec/yahoo_term_extractor_spec.rb +92 -0
metadata +55 -0

data/LICENSE ADDED Viewed

@@ -0,0 +1,26 @@
+Copyright (c) 2008 Deb Bassett
+Permission is hereby granted, free of charge, to any person
+obtaining a copy of this software and associated documentation
+files (the "Software"), to deal in the Software without
+restriction, including without limitation the rights to use,
+copy, modify, merge, publish, distribute, sublicense, and/or sell
+copies of the Software, and to permit persons to whom the
+Software is furnished to do so, subject to the following
+conditions:
+The above copyright notice and this permission notice shall be
+included in all copies or substantial portions of the Software.
+THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
+OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
+NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
+HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
+WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
+FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
+OTHER DEALINGS IN THE SOFTWARE.
+The Yahoo Term Extraction module is copywrited free software by Deb Bassett
+<deb@urbanwide.com>. You can redistribute it under the terms specified in
+the COPYING file of the Ruby distribution.

data/lib/yahoo_term_extractor.rb ADDED Viewed

@@ -0,0 +1,52 @@
+require 'rexml/document'
+require 'net/http'
+module Yahoo
+  class APIError < Exception; end
+  # Yahoo Term Extractor ruby class.
+  # Be warned that Yahoo rate limit this api to 5000 queries a day!
+  # You need an appid from yahoo to use this.
+  class TermExtractor
+    API_URL = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
+    #   term_extractor = YahooTermExtractor.new("appid")
+    def initialize(appid)
+      raise ArgumentError, 'appid must be supplied' if appid.nil? or appid.empty?
+      @appid = appid
+    end
+    #   term_extractor.extract_terms(" Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region.", :query => "leeds")
+    def extract_terms(context, args = {})
+      raise ArgumentError, 'context must be supplied' if context.empty?
+      params = { 'appid' => @appid, 'context' => context}
+      params['query'] = args[:query] if args.has_key?(:query) and !args[:query].empty?
+      # DEB: put a begin -> rescue block round this!? or just let them bubble to the top...
+      response = Net::HTTP.post_form(API_URL, params)
+      unless response.code == '200'
+        raise Yahoo::APIError, "#{response.code} received from Yahoo API"
+      end
+      xml = REXML::Document.new response.body
+      if !xml.root.nil? and xml.root.name == "ResultSet"
+        terms = []
+        xml.each_element("/ResultSet/Result") do |term|
+          terms << term.text
+        end
+      elsif !xml.root.nil? and xml.root.name == "Error"
+        messages = []
+        xml.each_element("/Error/Message") do |message|
+          messages << message.text
+        end
+        raise Yahoo::APIError, "#{xml.root.text} #{messages.join(', ')}"
+      else
+        raise Yahoo::APIError, "Unknown document returned: #{response.body}"
+      end
+      return terms
+    end
+  end
+end

data/spec/yahoo_term_extractor_spec.rb ADDED Viewed

@@ -0,0 +1,92 @@
+# require File.dirname(__FILE__) + '/spec_helper'
+require File.dirname(__FILE__) + '/../lib/yahoo_term_extractor'
+describe Yahoo::TermExtractor do
+  describe "new" do
+    it "should create a Yahoo::TermExtractor object" do
+      Yahoo::TermExtractor.new("appid").should be_an_instance_of(Yahoo::TermExtractor)
+    end
+    it "should not raise an exception if appid is supplied" do
+      lambda {Yahoo::TermExtractor.new("appid")}.should_not raise_error(ArgumentError, "appid must be supplied")
+    end
+    it "should raise an exception if an appid is nil" do
+      lambda {Yahoo::TermExtractor.new(nil)}.should raise_error(ArgumentError, "appid must be supplied")
+    end
+    it "should raise an exception if an appid is empty" do
+      lambda {Yahoo::TermExtractor.new("")}.should raise_error(ArgumentError, "appid must be supplied")
+    end
+  end
+  describe "extract_terms" do
+    @@context = "Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region."
+    @@good_xml = %{
+       <ResultSet xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractorResponse.xsd' xmlns='urn:yahoo:cate'>
+         <Result>leeds city</Result>
+         <Result>selby</Result>
+         <Result>proposals</Result>
+         <Result>principle</Result>
+         <Result>partnership</Result>
+       </ResultSet>
+    }
+    @@error_xml = %{
+      <Error xmlns="urn:yahoo:api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://api.yahoo.com/Api/V1/error.xsd">The following errors were detected: <Message>limit exceeded</Message></Error>
+    }
+    @@unknown_xml = %{<thing>why do I bother?</thing>}
+    @@query = 'leeds'
+    before(:each) do
+      @appid = "test-app-id"
+      @api_uri = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
+      @mock_response = mock("response")
+      @term_extractor = Yahoo::TermExtractor.new(@appid)
+    end
+    it "should raise an exception if context is nil" do
+      lambda {@term_extractor.extract_terms()}.should raise_error(ArgumentError, "wrong number of arguments (0 for 1)")
+    end
+    it "should raise an exception if context is empty" do
+      lambda {@term_extractor.extract_terms("")}.should raise_error(ArgumentError, "context must be supplied")
+    end
+    it "should raise an exception if a non-200 response code is returned" do
+      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @@context})
+      @response.should_receive(:code).twice.and_return("404")
+      lambda {@term_extractor.extract_terms(@@context)}.should raise_error(Yahoo::APIError, "404 received from Yahoo API")
+    end
+    it "should return an array of terms given a context" do
+      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @@context})
+      @response.should_receive(:code).once.and_return("200")
+      @response.should_receive(:body).and_return(@@good_xml)
+      @term_extractor.extract_terms(@@context).should == ["leeds city", "selby", "proposals", "principle", "partnership"]
+    end
+    it "should raise an exception if an error xml resultset is returned" do
+      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @@context})
+      @response.should_receive(:code).once.and_return("200")
+      @response.should_receive(:body).and_return(@@error_xml)
+      lambda {@term_extractor.extract_terms(@@context)}.should raise_error(Yahoo::APIError, "The following errors were detected:  limit exceeded")
+    end
+    it "should raise an exception if an unknown xml document is returned" do
+        Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @@context})
+        @response.should_receive(:code).once.and_return("200")
+        @response.should_receive(:body).twice.and_return(@@unknown_xml)
+        lambda {@term_extractor.extract_terms(@@context)}.should raise_error(Yahoo::APIError, "Unknown document returned: " + @@unknown_xml)
+    end
+    it "should accept query as an optional parameter" do
+      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @@context, 'query' => @@query})
+      @response.should_receive(:code).once.and_return("200")
+      @response.should_receive(:body).once.and_return(@@good_xml)
+      lambda {@term_extractor.extract_terms(@@context, :query => @@query)}.should_not raise_error
+    end
+  end
+end

metadata ADDED Viewed

@@ -0,0 +1,55 @@
+--- !ruby/object:Gem::Specification
+name: yahoo_term_extraction
+version: !ruby/object:Gem::Version
+  version: 0.2.3
+platform: ruby
+authors:
+- Deb Bassett
+autorequire:
+bindir: bin
+cert_chain: []
+date: 2008-07-28 00:00:00 +01:00
+default_executable:
+dependencies: []
+description: This library can be used to call the Yahoo Term Extraction Web Service from Ruby. The Term Extraction Web Service provides a list of significant words or phrases extracted from a larger content.
+email: deb@urbanwide.com
+executables: []
+extensions: []
+extra_rdoc_files: []
+files:
+- lib/yahoo_term_extractor.rb
+- spec/yahoo_term_extractor_spec.rb
+- LICENSE
+has_rdoc: false
+homepage: http://www.urbanwide.com/
+post_install_message:
+rdoc_options: []
+require_paths:
+- lib
+required_ruby_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+required_rubygems_version: !ruby/object:Gem::Requirement
+  requirements:
+  - - ">="
+    - !ruby/object:Gem::Version
+      version: "0"
+  version:
+requirements: []
+rubyforge_project:
+rubygems_version: 1.0.1
+signing_key:
+specification_version: 2
+summary: A library for accessing the Yahoo Term Extraction Web Service
+test_files: []