yahoo_term_extraction 0.2.3

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,26 @@
1
+ Copyright (c) 2008 Deb Bassett
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ The Yahoo Term Extraction module is copyrighted free software by Deb Bassett
25
+ <deb@urbanwide.com>. You can redistribute it under the terms specified in
26
+ the COPYING file of the Ruby distribution.
@@ -0,0 +1,52 @@
1
+ require 'rexml/document'
2
+ require 'net/http'
3
+
4
module Yahoo

  # Raised when the Yahoo API answers with a non-200 status, with an <Error>
  # document, or with a document whose root element is not recognised.
  #
  # Derives from StandardError (rather than Exception) so that a bare
  # `rescue` in calling code catches it; `rescue Yahoo::APIError` and
  # `rescue Exception` continue to work as before.
  class APIError < StandardError; end

  # Yahoo Term Extractor ruby class.
  # Be warned that Yahoo rate-limits this API to 5000 queries a day!
  # You need an appid from Yahoo to use this.
  class TermExtractor

    API_URL = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')

    # Builds an extractor bound to the given application id.
    #
    #   term_extractor = Yahoo::TermExtractor.new("appid")
    #
    # Raises ArgumentError when appid is nil or empty.
    def initialize(appid)
      raise ArgumentError, 'appid must be supplied' if appid.nil? || appid.empty?
      @appid = appid
    end

    # Posts +context+ to the term extraction service and returns the
    # extracted terms as an array of strings.
    #
    #   term_extractor.extract_terms("Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby.", :query => "leeds")
    #
    # context:: the text to analyse; must be neither nil nor empty.
    # args::    optional hash; a non-empty :query value is forwarded as the
    #           'query' parameter, a missing, nil or empty one is ignored.
    #
    # Raises ArgumentError when context is nil or empty.
    # Raises Yahoo::APIError on a non-200 response, an <Error> document, or
    # a document with an unrecognised (or missing) root element.
    # Nothing is rescued here, so errors raised by Net::HTTP or REXML
    # propagate to the caller unchanged.
    def extract_terms(context, args = {})
      raise ArgumentError, 'context must be supplied' if context.nil? || context.empty?

      params = { 'appid' => @appid, 'context' => context }
      query = args[:query]
      params['query'] = query unless query.nil? || query.empty?

      response = Net::HTTP.post_form(API_URL, params)
      unless response.code == '200'
        raise Yahoo::APIError, "#{response.code} received from Yahoo API"
      end

      xml = REXML::Document.new(response.body)
      root = xml.root
      root_name = root.nil? ? nil : root.name

      case root_name
      when 'ResultSet'
        element_texts(xml, '/ResultSet/Result')
      when 'Error'
        # The root's own text usually ends in whitespace ("...detected: "),
        # so strip it before joining to avoid a doubled space in the message.
        summary = root.text.to_s.strip
        details = element_texts(xml, '/Error/Message').join(', ')
        parts = [summary, details].reject { |part| part.empty? }
        raise Yahoo::APIError, parts.join(' ')
      else
        raise Yahoo::APIError, "Unknown document returned: #{response.body}"
      end
    end

    private

    # Collects the text of every element matching +xpath+ in +xml+.
    def element_texts(xml, xpath)
      texts = []
      xml.each_element(xpath) { |element| texts << element.text }
      texts
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # require File.dirname(__FILE__) + '/spec_helper'
2
+ require File.dirname(__FILE__) + '/../lib/yahoo_term_extractor'
3
+
4
# Behavioural spec for Yahoo::TermExtractor. No network traffic is made:
# Net::HTTP.post_form is mocked in every example that reaches the HTTP call.
describe Yahoo::TermExtractor do
  describe "new" do

    it "should create a Yahoo::TermExtractor object" do
      Yahoo::TermExtractor.new("appid").should be_an_instance_of(Yahoo::TermExtractor)
    end

    it "should not raise an exception if appid is supplied" do
      # A bare should_not raise_error fails on *any* exception, rather than
      # only on one specific class/message pair.
      lambda { Yahoo::TermExtractor.new("appid") }.should_not raise_error
    end

    it "should raise an exception if an appid is nil" do
      lambda { Yahoo::TermExtractor.new(nil) }.should raise_error(ArgumentError, "appid must be supplied")
    end

    it "should raise an exception if an appid is empty" do
      lambda { Yahoo::TermExtractor.new("") }.should raise_error(ArgumentError, "appid must be supplied")
    end

  end

  describe "extract_terms" do
    # Fixtures are plain locals captured by the example blocks. Class
    # variables (@@name) written inside a top-level block belong to Object
    # and raise a RuntimeError on Ruby 3.0 and later.
    context_text = "Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region."
    good_xml = %{
      <ResultSet xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractorResponse.xsd' xmlns='urn:yahoo:cate'>
        <Result>leeds city</Result>
        <Result>selby</Result>
        <Result>proposals</Result>
        <Result>principle</Result>
        <Result>partnership</Result>
      </ResultSet>
    }
    error_xml = %{
      <Error xmlns="urn:yahoo:api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://api.yahoo.com/Api/V1/error.xsd">The following errors were detected: <Message>limit exceeded</Message></Error>
    }
    unknown_xml = %{<thing>why do I bother?</thing>}
    query = 'leeds'

    before(:each) do
      @appid = "test-app-id"
      @api_uri = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
      # The examples set their expectations on @response, so the mock must be
      # stored under that name and handed back by the mocked post_form.
      @response = mock("response")
      @term_extractor = Yahoo::TermExtractor.new(@appid)
    end

    it "should raise an exception if context is not supplied" do
      # Only the class is checked: the wording of Ruby's arity error differs
      # between interpreter versions.
      lambda { @term_extractor.extract_terms() }.should raise_error(ArgumentError)
    end

    it "should raise an exception if context is empty" do
      lambda { @term_extractor.extract_terms("") }.should raise_error(ArgumentError, "context must be supplied")
    end

    it "should raise an exception if a non-200 response code is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => context_text}).and_return(@response)
      # Read once for the status check and once for the error message.
      @response.should_receive(:code).twice.and_return("404")
      lambda { @term_extractor.extract_terms(context_text) }.should raise_error(Yahoo::APIError, "404 received from Yahoo API")
    end

    it "should return an array of terms given a context" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => context_text}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).and_return(good_xml)
      @term_extractor.extract_terms(context_text).should == ["leeds city", "selby", "proposals", "principle", "partnership"]
    end

    it "should raise an exception if an error xml resultset is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => context_text}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).and_return(error_xml)
      # Matched with a regexp so the amount of whitespace between the summary
      # and the joined messages does not matter.
      lambda { @term_extractor.extract_terms(context_text) }.should raise_error(Yahoo::APIError, /The following errors were detected:\s+limit exceeded/)
    end

    it "should raise an exception if an unknown xml document is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => context_text}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      # Read once for parsing and once for the error message.
      @response.should_receive(:body).twice.and_return(unknown_xml)
      lambda { @term_extractor.extract_terms(context_text) }.should raise_error(Yahoo::APIError, "Unknown document returned: " + unknown_xml)
    end

    it "should accept query as an optional parameter" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => context_text, 'query' => query}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).once.and_return(good_xml)
      lambda { @term_extractor.extract_terms(context_text, :query => query) }.should_not raise_error
    end
  end

end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yahoo_term_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ platform: ruby
6
+ authors:
7
+ - Deb Bassett
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-28 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: This library can be used to call the Yahoo Term Extraction Web Service from Ruby. The Term Extraction Web Service provides a list of significant words or phrases extracted from a larger content.
17
+ email: deb@urbanwide.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/yahoo_term_extractor.rb
26
+ - spec/yahoo_term_extractor_spec.rb
27
+ - LICENSE
28
+ has_rdoc: false
29
+ homepage: http://www.urbanwide.com/
30
+ post_install_message:
31
+ rdoc_options: []
32
+
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: "0"
40
+ version:
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ requirements: []
48
+
49
+ rubyforge_project:
50
+ rubygems_version: 1.0.1
51
+ signing_key:
52
+ specification_version: 2
53
+ summary: A library for accessing the Yahoo Term Extraction Web Service
54
+ test_files: []
55
+