yahoo_term_extraction 0.2.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,26 @@
1
+ Copyright (c) 2008 Deb Bassett
2
+
3
+ Permission is hereby granted, free of charge, to any person
4
+ obtaining a copy of this software and associated documentation
5
+ files (the "Software"), to deal in the Software without
6
+ restriction, including without limitation the rights to use,
7
+ copy, modify, merge, publish, distribute, sublicense, and/or sell
8
+ copies of the Software, and to permit persons to whom the
9
+ Software is furnished to do so, subject to the following
10
+ conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
17
+ OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
19
+ HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
20
+ WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
21
+ FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
22
+ OTHER DEALINGS IN THE SOFTWARE.
23
+
24
+ The Yahoo Term Extraction module is copyrighted free software by Deb Bassett
25
+ <deb@urbanwide.com>. You can redistribute it under the terms specified in
26
+ the COPYING file of the Ruby distribution.
@@ -0,0 +1,52 @@
1
+ require 'rexml/document'
2
+ require 'net/http'
3
+
4
module Yahoo

  # Raised when the Yahoo API answers with a non-200 status, an <Error>
  # document, or a document that is neither a <ResultSet> nor an <Error>.
  # Derives from StandardError (not Exception) so that an ordinary
  # `rescue` / `rescue => e` in calling code catches it.
  class APIError < StandardError; end

  # Yahoo Term Extractor Ruby class.
  # Be warned that Yahoo rate-limits this API to 5000 queries a day!
  # You need an appid from Yahoo to use this.
  class TermExtractor

    API_URL = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')

    # term_extractor = Yahoo::TermExtractor.new("appid")
    #
    # Raises ArgumentError when appid is nil or empty.
    def initialize(appid)
      raise ArgumentError, 'appid must be supplied' if appid.nil? || appid.empty?
      @appid = appid
    end

    # term_extractor.extract_terms("Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby.", :query => "leeds")
    #
    # POSTs the context (plus the optional :query, when it is present and
    # non-empty) to API_URL and returns an Array with the text of every
    # <Result> element directly under the <ResultSet> root.
    #
    # Raises ArgumentError when context is nil or empty.
    # Raises Yahoo::APIError on a non-200 response code, on an <Error>
    # document, or on any other document.
    # Network and XML parsing errors are not rescued here; they bubble up
    # to the caller unchanged.
    def extract_terms(context, args = {})
      raise ArgumentError, 'context must be supplied' if context.nil? || context.empty?

      params = { 'appid' => @appid, 'context' => context }
      query = args[:query]
      # A nil or empty query is treated the same as no query at all.
      params['query'] = query unless query.nil? || query.empty?

      response = Net::HTTP.post_form(API_URL, params)
      unless response.code == '200'
        raise Yahoo::APIError, "#{response.code} received from Yahoo API"
      end

      terms_from(response)
    end

    private

    # Parses the response body and returns the extracted terms, or raises
    # Yahoo::APIError for <Error> and unrecognised documents.
    def terms_from(response)
      root = REXML::Document.new(response.body).root

      case root && root.name
      when 'ResultSet'
        child_texts(root, 'Result')
      when 'Error'
        summary = root.text.to_s.strip
        details = child_texts(root, 'Message').join(', ')
        # Drop empty parts so the message never has leading or doubled spaces.
        raise Yahoo::APIError, [summary, details].reject { |part| part.empty? }.join(' ')
      else
        raise Yahoo::APIError, "Unknown document returned: #{response.body}"
      end
    end

    # Collects the text of each direct child element of parent whose
    # (local) element name equals name.
    def child_texts(parent, name)
      texts = []
      parent.each_element { |child| texts << child.text if child.name == name }
      texts
    end
  end
end
@@ -0,0 +1,92 @@
1
+ # require File.dirname(__FILE__) + '/spec_helper'
2
+ require File.dirname(__FILE__) + '/../lib/yahoo_term_extractor'
3
+
4
# Specs for Yahoo::TermExtractor. Net::HTTP.post_form is stubbed in every
# example that reaches the network call, so no request is ever sent.
describe Yahoo::TermExtractor do
  describe "new" do

    it "should create a Yahoo::TermExtractor object" do
      Yahoo::TermExtractor.new("appid").should be_an_instance_of(Yahoo::TermExtractor)
    end

    it "should not raise an exception if appid is supplied" do
      lambda { Yahoo::TermExtractor.new("appid") }.should_not raise_error
    end

    it "should raise an exception if an appid is nil" do
      lambda { Yahoo::TermExtractor.new(nil) }.should raise_error(ArgumentError, "appid must be supplied")
    end

    it "should raise an exception if an appid is empty" do
      lambda { Yahoo::TermExtractor.new("") }.should raise_error(ArgumentError, "appid must be supplied")
    end

  end

  describe "extract_terms" do
    # Fixtures are plain locals captured by the example blocks rather than
    # @@class variables, whose scope inside a describe block is easy to
    # get wrong.
    sample_context = "Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region."
    good_xml = %{
      <ResultSet xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractorResponse.xsd' xmlns='urn:yahoo:cate'>
      <Result>leeds city</Result>
      <Result>selby</Result>
      <Result>proposals</Result>
      <Result>principle</Result>
      <Result>partnership</Result>
      </ResultSet>
    }
    error_xml = %{
      <Error xmlns="urn:yahoo:api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://api.yahoo.com/Api/V1/error.xsd">The following errors were detected: <Message>limit exceeded</Message></Error>
    }
    unknown_xml = %{<thing>why do I bother?</thing>}
    query = 'leeds'

    before(:each) do
      @appid = "test-app-id"
      @api_uri = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
      @mock_response = mock("response")
      @term_extractor = Yahoo::TermExtractor.new(@appid)
    end

    it "should raise an exception if context is not supplied" do
      # Only the class is checked: the wording of Ruby's arity error
      # differs between Ruby versions.
      lambda { @term_extractor.extract_terms() }.should raise_error(ArgumentError)
    end

    it "should raise an exception if context is empty" do
      lambda { @term_extractor.extract_terms("") }.should raise_error(ArgumentError, "context must be supplied")
    end

    it "should raise an exception if a non-200 response code is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@mock_response)
      # code is read once for the status check and once for the error message.
      @mock_response.should_receive(:code).twice.and_return("404")
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, "404 received from Yahoo API")
    end

    it "should return an array of terms given a context" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).and_return(good_xml)
      @term_extractor.extract_terms(sample_context).should == ["leeds city", "selby", "proposals", "principle", "partnership"]
    end

    it "should raise an exception if an error xml resultset is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).and_return(error_xml)
      # Matched with a pattern so the example does not depend on the exact
      # whitespace between the summary text and the joined messages.
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, /The following errors were detected:\s+limit exceeded/)
    end

    it "should raise an exception if an unknown xml document is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      # body is read once for parsing and once for the error message.
      @mock_response.should_receive(:body).twice.and_return(unknown_xml)
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, "Unknown document returned: " + unknown_xml)
    end

    it "should accept query as an optional parameter" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context, 'query' => query}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).once.and_return(good_xml)
      lambda { @term_extractor.extract_terms(sample_context, :query => query) }.should_not raise_error
    end
  end

end
metadata ADDED
@@ -0,0 +1,55 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yahoo_term_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.2.3
5
+ platform: ruby
6
+ authors:
7
+ - Deb Bassett
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2008-07-28 00:00:00 +01:00
13
+ default_executable:
14
+ dependencies: []
15
+
16
+ description: This library can be used to call the Yahoo Term Extraction Web Service from Ruby. The Term Extraction Web Service provides a list of significant words or phrases extracted from a larger content.
17
+ email: deb@urbanwide.com
18
+ executables: []
19
+
20
+ extensions: []
21
+
22
+ extra_rdoc_files: []
23
+
24
+ files:
25
+ - lib/yahoo_term_extractor.rb
26
+ - spec/yahoo_term_extractor_spec.rb
27
+ - LICENSE
28
+ has_rdoc: false
29
+ homepage: http://www.urbanwide.com/
30
+ post_install_message:
31
+ rdoc_options: []
32
+
33
+ require_paths:
34
+ - lib
35
+ required_ruby_version: !ruby/object:Gem::Requirement
36
+ requirements:
37
+ - - ">="
38
+ - !ruby/object:Gem::Version
39
+ version: "0"
40
+ version:
41
+ required_rubygems_version: !ruby/object:Gem::Requirement
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ version: "0"
46
+ version:
47
+ requirements: []
48
+
49
+ rubyforge_project:
50
+ rubygems_version: 1.0.1
51
+ signing_key:
52
+ specification_version: 2
53
+ summary: A library for accessing the Yahoo Term Extraction Web Service
54
+ test_files: []
55
+