yahoo_term_extraction 0.2.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +26 -0
- data/lib/yahoo_term_extractor.rb +52 -0
- data/spec/yahoo_term_extractor_spec.rb +92 -0
- metadata +55 -0
data/LICENSE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2008 Deb Bassett
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
The Yahoo Term Extraction module is copyrighted free software by Deb Bassett
|
25
|
+
<deb@urbanwide.com>. You can redistribute it under the terms specified in
|
26
|
+
the COPYING file of the Ruby distribution.
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
module Yahoo
|
5
|
+
|
6
|
+
# Error raised for failures reported by, or detected in, a Yahoo API response.
# Derives from StandardError (not Exception) so that an ordinary
# `rescue => e` in calling code catches it; `rescue Yahoo::APIError` and
# `rescue Exception` keep working as before.
class APIError < StandardError; end
|
7
|
+
|
8
|
+
# Yahoo Term Extractor ruby class.
|
9
|
+
# Be warned that Yahoo rate-limits this API to 5000 queries a day!
|
10
|
+
# You need an appid from yahoo to use this.
|
11
|
+
class TermExtractor

  # Endpoint every extraction request is POSTed to.
  API_URL = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')

  # Builds an extractor that sends the given application id with each request.
  #
  #   term_extractor = Yahoo::TermExtractor.new("appid")
  #
  # Raises ArgumentError when appid is nil or empty.
  def initialize(appid)
    raise ArgumentError, 'appid must be supplied' if appid.nil? || appid.empty?
    @appid = appid
  end

  # POSTs +context+ to API_URL and returns the text of every
  # /ResultSet/Result element of the reply as an Array.
  #
  #   term_extractor.extract_terms("Several eco-town proposals were submitted ...", :query => "leeds")
  #
  # context:: text to extract terms from; must not be nil or empty.
  # args::    optional hash; a non-empty :query value is sent as the 'query'
  #           form field, a missing, nil or empty one is left out.
  #
  # Raises ArgumentError when context is nil or empty, and Yahoo::APIError
  # when the response code is not '200', when the reply is an <Error>
  # document, or when its root element is neither ResultSet nor Error.
  # Exceptions raised by Net::HTTP or REXML themselves are not rescued here
  # and propagate to the caller.
  def extract_terms(context, args = {})
    raise ArgumentError, 'context must be supplied' if context.nil? || context.empty?

    params = { 'appid' => @appid, 'context' => context }
    query = args[:query]
    params['query'] = query unless query.nil? || query.empty?

    response = Net::HTTP.post_form(API_URL, params)
    unless response.code == '200'
      raise Yahoo::APIError, "#{response.code} received from Yahoo API"
    end

    parse_terms(response)
  end

  private

  # Turns a 200 response into the list of terms, or raises Yahoo::APIError
  # for an <Error> document or an unrecognised root element.
  def parse_terms(response)
    xml = REXML::Document.new(response.body)
    root_name = xml.root.nil? ? nil : xml.root.name

    case root_name
    when "ResultSet"
      element_texts(xml, "/ResultSet/Result")
    when "Error"
      messages = element_texts(xml, "/Error/Message")
      raise Yahoo::APIError, "#{xml.root.text} #{messages.join(', ')}"
    else
      raise Yahoo::APIError, "Unknown document returned: #{response.body}"
    end
  end

  # Text of each element matched by xpath, in document order.
  def element_texts(xml, xpath)
    xml.get_elements(xpath).map { |element| element.text }
  end
end
|
52
|
+
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/yahoo_term_extractor'
|
3
|
+
|
4
|
+
# Specs for Yahoo::TermExtractor. Net::HTTP.post_form is mocked in every
# example that would otherwise reach the network, so no request is sent.
describe Yahoo::TermExtractor do
  describe "new" do

    it "should create a Yahoo::TermExtractor object" do
      Yahoo::TermExtractor.new("appid").should be_an_instance_of(Yahoo::TermExtractor)
    end

    it "should not raise an exception if appid is supplied" do
      lambda { Yahoo::TermExtractor.new("appid") }.should_not raise_error(ArgumentError, "appid must be supplied")
    end

    it "should raise an exception if an appid is nil" do
      lambda { Yahoo::TermExtractor.new(nil) }.should raise_error(ArgumentError, "appid must be supplied")
    end

    it "should raise an exception if an appid is empty" do
      lambda { Yahoo::TermExtractor.new("") }.should raise_error(ArgumentError, "appid must be supplied")
    end

  end

  describe "extract_terms" do
    # Fixtures are plain locals captured by the example blocks below, rather
    # than @@class variables declared at the top level.
    sample_context = "Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region."
    good_xml = %{
      <ResultSet xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractorResponse.xsd' xmlns='urn:yahoo:cate'>
        <Result>leeds city</Result>
        <Result>selby</Result>
        <Result>proposals</Result>
        <Result>principle</Result>
        <Result>partnership</Result>
      </ResultSet>
    }
    error_xml = %{
      <Error xmlns="urn:yahoo:api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://api.yahoo.com/Api/V1/error.xsd">The following errors were detected: <Message>limit exceeded</Message></Error>
    }
    unknown_xml = %{<thing>why do I bother?</thing>}
    query = 'leeds'

    before(:each) do
      @appid = "test-app-id"
      @api_uri = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
      # The mocked post_form hands this object back, so the expectations set
      # on it in each example are the ones extract_terms actually exercises.
      @response = mock("response")
      @term_extractor = Yahoo::TermExtractor.new(@appid)
    end

    it "should raise an exception if context is not supplied" do
      # Only the class is checked: the arity message differs between Ruby versions.
      lambda { @term_extractor.extract_terms() }.should raise_error(ArgumentError)
    end

    it "should raise an exception if context is empty" do
      lambda { @term_extractor.extract_terms("") }.should raise_error(ArgumentError, "context must be supplied")
    end

    it "should raise an exception if a non-200 response code is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@response)
      # code is read once for the status check and once for the error message
      @response.should_receive(:code).twice.and_return("404")
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, "404 received from Yahoo API")
    end

    it "should return an array of terms given a context" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).and_return(good_xml)
      @term_extractor.extract_terms(sample_context).should == ["leeds city", "selby", "proposals", "principle", "partnership"]
    end

    it "should raise an exception if an error xml resultset is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).and_return(error_xml)
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, "The following errors were detected: limit exceeded")
    end

    it "should raise an exception if an unknown xml document is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      # body is read once for parsing and once for the error message
      @response.should_receive(:body).twice.and_return(unknown_xml)
      lambda { @term_extractor.extract_terms(sample_context) }.should raise_error(Yahoo::APIError, "Unknown document returned: " + unknown_xml)
    end

    it "should accept query as an optional parameter" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => sample_context, 'query' => query}).and_return(@response)
      @response.should_receive(:code).once.and_return("200")
      @response.should_receive(:body).once.and_return(good_xml)
      lambda { @term_extractor.extract_terms(sample_context, :query => query) }.should_not raise_error
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yahoo_term_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Deb Bassett
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-28 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: This library can be used to call the Yahoo Term Extraction Web Service from Ruby. The Term Extraction Web Service provides a list of significant words or phrases extracted from a larger content.
|
17
|
+
email: deb@urbanwide.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/yahoo_term_extractor.rb
|
26
|
+
- spec/yahoo_term_extractor_spec.rb
|
27
|
+
- LICENSE
|
28
|
+
has_rdoc: false
|
29
|
+
homepage: http://www.urbanwide.com/
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: "0"
|
40
|
+
version:
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
requirements: []
|
48
|
+
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.0.1
|
51
|
+
signing_key:
|
52
|
+
specification_version: 2
|
53
|
+
summary: A library for accessing the Yahoo Term Extraction Web Service
|
54
|
+
test_files: []
|
55
|
+
|