yahoo_term_extraction 0.2.3
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE +26 -0
- data/lib/yahoo_term_extractor.rb +52 -0
- data/spec/yahoo_term_extractor_spec.rb +92 -0
- metadata +55 -0
data/LICENSE
ADDED
@@ -0,0 +1,26 @@
|
|
1
|
+
Copyright (c) 2008 Deb Bassett
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person
|
4
|
+
obtaining a copy of this software and associated documentation
|
5
|
+
files (the "Software"), to deal in the Software without
|
6
|
+
restriction, including without limitation the rights to use,
|
7
|
+
copy, modify, merge, publish, distribute, sublicense, and/or sell
|
8
|
+
copies of the Software, and to permit persons to whom the
|
9
|
+
Software is furnished to do so, subject to the following
|
10
|
+
conditions:
|
11
|
+
|
12
|
+
The above copyright notice and this permission notice shall be
|
13
|
+
included in all copies or substantial portions of the Software.
|
14
|
+
|
15
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
16
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
|
17
|
+
OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
18
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT
|
19
|
+
HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY,
|
20
|
+
WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
|
21
|
+
FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR
|
22
|
+
OTHER DEALINGS IN THE SOFTWARE.
|
23
|
+
|
24
|
+
The Yahoo Term Extraction module is copyrighted free software by Deb Bassett
|
25
|
+
<deb@urbanwide.com>. You can redistribute it under the terms specified in
|
26
|
+
the COPYING file of the Ruby distribution.
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'rexml/document'
|
2
|
+
require 'net/http'
|
3
|
+
|
4
|
+
module Yahoo

  # Raised when the Yahoo API answers with a non-200 status code, with an
  # <Error> document, or with a document whose root element is not recognised.
  #
  # Derives from StandardError (not Exception) so that a plain `rescue`
  # clause in calling code catches it.
  class APIError < StandardError; end

  # Yahoo Term Extractor ruby class.
  # Be warned that Yahoo rate limit this api to 5000 queries a day!
  # You need an appid from yahoo to use this.
  class TermExtractor

    API_URL = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')

    # Creates an extractor bound to the given application id.
    #
    #   term_extractor = Yahoo::TermExtractor.new("appid")
    #
    # Raises ArgumentError when appid is nil or empty.
    def initialize(appid)
      raise ArgumentError, 'appid must be supplied' if appid.nil? || appid.empty?
      @appid = appid
    end

    # Posts +context+ to the term extraction service and returns the
    # extracted terms as an Array of Strings.
    #
    #   term_extractor.extract_terms("Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby.", :query => "leeds")
    #
    # context:: the text to analyse; must not be nil or empty.
    # args::    optional hash; a non-empty :query value is forwarded to the
    #           service as the 'query' parameter, a nil or empty one is ignored.
    #
    # Raises ArgumentError when context is nil or empty, and Yahoo::APIError
    # when the service replies with a non-200 code, an <Error> document or an
    # unrecognised document. Exceptions raised by Net::HTTP or by the XML
    # parser are not rescued here and propagate to the caller.
    def extract_terms(context, args = {})
      raise ArgumentError, 'context must be supplied' if context.nil? || context.empty?

      params = { 'appid' => @appid, 'context' => context }
      query = args[:query]
      params['query'] = query unless query.nil? || query.empty?

      response = Net::HTTP.post_form(API_URL, params)
      unless response.code == '200'
        raise Yahoo::APIError, "#{response.code} received from Yahoo API"
      end

      parse_terms(response)
    end

    private

    # Turns a 200 response into the list of terms, or raises Yahoo::APIError
    # for <Error> documents and for documents with any other root element.
    def parse_terms(response)
      xml = REXML::Document.new(response.body)
      root_name = xml.root.nil? ? nil : xml.root.name

      case root_name
      when 'ResultSet'
        xml.get_elements('/ResultSet/Result').map { |term| term.text }
      when 'Error'
        raise Yahoo::APIError, error_message(xml)
      else
        raise Yahoo::APIError, "Unknown document returned: #{response.body}"
      end
    end

    # Builds "<leading text of Error> <message>, <message>" from an <Error>
    # document. The leading text is stripped because it usually carries its
    # own trailing whitespace, which otherwise doubles the separator.
    def error_message(xml)
      summary = xml.root.text.to_s.strip
      details = xml.get_elements('/Error/Message').map { |message| message.text }.compact.join(', ')
      [summary, details].reject { |part| part.empty? }.join(' ')
    end
  end
end
|
@@ -0,0 +1,92 @@
|
|
1
|
+
# require File.dirname(__FILE__) + '/spec_helper'
|
2
|
+
require File.dirname(__FILE__) + '/../lib/yahoo_term_extractor'
|
3
|
+
|
4
|
+
# Behaviour of Yahoo::TermExtractor. Net::HTTP.post_form is always mocked,
# so these examples never touch the network.
describe Yahoo::TermExtractor do
  describe "new" do

    it "should create a Yahoo::TermExtractor object" do
      Yahoo::TermExtractor.new("appid").should be_an_instance_of(Yahoo::TermExtractor)
    end

    it "should not raise an exception if appid is supplied" do
      lambda { Yahoo::TermExtractor.new("appid") }.should_not raise_error
    end

    it "should raise an exception if an appid is nil" do
      lambda { Yahoo::TermExtractor.new(nil) }.should raise_error(ArgumentError, "appid must be supplied")
    end

    it "should raise an exception if an appid is empty" do
      lambda { Yahoo::TermExtractor.new("") }.should raise_error(ArgumentError, "appid must be supplied")
    end

  end

  describe "extract_terms" do

    # Fixtures are instance variables set per example rather than @@class
    # variables: class variables in a describe block belong to the toplevel,
    # which newer Ruby versions reject.
    before(:each) do
      @context = "Several eco-town proposals were submitted for locations within this area, principally between Leeds and Selby. The Leeds City Region Partnership has indicated support in principle for an eco-town within the sub-region."
      @good_xml = %{
<ResultSet xmlns:xsi='http://www.w3.org/2001/XMLSchema-instance' xsi:schemaLocation='urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractorResponse.xsd' xmlns='urn:yahoo:cate'>
<Result>leeds city</Result>
<Result>selby</Result>
<Result>proposals</Result>
<Result>principle</Result>
<Result>partnership</Result>
</ResultSet>
}
      @error_xml = %{
<Error xmlns="urn:yahoo:api" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:noNamespaceSchemaLocation="http://api.yahoo.com/Api/V1/error.xsd">The following errors were detected: <Message>limit exceeded</Message></Error>
}
      @unknown_xml = %{<thing>why do I bother?</thing>}
      @query = 'leeds'

      @appid = "test-app-id"
      @api_uri = URI.parse('http://api.search.yahoo.com/ContentAnalysisService/V1/termExtraction')
      @mock_response = mock("response")
      @term_extractor = Yahoo::TermExtractor.new(@appid)
    end

    it "should raise an exception if context is omitted" do
      # Only the class is checked: the arity message text differs between Ruby versions.
      lambda { @term_extractor.extract_terms() }.should raise_error(ArgumentError)
    end

    it "should raise an exception if context is empty" do
      lambda { @term_extractor.extract_terms("") }.should raise_error(ArgumentError, "context must be supplied")
    end

    it "should raise an exception if a non-200 response code is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @context}).and_return(@mock_response)
      # code is read once for the status check and once for the error message
      @mock_response.should_receive(:code).twice.and_return("404")
      lambda { @term_extractor.extract_terms(@context) }.should raise_error(Yahoo::APIError, "404 received from Yahoo API")
    end

    it "should return an array of terms given a context" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).and_return(@good_xml)
      @term_extractor.extract_terms(@context).should == ["leeds city", "selby", "proposals", "principle", "partnership"]
    end

    it "should raise an exception if an error xml resultset is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).and_return(@error_xml)
      # Matched with a pattern so the example does not hinge on the exact
      # amount of whitespace between the summary text and the messages.
      lambda { @term_extractor.extract_terms(@context) }.should raise_error(Yahoo::APIError, /\AThe following errors were detected:\s+limit exceeded\z/)
    end

    it "should raise an exception if an unknown xml document is returned" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @context}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      # body is read once for parsing and once for the error message
      @mock_response.should_receive(:body).twice.and_return(@unknown_xml)
      lambda { @term_extractor.extract_terms(@context) }.should raise_error(Yahoo::APIError, "Unknown document returned: " + @unknown_xml)
    end

    it "should accept query as an optional parameter" do
      Net::HTTP.should_receive(:post_form).with(@api_uri, {'appid' => @appid, 'context' => @context, 'query' => @query}).and_return(@mock_response)
      @mock_response.should_receive(:code).once.and_return("200")
      @mock_response.should_receive(:body).once.and_return(@good_xml)
      lambda { @term_extractor.extract_terms(@context, :query => @query) }.should_not raise_error
    end
  end

end
|
metadata
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: yahoo_term_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.2.3
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- Deb Bassett
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2008-07-28 00:00:00 +01:00
|
13
|
+
default_executable:
|
14
|
+
dependencies: []
|
15
|
+
|
16
|
+
description: This library can be used to call the Yahoo Term Extraction Web Service from Ruby. The Term Extraction Web Service provides a list of significant words or phrases extracted from a larger content.
|
17
|
+
email: deb@urbanwide.com
|
18
|
+
executables: []
|
19
|
+
|
20
|
+
extensions: []
|
21
|
+
|
22
|
+
extra_rdoc_files: []
|
23
|
+
|
24
|
+
files:
|
25
|
+
- lib/yahoo_term_extractor.rb
|
26
|
+
- spec/yahoo_term_extractor_spec.rb
|
27
|
+
- LICENSE
|
28
|
+
has_rdoc: false
|
29
|
+
homepage: http://www.urbanwide.com/
|
30
|
+
post_install_message:
|
31
|
+
rdoc_options: []
|
32
|
+
|
33
|
+
require_paths:
|
34
|
+
- lib
|
35
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
36
|
+
requirements:
|
37
|
+
- - ">="
|
38
|
+
- !ruby/object:Gem::Version
|
39
|
+
version: "0"
|
40
|
+
version:
|
41
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: "0"
|
46
|
+
version:
|
47
|
+
requirements: []
|
48
|
+
|
49
|
+
rubyforge_project:
|
50
|
+
rubygems_version: 1.0.1
|
51
|
+
signing_key:
|
52
|
+
specification_version: 2
|
53
|
+
summary: A library for accessing the Yahoo Term Extraction Web Service
|
54
|
+
test_files: []
|
55
|
+
|