alexrabarts-term_extraction 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION.yml +4 -0
- data/lib/term_extraction.rb +12 -0
- data/lib/term_extraction/yahoo.rb +51 -0
- data/lib/term_extraction/zemanta.rb +49 -0
- data/test/fixtures/yahoo.xml +3 -0
- data/test/fixtures/zemanta.xml +104 -0
- data/test/term_extraction_test.rb +15 -0
- data/test/test_helper.rb +13 -0
- metadata +72 -0
data/VERSION.yml
ADDED
@@ -0,0 +1,51 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
|
3
|
+
class TermExtraction
  # Term extractor that queries the Yahoo! Term Extraction web service and
  # pulls the extracted terms out of its XML response.
  #
  # Reads @api_key and @context; presumably both are set by the TermExtraction
  # superclass, which is not visible here — TODO confirm.
  class Yahoo < TermExtraction
    # Returns the text of every Result element found in the response document.
    #
    # @return [Array<String>] one entry per Result element, in the order the
    #   search returns them; empty when no document is available
    def terms
      doc = data
      return [] if doc.nil?

      doc.search('//s:Result', ns).map { |node| node.text }
    end

    class << self
      # Short identifier for this backend.
      #
      # @return [String]
      def canonical_name
        'yahoo'
      end
    end

    private

    # Binds the "s" prefix used in the XPath query to the namespace URI the
    # service declares on its ResultSet root element.
    def ns
      { 's' => 'urn:yahoo:cate' }
    end

    # Endpoint of the term extraction service.
    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Builds the request URI: the gateway plus the query parameters.
    #
    # @return [Addressable::URI]
    def url
      request_uri = Addressable::URI.parse(gateway)
      request_uri.query_values = {
        # TODO: Change appid to the BMP one
        'appid'   => @api_key,
        'output'  => 'xml',
        'context' => @context
      }
      request_uri
    end

    # Fetches the raw XML response body from the service.
    #
    # The block form is used so the handle returned by open-uri is closed as
    # soon as the body has been read, instead of lingering until GC.
    # URI.open is preferred when available because Kernel#open no longer
    # handles URLs from Ruby 3.0 onwards; older rubies fall back to
    # Kernel#open. Assumes open-uri is loaded elsewhere in the gem — TODO
    # confirm.
    #
    # @return [String] response body
    def remote_xml
      target = url.to_s
      reader = lambda { |io| io.read }

      if defined?(::URI) && ::URI.respond_to?(:open)
        ::URI.open(target, &reader)
      else
        open(target, &reader)
      end
    end

    # Parsed response document, fetched and parsed once and then memoized.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
|
@@ -0,0 +1,49 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'nokogiri'
|
3
|
+
|
4
|
+
class TermExtraction
  # Term extractor that posts text to the Zemanta REST API and pulls the
  # suggested names out of its XML response.
  #
  # Reads @api_key and @context; presumably both are set by the TermExtraction
  # superclass, which is not visible here — TODO confirm.
  class Zemanta < TermExtraction
    # Returns the text of every <name> element found in the response document.
    #
    # @return [Array<String>] one entry per <name> element, in the order the
    #   search returns them; empty when no document is available (the same
    #   guard the Yahoo backend applies)
    def terms
      doc = data
      return [] if doc.nil?

      doc.search('//name').map { |node| node.text }
    end

    class << self
      # Short identifier for this backend.
      #
      # @return [String]
      def canonical_name
        'zemanta'
      end
    end

    private

    # Endpoint of the Zemanta REST service.
    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # The gateway parsed into a URI object, as Net::HTTP.post_form expects.
    #
    # @return [URI]
    def url
      URI.parse(gateway)
    end

    # Form fields sent with the request.
    #
    # @return [Hash] field name => value
    def post_params
      {
        'method'        => 'zemanta.suggest',
        'api_key'       => @api_key,
        # presumably asks the service to leave image suggestions out — verify
        # against the Zemanta API docs
        'return_images' => 0,
        'text'          => @context,
        'format'        => 'xml'
      }
    end

    # Posts the form to the service and returns the raw response body.
    #
    # @return [String] response body
    def remote_xml
      Net::HTTP.post_form(url, post_params).body
    end

    # Parsed response document, fetched and parsed once and then memoized.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
|
@@ -0,0 +1,3 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractionResponse.xsd"><Result>gears of war</Result><Result>gears</Result></ResultSet>
|
3
|
+
<!-- ws04.search.scd.yahoo.com uncompressed/chunked Thu Feb 26 21:04:16 PST 2009 -->
|
@@ -0,0 +1,104 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<rsp>
|
3
|
+
<status>ok</status>
|
4
|
+
<articles>
|
5
|
+
<article>
|
6
|
+
<url>http://www.crunchgear.com/2009/02/24/nvidia-based-imacs-coming-soon/</url>
|
7
|
+
<confidence>0.033153</confidence>
|
8
|
+
<published_datetime>2009-02-24T18:00:45Z</published_datetime>
|
9
|
+
<zemified>0</zemified>
|
10
|
+
<title>NVIDIA-based iMacs coming soon?</title>
|
11
|
+
</article><article>
|
12
|
+
<url>http://www.tuaw.com/2009/02/24/rumor-new-imacs-around-the-bend/</url>
|
13
|
+
<confidence>0.028595</confidence>
|
14
|
+
<published_datetime>2009-02-24T22:00:00Z</published_datetime>
|
15
|
+
<zemified>0</zemified>
|
16
|
+
<title>Rumor: New iMacs around the bend</title>
|
17
|
+
</article><article>
|
18
|
+
<url>http://www.engadget.com/2009/02/24/a-few-new-rumors-point-to-two-new-nvidia-packing-imacs/</url>
|
19
|
+
<confidence>0.027595</confidence>
|
20
|
+
<published_datetime>2009-02-24T13:03:00Z</published_datetime>
|
21
|
+
<zemified>0</zemified>
|
22
|
+
<title>A few new rumors point to two new NVIDIA-packing iMacs</title>
|
23
|
+
</article><article>
|
24
|
+
<url>http://www.techmeme.com/090126/p31</url>
|
25
|
+
<confidence>0.019056</confidence>
|
26
|
+
<published_datetime>2009-01-26T17:00:22Z</published_datetime>
|
27
|
+
<zemified>0</zemified>
|
28
|
+
<title>Chip complex delaying Apple's new iMac line, says analyst (Zach Spear/AppleInsider)</title>
|
29
|
+
</article><article>
|
30
|
+
<url>http://i.gizmodo.com/5143476/apple-warns-resellers-of-reduced-imac-availability-new-models-on-the-way</url>
|
31
|
+
<confidence>0.018611</confidence>
|
32
|
+
<published_datetime>2009-01-31T16:30:00Z</published_datetime>
|
33
|
+
<zemified>0</zemified>
|
34
|
+
<title>Apple Warns Resellers of Reduced iMac Availability: New Models On the Way? [Apple]</title>
|
35
|
+
</article><article>
|
36
|
+
<url>http://cultofmac.com/analyst-new-imacs-delayed-for-chips-snow-leopard/7549</url>
|
37
|
+
<confidence>0.01685</confidence>
|
38
|
+
<published_datetime>2009-01-26T16:43:44Z</published_datetime>
|
39
|
+
<zemified>0</zemified>
|
40
|
+
<title>Analyst: New iMacs Delayed For Chips, Snow Leopard</title>
|
41
|
+
</article><article>
|
42
|
+
<url>http://www.ubergizmo.com/15/archives/2008/12/new_imac_allinone_pc_confirmed.html</url>
|
43
|
+
<confidence>0.016311</confidence>
|
44
|
+
<published_datetime>2008-12-24T01:06:56Z</published_datetime>
|
45
|
+
<zemified>0</zemified>
|
46
|
+
<title>New iMac All-In-One PC Confirmed</title>
|
47
|
+
</article><article>
|
48
|
+
<url>http://www.labnol.org/gadgets/exchange-windows-pc-for-apple-mac/5775/</url>
|
49
|
+
<confidence>0.016238</confidence>
|
50
|
+
<published_datetime>2008-12-03T08:17:35Z</published_datetime>
|
51
|
+
<zemified>0</zemified>
|
52
|
+
<title>Exchange Your Windows PC or TV for an Apple iMac</title>
|
53
|
+
</article><article>
|
54
|
+
<url>http://cultofmac.com/class-action-lawsuit-over-imac-display-problems/6562</url>
|
55
|
+
<confidence>0.015608</confidence>
|
56
|
+
<published_datetime>2009-01-02T16:17:06Z</published_datetime>
|
57
|
+
<zemified>0</zemified>
|
58
|
+
<title>Class-Action Lawsuit Over iMac Display Problems</title>
|
59
|
+
</article><article>
|
60
|
+
<url>http://www.crunchgear.com/2008/12/29/new-imacs-to-include-hotter-components-new-cooling/</url>
|
61
|
+
<confidence>0.015093</confidence>
|
62
|
+
<published_datetime>2008-12-29T23:50:04Z</published_datetime>
|
63
|
+
<zemified>0</zemified>
|
64
|
+
<title>New iMacs to include hotter components, new cooling?</title>
|
65
|
+
</article>
|
66
|
+
</articles><markup>
|
67
|
+
<text>apple imac</text>
|
68
|
+
</markup><signature><div class="zemanta-pixie"><a class="zemanta-pixie-a" href="http://reblog.zemanta.com/zemified/3695b55a-d68e-4328-bf86-c3f01be8b2da/" title="Zemified by Zemanta"><img class="zemanta-pixie-img" src="http://img.zemanta.com/reblog_e.png?x-id=3695b55a-d68e-4328-bf86-c3f01be8b2da" alt="Reblog this post [with Zemanta]" /></a></div></signature>
|
69
|
+
<keywords>
|
70
|
+
<keyword>
|
71
|
+
<confidence>0.404748</confidence>
|
72
|
+
<scheme>general</scheme>
|
73
|
+
<name>Apple</name>
|
74
|
+
</keyword><keyword>
|
75
|
+
<confidence>0.277753</confidence>
|
76
|
+
<scheme>general</scheme>
|
77
|
+
<name>IMac</name>
|
78
|
+
</keyword><keyword>
|
79
|
+
<confidence>0.094392</confidence>
|
80
|
+
<scheme>general</scheme>
|
81
|
+
<name>Rumor</name>
|
82
|
+
</keyword><keyword>
|
83
|
+
<confidence>0.076399</confidence>
|
84
|
+
<scheme>general</scheme>
|
85
|
+
<name>Hardware</name>
|
86
|
+
</keyword><keyword>
|
87
|
+
<confidence>0.072186</confidence>
|
88
|
+
<scheme>general</scheme>
|
89
|
+
<name>Nvidia</name>
|
90
|
+
</keyword><keyword>
|
91
|
+
<confidence>0.069183</confidence>
|
92
|
+
<scheme>general</scheme>
|
93
|
+
<name>Macintosh</name>
|
94
|
+
</keyword><keyword>
|
95
|
+
<confidence>0.050487</confidence>
|
96
|
+
<scheme>general</scheme>
|
97
|
+
<name>Desktops</name>
|
98
|
+
</keyword><keyword>
|
99
|
+
<confidence>0.039205</confidence>
|
100
|
+
<scheme>general</scheme>
|
101
|
+
<name>AllInOne</name>
|
102
|
+
</keyword>
|
103
|
+
</keywords><rid>3695b55a-d68e-4328-bf86-c3f01be8b2da</rid>
|
104
|
+
</rsp>
|
@@ -0,0 +1,15 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Checks that each backend turns a recorded XML response into the expected
# list of terms. The network call (remote_xml) is stubbed with a fixture, so
# no request is made.
class TermExtractionTest < Test::Unit::TestCase
  should 'return correct terms from Yahoo!' do
    yahoo = TermExtraction::Yahoo.new
    yahoo.stubs(:remote_xml).returns(read_xml_fixture('yahoo'))

    # assert_equal takes (expected, actual); keeping that order makes the
    # failure message label the two values correctly.
    assert_equal ['gears of war', 'gears'], yahoo.terms
  end

  should 'return correct terms from Zemanta' do
    zemanta = TermExtraction::Zemanta.new
    zemanta.stubs(:remote_xml).returns(read_xml_fixture('zemanta'))

    expected = ['Apple', 'IMac', 'Rumor', 'Hardware', 'Nvidia', 'Macintosh', 'Desktops', 'AllInOne']
    assert_equal expected, zemanta.terms
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
require 'mocha'
|
5
|
+
|
6
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
7
|
+
require 'term_extraction'
|
8
|
+
|
9
|
+
class Test::Unit::TestCase
  # Reads a recorded XML response from the fixtures directory that sits next
  # to this helper file.
  #
  # The path is anchored to this file's directory rather than the current
  # working directory, so the tests find their fixtures no matter where the
  # test run is started from.
  #
  # @param name [String] fixture base name, without the ".xml" extension
  # @return [String] contents of the fixture file
  def read_xml_fixture(name)
    File.read(File.join(File.dirname(__FILE__), 'fixtures', "#{name}.xml"))
  end
end
|
metadata
ADDED
@@ -0,0 +1,72 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: alexrabarts-term_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.0
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- alex
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-02-27 00:00:00 -08:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.0.7
|
24
|
+
version:
|
25
|
+
description: Term extraction library
|
26
|
+
email: alexrabarts@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files: []
|
32
|
+
|
33
|
+
files:
|
34
|
+
- VERSION.yml
|
35
|
+
- lib/term_extraction
|
36
|
+
- lib/term_extraction/yahoo.rb
|
37
|
+
- lib/term_extraction/zemanta.rb
|
38
|
+
- lib/term_extraction.rb
|
39
|
+
- test/fixtures
|
40
|
+
- test/fixtures/yahoo.xml
|
41
|
+
- test/fixtures/zemanta.xml
|
42
|
+
- test/term_extraction_test.rb
|
43
|
+
- test/test_helper.rb
|
44
|
+
has_rdoc: true
|
45
|
+
homepage: http://github.com/alexrabarts/term_extraction
|
46
|
+
post_install_message:
|
47
|
+
rdoc_options:
|
48
|
+
- --inline-source
|
49
|
+
- --charset=UTF-8
|
50
|
+
require_paths:
|
51
|
+
- lib
|
52
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
53
|
+
requirements:
|
54
|
+
- - ">="
|
55
|
+
- !ruby/object:Gem::Version
|
56
|
+
version: "0"
|
57
|
+
version:
|
58
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
59
|
+
requirements:
|
60
|
+
- - ">="
|
61
|
+
- !ruby/object:Gem::Version
|
62
|
+
version: "0"
|
63
|
+
version:
|
64
|
+
requirements: []
|
65
|
+
|
66
|
+
rubyforge_project:
|
67
|
+
rubygems_version: 1.2.0
|
68
|
+
signing_key:
|
69
|
+
specification_version: 2
|
70
|
+
summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta.
|
71
|
+
test_files: []
|
72
|
+
|