alexrabarts-term_extraction 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 1
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,12 @@
1
# Base class for the term-extraction back ends.
#
# It only stores the text to analyse and the service key; it defines neither a
# class-level canonical_name nor any request logic of its own.
class TermExtraction
  # options - Hash of settings; nil is treated like an empty Hash.
  #           :context - text that terms should be extracted from
  #           :api_key - key identifying the caller to the remote service
  def initialize(options = {})
    # Tolerate an explicit nil so TermExtraction.new(nil) behaves like
    # TermExtraction.new instead of failing on nil[:context].
    options ||= {}
    @context = options[:context]
    @api_key = options[:api_key]
  end

  # Returns whatever the receiver's class answers to canonical_name.
  # The receiver's class must provide that class-level method; this base class
  # does not.
  def canonical_name
    self.class.canonical_name
  end
end
11
+
12
+ %w{yahoo zemanta}.each{|t| require "term_extraction/#{t}"}
@@ -0,0 +1,51 @@
1
+ require 'nokogiri'
2
+
3
# Standard-library HTTP and URI support used to build and send the request.
require 'net/http'
require 'uri'

class TermExtraction
  # Term extraction through the Yahoo! Term Extraction web service.
  #
  # The request is a GET whose query string carries the API key (@api_key) and
  # the text to analyse (@context); the XML answer is parsed with Nokogiri.
  class Yahoo < TermExtraction
    # Returns an Array with the text of every Result element in the response,
    # in document order. Returns an empty Array when no document is available.
    def terms
      doc = data
      return [] if doc.nil?

      doc.search('//s:Result', ns).map { |node| node.text }
    end

    class << self
      # Short identifier of this back end.
      def canonical_name
        'yahoo'
      end
    end

    private

    # Prefix => namespace mapping handed to the XPath search in #terms.
    def ns
      {'s' => 'urn:yahoo:cate'}
    end

    # Endpoint of the term extraction service.
    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Returns the gateway as a URI whose query string holds the request
    # parameters, form-encoded.
    def url
      uri = URI.parse(gateway)
      uri.query = URI.encode_www_form(
        # TODO: Change appid to the BMP one
        'appid'   => @api_key,
        'output'  => 'xml',
        'context' => @context
      )
      uri
    end

    # Fetches the response body from the service.
    #
    # Raises the Net::HTTP error matching the status when the response is not
    # a 2xx, so an error page is never parsed as if it were a result set.
    # Redirects are not followed.
    def remote_xml
      response = Net::HTTP.get_response(url)
      response.value
      response.body
    end

    # Parsed response document; fetched and parsed once, then reused.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'net/http'
2
+ require 'nokogiri'
3
+
4
class TermExtraction
  # Term extraction through the Zemanta REST service.
  #
  # The text (@context) and API key (@api_key) are sent as a form POST and the
  # XML answer is parsed with Nokogiri.
  class Zemanta < TermExtraction
    # Returns an Array with the text of every <name> element found anywhere in
    # the response, in document order.
    def terms
      data.search('//name').map { |node| node.text }
    end

    class << self
      # Short identifier of this back end.
      def canonical_name
        'zemanta'
      end
    end

    private

    # Endpoint of the Zemanta REST service.
    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # The gateway as a URI object, as needed by Net::HTTP.post_form.
    def url
      URI.parse(gateway)
    end

    # Form fields posted to the service.
    def post_params
      {
        'method'        => 'zemanta.suggest',
        'api_key'       => @api_key,
        'return_images' => 0,
        'text'          => @context,
        'format'        => 'xml'
      }
    end

    # Posts the form and returns the raw response body. The response status is
    # not inspected.
    def remote_xml
      Net::HTTP.post_form(url, post_params).body
    end

    # Parsed response document; fetched and parsed once, then reused.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ <?xml version="1.0"?>
2
+ <ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractionResponse.xsd"><Result>gears of war</Result><Result>gears</Result></ResultSet>
3
+ <!-- ws04.search.scd.yahoo.com uncompressed/chunked Thu Feb 26 21:04:16 PST 2009 -->
@@ -0,0 +1,104 @@
1
+ <?xml version="1.0"?>
2
+ <rsp>
3
+ <status>ok</status>
4
+ <articles>
5
+ <article>
6
+ <url>http://www.crunchgear.com/2009/02/24/nvidia-based-imacs-coming-soon/</url>
7
+ <confidence>0.033153</confidence>
8
+ <published_datetime>2009-02-24T18:00:45Z</published_datetime>
9
+ <zemified>0</zemified>
10
+ <title>NVIDIA-based iMacs coming soon?</title>
11
+ </article><article>
12
+ <url>http://www.tuaw.com/2009/02/24/rumor-new-imacs-around-the-bend/</url>
13
+ <confidence>0.028595</confidence>
14
+ <published_datetime>2009-02-24T22:00:00Z</published_datetime>
15
+ <zemified>0</zemified>
16
+ <title>Rumor: New iMacs around the bend</title>
17
+ </article><article>
18
+ <url>http://www.engadget.com/2009/02/24/a-few-new-rumors-point-to-two-new-nvidia-packing-imacs/</url>
19
+ <confidence>0.027595</confidence>
20
+ <published_datetime>2009-02-24T13:03:00Z</published_datetime>
21
+ <zemified>0</zemified>
22
+ <title>A few new rumors point to two new NVIDIA-packing iMacs</title>
23
+ </article><article>
24
+ <url>http://www.techmeme.com/090126/p31</url>
25
+ <confidence>0.019056</confidence>
26
+ <published_datetime>2009-01-26T17:00:22Z</published_datetime>
27
+ <zemified>0</zemified>
28
+ <title>Chip complex delaying Apple's new iMac line, says analyst (Zach Spear/AppleInsider)</title>
29
+ </article><article>
30
+ <url>http://i.gizmodo.com/5143476/apple-warns-resellers-of-reduced-imac-availability-new-models-on-the-way</url>
31
+ <confidence>0.018611</confidence>
32
+ <published_datetime>2009-01-31T16:30:00Z</published_datetime>
33
+ <zemified>0</zemified>
34
+ <title>Apple Warns Resellers of Reduced iMac Availability: New Models On the Way? [Apple]</title>
35
+ </article><article>
36
+ <url>http://cultofmac.com/analyst-new-imacs-delayed-for-chips-snow-leopard/7549</url>
37
+ <confidence>0.01685</confidence>
38
+ <published_datetime>2009-01-26T16:43:44Z</published_datetime>
39
+ <zemified>0</zemified>
40
+ <title>Analyst: New iMacs Delayed For Chips, Snow Leopard</title>
41
+ </article><article>
42
+ <url>http://www.ubergizmo.com/15/archives/2008/12/new_imac_allinone_pc_confirmed.html</url>
43
+ <confidence>0.016311</confidence>
44
+ <published_datetime>2008-12-24T01:06:56Z</published_datetime>
45
+ <zemified>0</zemified>
46
+ <title>New iMac All-In-One PC Confirmed</title>
47
+ </article><article>
48
+ <url>http://www.labnol.org/gadgets/exchange-windows-pc-for-apple-mac/5775/</url>
49
+ <confidence>0.016238</confidence>
50
+ <published_datetime>2008-12-03T08:17:35Z</published_datetime>
51
+ <zemified>0</zemified>
52
+ <title>Exchange Your Windows PC or TV for an Apple iMac</title>
53
+ </article><article>
54
+ <url>http://cultofmac.com/class-action-lawsuit-over-imac-display-problems/6562</url>
55
+ <confidence>0.015608</confidence>
56
+ <published_datetime>2009-01-02T16:17:06Z</published_datetime>
57
+ <zemified>0</zemified>
58
+ <title>Class-Action Lawsuit Over iMac Display Problems</title>
59
+ </article><article>
60
+ <url>http://www.crunchgear.com/2008/12/29/new-imacs-to-include-hotter-components-new-cooling/</url>
61
+ <confidence>0.015093</confidence>
62
+ <published_datetime>2008-12-29T23:50:04Z</published_datetime>
63
+ <zemified>0</zemified>
64
+ <title>New iMacs to include hotter components, new cooling?</title>
65
+ </article>
66
+ </articles><markup>
67
+ <text>apple imac</text>
68
+ </markup><signature>&lt;div class="zemanta-pixie"&gt;&lt;a class="zemanta-pixie-a" href="http://reblog.zemanta.com/zemified/3695b55a-d68e-4328-bf86-c3f01be8b2da/" title="Zemified by Zemanta"&gt;&lt;img class="zemanta-pixie-img" src="http://img.zemanta.com/reblog_e.png?x-id=3695b55a-d68e-4328-bf86-c3f01be8b2da" alt="Reblog this post [with Zemanta]" /&gt;&lt;/a&gt;&lt;/div&gt;</signature>
69
+ <keywords>
70
+ <keyword>
71
+ <confidence>0.404748</confidence>
72
+ <scheme>general</scheme>
73
+ <name>Apple</name>
74
+ </keyword><keyword>
75
+ <confidence>0.277753</confidence>
76
+ <scheme>general</scheme>
77
+ <name>IMac</name>
78
+ </keyword><keyword>
79
+ <confidence>0.094392</confidence>
80
+ <scheme>general</scheme>
81
+ <name>Rumor</name>
82
+ </keyword><keyword>
83
+ <confidence>0.076399</confidence>
84
+ <scheme>general</scheme>
85
+ <name>Hardware</name>
86
+ </keyword><keyword>
87
+ <confidence>0.072186</confidence>
88
+ <scheme>general</scheme>
89
+ <name>Nvidia</name>
90
+ </keyword><keyword>
91
+ <confidence>0.069183</confidence>
92
+ <scheme>general</scheme>
93
+ <name>Macintosh</name>
94
+ </keyword><keyword>
95
+ <confidence>0.050487</confidence>
96
+ <scheme>general</scheme>
97
+ <name>Desktops</name>
98
+ </keyword><keyword>
99
+ <confidence>0.039205</confidence>
100
+ <scheme>general</scheme>
101
+ <name>AllInOne</name>
102
+ </keyword>
103
+ </keywords><rid>3695b55a-d68e-4328-bf86-c3f01be8b2da</rid>
104
+ </rsp>
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Checks that each extractor turns a canned service response into the expected
# list of terms. remote_xml is stubbed with a fixture file, so the extractor's
# own remote_xml is never invoked.
class TermExtractionTest < Test::Unit::TestCase
  should 'return correct terms from Yahoo!' do
    yahoo = TermExtraction::Yahoo.new
    yahoo.stubs(:remote_xml).returns(read_xml_fixture('yahoo'))
    # assert_equal takes (expected, actual); this order keeps the failure
    # message's "expected" and "actual" labels truthful.
    assert_equal ['gears of war', 'gears'], yahoo.terms
  end

  should 'return correct terms from Zemanta' do
    zemanta = TermExtraction::Zemanta.new
    zemanta.stubs(:remote_xml).returns(read_xml_fixture('zemanta'))
    expected = ['Apple', 'IMac', 'Rumor', 'Hardware', 'Nvidia', 'Macintosh', 'Desktops', 'AllInOne']
    assert_equal expected, zemanta.terms
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require 'mocha'
5
+
6
# Make the test directory and the library under test loadable. The lib
# directory is pushed last so it is searched first, which lets
# require 'term_extraction' find the working copy in ../lib without -Ilib.
$LOAD_PATH.unshift(File.dirname(__FILE__))
$LOAD_PATH.unshift(File.expand_path('../lib', File.dirname(__FILE__)))
require 'term_extraction'
8
+
9
class Test::Unit::TestCase
  # Returns the contents of fixtures/<name>.xml as a String.
  #
  # The path is anchored to this helper file's directory rather than to the
  # literal "test/fixtures", so it does not depend on the tests being started
  # from the project root.
  #
  # name - fixture file name without the .xml extension
  def read_xml_fixture(name)
    File.read(File.join(File.dirname(__FILE__), 'fixtures', "#{name}.xml"))
  end
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alexrabarts-term_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - alex
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-27 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.0.7
24
+ version:
25
+ description: Term extraction library
26
+ email: alexrabarts@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - VERSION.yml
35
+ - lib/term_extraction
36
+ - lib/term_extraction/yahoo.rb
37
+ - lib/term_extraction/zemanta.rb
38
+ - lib/term_extraction.rb
39
+ - test/fixtures
40
+ - test/fixtures/yahoo.xml
41
+ - test/fixtures/zemanta.xml
42
+ - test/term_extraction_test.rb
43
+ - test/test_helper.rb
44
+ has_rdoc: true
45
+ homepage: http://github.com/alexrabarts/term_extraction
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --inline-source
49
+ - --charset=UTF-8
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.2.0
68
+ signing_key:
69
+ specification_version: 2
70
+ summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta.
71
+ test_files: []
72
+