alexrabarts-term_extraction 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION.yml ADDED
@@ -0,0 +1,4 @@
1
+ ---
2
+ :minor: 1
3
+ :patch: 0
4
+ :major: 0
@@ -0,0 +1,12 @@
1
# Base class for the term-extraction back ends.
#
# It only stores the text to analyse and the API key; the nested back-end
# classes in this library subclass it and supply +terms+ and a class-level
# +canonical_name+. This base class defines neither of those itself.
class TermExtraction
  # options - Hash of settings. An explicit nil is treated as an empty Hash.
  #           :context - the text that will be sent to the remote service.
  #           :api_key - the credential that will be sent to the service.
  def initialize(options = {})
    # Without this, TermExtraction.new(nil) would fail with NoMethodError
    # on nil[] instead of behaving like TermExtraction.new.
    options ||= {}

    @context = options[:context]
    @api_key = options[:api_key]
  end

  # Returns whatever the receiver's class reports from its class-level
  # +canonical_name+ method.
  def canonical_name
    self.class.canonical_name
  end
end
11
+
12
+ %w{yahoo zemanta}.each{|t| require "term_extraction/#{t}"}
@@ -0,0 +1,51 @@
1
+ require 'nokogiri'
2
+
3
# open-uri supplies URI::HTTP#read, used by Yahoo#remote_xml below; it also
# loads the stdlib URI module used to build the request URL.
require 'open-uri'

class TermExtraction
  # Term extraction backed by the Yahoo! Term Extraction web service.
  #
  # The request is a GET against +gateway+ carrying the API key and the
  # context text as query parameters; the XML reply is parsed with Nokogiri.
  class Yahoo < TermExtraction
    # Returns an Array holding the text of every <Result> element (in the
    # urn:yahoo:cate namespace) of the service response, in document order.
    # Returns an empty Array when there is no parsed document.
    def terms
      doc = data
      return [] if doc.nil?

      doc.search('//s:Result', ns).map { |node| node.text }
    end

    class << self
      # Short identifier for this back end.
      def canonical_name
        'yahoo'
      end
    end

    private

    # Namespace prefix mapping used by the XPath query in +terms+.
    def ns
      { 's' => 'urn:yahoo:cate' }
    end

    # Endpoint of the term extraction service.
    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Builds the request URI: the gateway plus a form-encoded query string.
    #
    # Uses the standard-library URI class; the previous implementation
    # referenced Addressable::URI, which this library never requires and
    # does not declare as a dependency.
    def url
      uri = URI.parse(gateway)
      uri.query = URI.encode_www_form([
        # TODO: Change appid to the BMP one
        ['appid', @api_key],
        ['output', 'xml'],
        ['context', @context]
      ])
      uri
    end

    # Fetches the raw XML response body as a String.
    #
    # URI::HTTP#read (from open-uri) opens the connection, reads the whole
    # body and closes the handle, so nothing is left open afterwards.
    def remote_xml
      url.read
    end

    # Parsed response document, fetched once and memoized.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
@@ -0,0 +1,49 @@
1
+ require 'net/http'
2
+ require 'nokogiri'
3
+
4
class TermExtraction
  # Term extraction backed by the Zemanta REST service.
  #
  # The request is a form POST to +gateway+ carrying the parameters built by
  # +post_params+; the XML reply is parsed with Nokogiri.
  class Zemanta < TermExtraction
    # Returns an Array holding the text of every <name> element found
    # anywhere in the service response, in document order. Returns an empty
    # Array when there is no parsed document (same contract as the Yahoo
    # back end).
    #
    # NOTE(review): '//name' matches <name> at any depth, not only under
    # <keyword>; confirm the service never returns other <name> elements.
    def terms
      doc = data
      return [] if doc.nil?

      doc.search('//name').map { |node| node.text }
    end

    class << self
      # Short identifier for this back end.
      def canonical_name
        'zemanta'
      end
    end

    private

    # Endpoint of the Zemanta REST service.
    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # The gateway as a URI object, as required by Net::HTTP.post_form.
    def url
      URI.parse(gateway)
    end

    # Form fields sent with the request: the API method name, the API key,
    # the text to analyse, and flags asking for XML output without images.
    def post_params
      {
        'method' => 'zemanta.suggest',
        'api_key' => @api_key,
        'return_images' => 0,
        'text' => @context,
        'format' => 'xml'
      }
    end

    # POSTs the form and returns the raw response body.
    def remote_xml
      Net::HTTP.post_form(url, post_params).body
    end

    # Parsed response document, fetched once and memoized.
    def data
      @data ||= Nokogiri::XML.parse(remote_xml)
    end
  end
end
@@ -0,0 +1,3 @@
1
+ <?xml version="1.0"?>
2
+ <ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractionResponse.xsd"><Result>gears of war</Result><Result>gears</Result></ResultSet>
3
+ <!-- ws04.search.scd.yahoo.com uncompressed/chunked Thu Feb 26 21:04:16 PST 2009 -->
@@ -0,0 +1,104 @@
1
+ <?xml version="1.0"?>
2
+ <rsp>
3
+ <status>ok</status>
4
+ <articles>
5
+ <article>
6
+ <url>http://www.crunchgear.com/2009/02/24/nvidia-based-imacs-coming-soon/</url>
7
+ <confidence>0.033153</confidence>
8
+ <published_datetime>2009-02-24T18:00:45Z</published_datetime>
9
+ <zemified>0</zemified>
10
+ <title>NVIDIA-based iMacs coming soon?</title>
11
+ </article><article>
12
+ <url>http://www.tuaw.com/2009/02/24/rumor-new-imacs-around-the-bend/</url>
13
+ <confidence>0.028595</confidence>
14
+ <published_datetime>2009-02-24T22:00:00Z</published_datetime>
15
+ <zemified>0</zemified>
16
+ <title>Rumor: New iMacs around the bend</title>
17
+ </article><article>
18
+ <url>http://www.engadget.com/2009/02/24/a-few-new-rumors-point-to-two-new-nvidia-packing-imacs/</url>
19
+ <confidence>0.027595</confidence>
20
+ <published_datetime>2009-02-24T13:03:00Z</published_datetime>
21
+ <zemified>0</zemified>
22
+ <title>A few new rumors point to two new NVIDIA-packing iMacs</title>
23
+ </article><article>
24
+ <url>http://www.techmeme.com/090126/p31</url>
25
+ <confidence>0.019056</confidence>
26
+ <published_datetime>2009-01-26T17:00:22Z</published_datetime>
27
+ <zemified>0</zemified>
28
+ <title>Chip complex delaying Apple's new iMac line, says analyst (Zach Spear/AppleInsider)</title>
29
+ </article><article>
30
+ <url>http://i.gizmodo.com/5143476/apple-warns-resellers-of-reduced-imac-availability-new-models-on-the-way</url>
31
+ <confidence>0.018611</confidence>
32
+ <published_datetime>2009-01-31T16:30:00Z</published_datetime>
33
+ <zemified>0</zemified>
34
+ <title>Apple Warns Resellers of Reduced iMac Availability: New Models On the Way? [Apple]</title>
35
+ </article><article>
36
+ <url>http://cultofmac.com/analyst-new-imacs-delayed-for-chips-snow-leopard/7549</url>
37
+ <confidence>0.01685</confidence>
38
+ <published_datetime>2009-01-26T16:43:44Z</published_datetime>
39
+ <zemified>0</zemified>
40
+ <title>Analyst: New iMacs Delayed For Chips, Snow Leopard</title>
41
+ </article><article>
42
+ <url>http://www.ubergizmo.com/15/archives/2008/12/new_imac_allinone_pc_confirmed.html</url>
43
+ <confidence>0.016311</confidence>
44
+ <published_datetime>2008-12-24T01:06:56Z</published_datetime>
45
+ <zemified>0</zemified>
46
+ <title>New iMac All-In-One PC Confirmed</title>
47
+ </article><article>
48
+ <url>http://www.labnol.org/gadgets/exchange-windows-pc-for-apple-mac/5775/</url>
49
+ <confidence>0.016238</confidence>
50
+ <published_datetime>2008-12-03T08:17:35Z</published_datetime>
51
+ <zemified>0</zemified>
52
+ <title>Exchange Your Windows PC or TV for an Apple iMac</title>
53
+ </article><article>
54
+ <url>http://cultofmac.com/class-action-lawsuit-over-imac-display-problems/6562</url>
55
+ <confidence>0.015608</confidence>
56
+ <published_datetime>2009-01-02T16:17:06Z</published_datetime>
57
+ <zemified>0</zemified>
58
+ <title>Class-Action Lawsuit Over iMac Display Problems</title>
59
+ </article><article>
60
+ <url>http://www.crunchgear.com/2008/12/29/new-imacs-to-include-hotter-components-new-cooling/</url>
61
+ <confidence>0.015093</confidence>
62
+ <published_datetime>2008-12-29T23:50:04Z</published_datetime>
63
+ <zemified>0</zemified>
64
+ <title>New iMacs to include hotter components, new cooling?</title>
65
+ </article>
66
+ </articles><markup>
67
+ <text>apple imac</text>
68
+ </markup><signature>&lt;div class="zemanta-pixie"&gt;&lt;a class="zemanta-pixie-a" href="http://reblog.zemanta.com/zemified/3695b55a-d68e-4328-bf86-c3f01be8b2da/" title="Zemified by Zemanta"&gt;&lt;img class="zemanta-pixie-img" src="http://img.zemanta.com/reblog_e.png?x-id=3695b55a-d68e-4328-bf86-c3f01be8b2da" alt="Reblog this post [with Zemanta]" /&gt;&lt;/a&gt;&lt;/div&gt;</signature>
69
+ <keywords>
70
+ <keyword>
71
+ <confidence>0.404748</confidence>
72
+ <scheme>general</scheme>
73
+ <name>Apple</name>
74
+ </keyword><keyword>
75
+ <confidence>0.277753</confidence>
76
+ <scheme>general</scheme>
77
+ <name>IMac</name>
78
+ </keyword><keyword>
79
+ <confidence>0.094392</confidence>
80
+ <scheme>general</scheme>
81
+ <name>Rumor</name>
82
+ </keyword><keyword>
83
+ <confidence>0.076399</confidence>
84
+ <scheme>general</scheme>
85
+ <name>Hardware</name>
86
+ </keyword><keyword>
87
+ <confidence>0.072186</confidence>
88
+ <scheme>general</scheme>
89
+ <name>Nvidia</name>
90
+ </keyword><keyword>
91
+ <confidence>0.069183</confidence>
92
+ <scheme>general</scheme>
93
+ <name>Macintosh</name>
94
+ </keyword><keyword>
95
+ <confidence>0.050487</confidence>
96
+ <scheme>general</scheme>
97
+ <name>Desktops</name>
98
+ </keyword><keyword>
99
+ <confidence>0.039205</confidence>
100
+ <scheme>general</scheme>
101
+ <name>AllInOne</name>
102
+ </keyword>
103
+ </keywords><rid>3695b55a-d68e-4328-bf86-c3f01be8b2da</rid>
104
+ </rsp>
@@ -0,0 +1,15 @@
1
+ require File.dirname(__FILE__) + '/test_helper'
2
+
3
# Checks that each back end turns a canned service response into the
# expected list of terms. +remote_xml+ is stubbed with a fixture file, so no
# network request is made.
class TermExtractionTest < Test::Unit::TestCase
  should 'return correct terms from Yahoo!' do
    yahoo = TermExtraction::Yahoo.new
    yahoo.stubs(:remote_xml).returns(read_xml_fixture('yahoo'))

    # assert_equal takes (expected, actual); with the arguments the other
    # way round a failure message labels the two values backwards.
    assert_equal ['gears of war', 'gears'], yahoo.terms
  end

  should 'return correct terms from Zemanta' do
    zemanta = TermExtraction::Zemanta.new
    zemanta.stubs(:remote_xml).returns(read_xml_fixture('zemanta'))

    expected = ['Apple', 'IMac', 'Rumor', 'Hardware', 'Nvidia', 'Macintosh', 'Desktops', 'AllInOne']
    assert_equal expected, zemanta.terms
  end
end
@@ -0,0 +1,13 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+ require 'mocha'
5
+
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'term_extraction'
8
+
9
class Test::Unit::TestCase
  # Returns the contents of the fixture file fixtures/<name>.xml as a String.
  #
  # name - base name of the fixture, without the ".xml" extension.
  #
  # The path is resolved relative to the directory of this helper file
  # rather than the process's working directory, so the tests can be run
  # from any directory, not only from the project root.
  def read_xml_fixture(name)
    File.read(File.join(File.dirname(__FILE__), 'fixtures', "#{name}.xml"))
  end
end
metadata ADDED
@@ -0,0 +1,72 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: alexrabarts-term_extraction
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - alex
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+
12
+ date: 2009-02-27 00:00:00 -08:00
13
+ default_executable:
14
+ dependencies:
15
+ - !ruby/object:Gem::Dependency
16
+ name: nokogiri
17
+ type: :runtime
18
+ version_requirement:
19
+ version_requirements: !ruby/object:Gem::Requirement
20
+ requirements:
21
+ - - ">="
22
+ - !ruby/object:Gem::Version
23
+ version: 1.0.7
24
+ version:
25
+ description: Term extraction library
26
+ email: alexrabarts@gmail.com
27
+ executables: []
28
+
29
+ extensions: []
30
+
31
+ extra_rdoc_files: []
32
+
33
+ files:
34
+ - VERSION.yml
35
+ - lib/term_extraction
36
+ - lib/term_extraction/yahoo.rb
37
+ - lib/term_extraction/zemanta.rb
38
+ - lib/term_extraction.rb
39
+ - test/fixtures
40
+ - test/fixtures/yahoo.xml
41
+ - test/fixtures/zemanta.xml
42
+ - test/term_extraction_test.rb
43
+ - test/test_helper.rb
44
+ has_rdoc: true
45
+ homepage: http://github.com/alexrabarts/term_extraction
46
+ post_install_message:
47
+ rdoc_options:
48
+ - --inline-source
49
+ - --charset=UTF-8
50
+ require_paths:
51
+ - lib
52
+ required_ruby_version: !ruby/object:Gem::Requirement
53
+ requirements:
54
+ - - ">="
55
+ - !ruby/object:Gem::Version
56
+ version: "0"
57
+ version:
58
+ required_rubygems_version: !ruby/object:Gem::Requirement
59
+ requirements:
60
+ - - ">="
61
+ - !ruby/object:Gem::Version
62
+ version: "0"
63
+ version:
64
+ requirements: []
65
+
66
+ rubyforge_project:
67
+ rubygems_version: 1.2.0
68
+ signing_key:
69
+ specification_version: 2
70
+ summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta.
71
+ test_files: []
72
+