term_extraction 0.1.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +4 -0
- data/LICENSE +22 -0
- data/README +31 -0
- data/Rakefile +55 -0
- data/VERSION.yml +4 -0
- data/lib/term_extraction/yahoo.rb +54 -0
- data/lib/term_extraction/zemanta.rb +52 -0
- data/lib/term_extraction.rb +14 -0
- data/term_extraction.gemspec +59 -0
- data/test/fixtures/yahoo.xml +3 -0
- data/test/fixtures/yahoo2.xml +3 -0
- data/test/fixtures/zemanta.xml +104 -0
- data/test/fixtures/zemanta2.xml +104 -0
- data/test/term_extraction_test.rb +62 -0
- data/test/test_helper.rb +22 -0
- metadata +80 -0
data/.gitignore
ADDED
data/LICENSE
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
(The MIT License)
|
2
|
+
|
3
|
+
Copyright (c) 2009 Stateless Systems (http://statelesssystems.com)
|
4
|
+
|
5
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
6
|
+
a copy of this software and associated documentation files (the
|
7
|
+
'Software'), to deal in the Software without restriction, including
|
8
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
9
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
10
|
+
permit persons to whom the Software is furnished to do so, subject to
|
11
|
+
the following conditions:
|
12
|
+
|
13
|
+
The above copyright notice and this permission notice shall be
|
14
|
+
included in all copies or substantial portions of the Software.
|
15
|
+
|
16
|
+
THE SOFTWARE IS PROVIDED 'AS IS', WITHOUT WARRANTY OF ANY KIND,
|
17
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
18
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
|
19
|
+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
|
20
|
+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
|
21
|
+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
|
22
|
+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
= term_extraction
|
2
|
+
|
3
|
+
== DESCRIPTION:
|
4
|
+
|
5
|
+
Provides access to term extraction APIs such as Yahoo! Term Extraction API and
|
6
|
+
Zemanta.
|
7
|
+
|
8
|
+
== SYNOPSIS:
|
9
|
+
|
10
|
+
# Query Yahoo! for terms
|
11
|
+
yahoo = TermExtraction::Yahoo.new(:api_key => 'myApiKey', :context => 'xbox 360 gears of war')
|
12
|
+
yahoo.terms # => ["gears of war", "xbox 360", "gears", "xbox"]
|
13
|
+
|
14
|
+
# Query Zemanta for terms
|
15
|
+
zemanta = TermExtraction::Zemanta.new(:api_key => 'myApiKey', :context => 'apple imac')
|
16
|
+
zemanta.terms # => ["Apple", "IMac", "Rumor", "Hardware", "Nvidia", "Macintosh", "Desktops", "AllInOne"]
|
17
|
+
|
18
|
+
== INSTALL:
|
19
|
+
|
20
|
+
* Via git:
|
21
|
+
|
22
|
+
git clone git://github.com/alexrabarts/term_extraction.git
|
23
|
+
|
24
|
+
* Via gem:
|
25
|
+
|
26
|
+
gem install alexrabarts-term_extraction -s http://gems.github.com
|
27
|
+
|
28
|
+
COPYRIGHT
|
29
|
+
=========
|
30
|
+
|
31
|
+
Copyright (c) 2009 Stateless Systems (http://statelesssystems.com). See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,55 @@
|
|
1
|
+
# Build, documentation, test and coverage tasks for the term_extraction gem.
require 'rake'

# Gem packaging through Jeweler. Jeweler is optional: when it cannot be loaded
# only an installation hint is printed and the remaining tasks stay usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |s|
    s.name = "term_extraction"
    s.summary = %Q{
      Provides access to term extraction APIs such as Yahoo! Term Extraction API and
      Zemanta.
    }
    s.email = "alexrabarts@gmail.com"
    s.homepage = "http://github.com/alexrabarts/term_extraction"
    s.description = "Term extraction library"
    s.authors = ["alex"]
    s.add_dependency 'nokogiri', ['>=1.0.7']
    # The library files require 'addressable/uri', so addressable has to be
    # declared as a runtime dependency too; otherwise a fresh install of the
    # gem fails with a LoadError on first use.
    s.add_dependency 'addressable'
  end
rescue LoadError
  puts "Jeweler not available. Install it with: sudo gem install technicalpickles-jeweler -s http://gems.github.com"
end

# RDoc generation for the README and everything under lib/.
require 'rake/rdoctask'
Rake::RDocTask.new do |rdoc|
  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = 'term_extraction'
  rdoc.options << '--line-numbers' << '--inline-source'
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end

# Unit tests: every *_test.rb file below test/.
require 'rake/testtask'
Rake::TestTask.new(:test) do |t|
  t.libs << 'lib' << 'test'
  t.pattern = 'test/**/*_test.rb'
  t.verbose = false
end

# Coverage task; skipped with a hint when rcov is not installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |t|
    t.libs << 'test'
    t.test_files = FileList['test/**/*_test.rb']
    t.verbose = true
  end
rescue LoadError
  puts "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
end

# Cucumber features task; skipped with a hint when cucumber is not installed.
begin
  require 'cucumber/rake/task'
  Cucumber::Rake::Task.new(:features)
rescue LoadError
  puts "Cucumber is not available. In order to run features, you must: sudo gem install cucumber"
end

# Running plain `rake` executes the unit tests.
task :default => :test
|
data/VERSION.yml
ADDED
data/lib/term_extraction/yahoo.rb
ADDED
@@ -0,0 +1,54 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'addressable/uri'
|
3
|
+
require 'open-uri'
|
4
|
+
|
5
|
+
class TermExtraction
  # Client for the Yahoo! Term Extraction web service. The request is a GET
  # against the gateway URL with the API key and the context text passed as
  # query parameters; the XML answer is parsed with Nokogiri.
  class Yahoo < TermExtraction
    # Fetches the service response and collects the text of every Result
    # element in the Yahoo namespace.
    #
    # Returns an Array of Strings; the array is empty when the response could
    # not be fetched.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      data = Nokogiri::XML.parse(xml)
      return [] if data.nil?

      data.search('//s:Result', ns).map { |node| node.text }
    end

    # Builds the request URI: the gateway plus the appid, output and context
    # query parameters.
    #
    # Returns an Addressable::URI.
    def uri
      api_uri = Addressable::URI.parse(gateway)
      api_uri.query_values = {
        'appid'   => @api_key,
        'output'  => 'xml',
        'context' => @context
      }
      api_uri
    end

    class << self
      # Short identifier of this backend.
      def canonical_name
        'yahoo'
      end
    end

    private

    # Namespace mapping used by the XPath query in #terms.
    def ns
      {'s' => 'urn:yahoo:cate'}
    end

    # Endpoint of the term extraction service.
    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Downloads the raw XML for #uri.
    #
    # Returns the response body as a String, or nil when anything goes wrong
    # (the error is reported on $stderr only when $VERBOSE is set).
    def remote_xml
      # Call OpenURI directly instead of Kernel#open: open-uri stopped
      # patching Kernel#open for URLs in Ruby 3.0, where a bare open(url)
      # would try to open a local file. The block form closes the stream.
      OpenURI.open_uri(uri.to_s) { |io| io.read }
    rescue => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
|
data/lib/term_extraction/zemanta.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'net/http'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'addressable/uri'
|
4
|
+
|
5
|
+
class TermExtraction
  # Client for the Zemanta suggestion service. The request is a form POST to
  # the gateway URL; the XML answer is parsed with Nokogiri.
  class Zemanta < TermExtraction
    # Fetches the service response and collects the text of every <name>
    # element.
    #
    # Returns an Array of Strings; the array is empty when the response could
    # not be fetched.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      data = Nokogiri::XML.parse(xml)
      return [] if data.nil?

      data.search('//name').map { |node| node.text }
    end

    # Returns the gateway as an Addressable::URI.
    def uri
      Addressable::URI.parse(gateway)
    end

    # Form fields sent with the POST request.
    #
    # Returns a Hash with String keys.
    def post_params
      {
        'method'        => 'zemanta.suggest',
        'api_key'       => @api_key,
        'return_images' => 0,
        'text'          => @context,
        'format'        => 'xml'
      }
    end

    class << self
      # Short identifier of this backend.
      def canonical_name
        'zemanta'
      end
    end

    private

    # Endpoint of the Zemanta REST service.
    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # Posts #post_params to #uri.
    #
    # Returns the response body, or nil when anything goes wrong (the error is
    # reported on $stderr only when $VERBOSE is set).
    def remote_xml
      Net::HTTP.post_form(uri, post_params).body
    rescue => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
|
data/lib/term_extraction.rb
ADDED
@@ -0,0 +1,14 @@
|
|
1
|
+
# Base class of the term extraction clients. It stores the text to analyse
# (context) and the API key; both can be read and reassigned later.
class TermExtraction
  attr_reader :context, :api_key
  attr_writer :context, :api_key

  # options - Hash that may carry :context and :api_key; a missing key leaves
  #           the corresponding attribute nil.
  def initialize(options = {})
    @context, @api_key = options[:context], options[:api_key]
  end

  # Instance-level shortcut for the canonical_name defined on the object's
  # class.
  def canonical_name
    klass = self.class
    klass.canonical_name
  end
end
|
13
|
+
|
14
|
+
%w{yahoo zemanta}.each{|t| require "term_extraction/#{t}"}
|
data/term_extraction.gemspec
ADDED
@@ -0,0 +1,59 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for term_extraction 0.1.4.
Gem::Specification.new do |s|
  s.name = %q{term_extraction}
  s.version = "0.1.4"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["alex"]
  s.date = %q{2009-12-20}
  s.description = %q{Term extraction library}
  s.email = %q{alexrabarts@gmail.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README",
    "Rakefile",
    "VERSION.yml",
    "lib/term_extraction.rb",
    "lib/term_extraction/yahoo.rb",
    "lib/term_extraction/zemanta.rb",
    "term_extraction.gemspec",
    "test/fixtures/yahoo.xml",
    "test/fixtures/yahoo2.xml",
    "test/fixtures/zemanta.xml",
    "test/fixtures/zemanta2.xml",
    "test/term_extraction_test.rb",
    "test/test_helper.rb"
  ]
  s.homepage = %q{http://github.com/alexrabarts/term_extraction}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.5}
  s.summary = %q{Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta.}
  s.test_files = [
    "test/term_extraction_test.rb",
    "test/test_helper.rb"
  ]

  # Runtime dependencies. addressable is listed next to nokogiri because the
  # library files require 'addressable/uri'.
  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is a legacy constant that newer RubyGems releases
    # may not define, so prefer Gem::VERSION whenever it is available.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0') then
      s.add_runtime_dependency(%q<nokogiri>, [">= 1.0.7"])
      s.add_runtime_dependency(%q<addressable>, [">= 0"])
    else
      s.add_dependency(%q<nokogiri>, [">= 1.0.7"])
      s.add_dependency(%q<addressable>, [">= 0"])
    end
  else
    s.add_dependency(%q<nokogiri>, [">= 1.0.7"])
    s.add_dependency(%q<addressable>, [">= 0"])
  end
end
|
59
|
+
|
data/test/fixtures/yahoo.xml
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractionResponse.xsd"><Result>gears of war</Result><Result>gears</Result></ResultSet>
|
3
|
+
<!-- ws04.search.scd.yahoo.com uncompressed/chunked Thu Feb 26 21:04:16 PST 2009 -->
|
data/test/fixtures/yahoo2.xml
ADDED
@@ -0,0 +1,3 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<ResultSet xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xmlns="urn:yahoo:cate" xsi:schemaLocation="urn:yahoo:cate http://api.search.yahoo.com/ContentAnalysisService/V1/TermExtractionResponse.xsd"><Result>fears of war</Result><Result>fears</Result></ResultSet>
|
3
|
+
<!-- ws04.search.scd.yahoo.com uncompressed/chunked Thu Feb 26 21:04:16 PST 2009 -->
|
data/test/fixtures/zemanta.xml
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<rsp>
|
3
|
+
<status>ok</status>
|
4
|
+
<articles>
|
5
|
+
<article>
|
6
|
+
<url>http://www.crunchgear.com/2009/02/24/nvidia-based-imacs-coming-soon/</url>
|
7
|
+
<confidence>0.033153</confidence>
|
8
|
+
<published_datetime>2009-02-24T18:00:45Z</published_datetime>
|
9
|
+
<zemified>0</zemified>
|
10
|
+
<title>NVIDIA-based iMacs coming soon?</title>
|
11
|
+
</article><article>
|
12
|
+
<url>http://www.tuaw.com/2009/02/24/rumor-new-imacs-around-the-bend/</url>
|
13
|
+
<confidence>0.028595</confidence>
|
14
|
+
<published_datetime>2009-02-24T22:00:00Z</published_datetime>
|
15
|
+
<zemified>0</zemified>
|
16
|
+
<title>Rumor: New iMacs around the bend</title>
|
17
|
+
</article><article>
|
18
|
+
<url>http://www.engadget.com/2009/02/24/a-few-new-rumors-point-to-two-new-nvidia-packing-imacs/</url>
|
19
|
+
<confidence>0.027595</confidence>
|
20
|
+
<published_datetime>2009-02-24T13:03:00Z</published_datetime>
|
21
|
+
<zemified>0</zemified>
|
22
|
+
<title>A few new rumors point to two new NVIDIA-packing iMacs</title>
|
23
|
+
</article><article>
|
24
|
+
<url>http://www.techmeme.com/090126/p31</url>
|
25
|
+
<confidence>0.019056</confidence>
|
26
|
+
<published_datetime>2009-01-26T17:00:22Z</published_datetime>
|
27
|
+
<zemified>0</zemified>
|
28
|
+
<title>Chip complex delaying Apple's new iMac line, says analyst (Zach Spear/AppleInsider)</title>
|
29
|
+
</article><article>
|
30
|
+
<url>http://i.gizmodo.com/5143476/apple-warns-resellers-of-reduced-imac-availability-new-models-on-the-way</url>
|
31
|
+
<confidence>0.018611</confidence>
|
32
|
+
<published_datetime>2009-01-31T16:30:00Z</published_datetime>
|
33
|
+
<zemified>0</zemified>
|
34
|
+
<title>Apple Warns Resellers of Reduced iMac Availability: New Models On the Way? [Apple]</title>
|
35
|
+
</article><article>
|
36
|
+
<url>http://cultofmac.com/analyst-new-imacs-delayed-for-chips-snow-leopard/7549</url>
|
37
|
+
<confidence>0.01685</confidence>
|
38
|
+
<published_datetime>2009-01-26T16:43:44Z</published_datetime>
|
39
|
+
<zemified>0</zemified>
|
40
|
+
<title>Analyst: New iMacs Delayed For Chips, Snow Leopard</title>
|
41
|
+
</article><article>
|
42
|
+
<url>http://www.ubergizmo.com/15/archives/2008/12/new_imac_allinone_pc_confirmed.html</url>
|
43
|
+
<confidence>0.016311</confidence>
|
44
|
+
<published_datetime>2008-12-24T01:06:56Z</published_datetime>
|
45
|
+
<zemified>0</zemified>
|
46
|
+
<title>New iMac All-In-One PC Confirmed</title>
|
47
|
+
</article><article>
|
48
|
+
<url>http://www.labnol.org/gadgets/exchange-windows-pc-for-apple-mac/5775/</url>
|
49
|
+
<confidence>0.016238</confidence>
|
50
|
+
<published_datetime>2008-12-03T08:17:35Z</published_datetime>
|
51
|
+
<zemified>0</zemified>
|
52
|
+
<title>Exchange Your Windows PC or TV for an Apple iMac</title>
|
53
|
+
</article><article>
|
54
|
+
<url>http://cultofmac.com/class-action-lawsuit-over-imac-display-problems/6562</url>
|
55
|
+
<confidence>0.015608</confidence>
|
56
|
+
<published_datetime>2009-01-02T16:17:06Z</published_datetime>
|
57
|
+
<zemified>0</zemified>
|
58
|
+
<title>Class-Action Lawsuit Over iMac Display Problems</title>
|
59
|
+
</article><article>
|
60
|
+
<url>http://www.crunchgear.com/2008/12/29/new-imacs-to-include-hotter-components-new-cooling/</url>
|
61
|
+
<confidence>0.015093</confidence>
|
62
|
+
<published_datetime>2008-12-29T23:50:04Z</published_datetime>
|
63
|
+
<zemified>0</zemified>
|
64
|
+
<title>New iMacs to include hotter components, new cooling?</title>
|
65
|
+
</article>
|
66
|
+
</articles><markup>
|
67
|
+
<text>apple imac</text>
|
68
|
+
</markup><signature><div class="zemanta-pixie"><a class="zemanta-pixie-a" href="http://reblog.zemanta.com/zemified/3695b55a-d68e-4328-bf86-c3f01be8b2da/" title="Zemified by Zemanta"><img class="zemanta-pixie-img" src="http://img.zemanta.com/reblog_e.png?x-id=3695b55a-d68e-4328-bf86-c3f01be8b2da" alt="Reblog this post [with Zemanta]" /></a></div></signature>
|
69
|
+
<keywords>
|
70
|
+
<keyword>
|
71
|
+
<confidence>0.404748</confidence>
|
72
|
+
<scheme>general</scheme>
|
73
|
+
<name>Apple</name>
|
74
|
+
</keyword><keyword>
|
75
|
+
<confidence>0.277753</confidence>
|
76
|
+
<scheme>general</scheme>
|
77
|
+
<name>IMac</name>
|
78
|
+
</keyword><keyword>
|
79
|
+
<confidence>0.094392</confidence>
|
80
|
+
<scheme>general</scheme>
|
81
|
+
<name>Rumor</name>
|
82
|
+
</keyword><keyword>
|
83
|
+
<confidence>0.076399</confidence>
|
84
|
+
<scheme>general</scheme>
|
85
|
+
<name>Hardware</name>
|
86
|
+
</keyword><keyword>
|
87
|
+
<confidence>0.072186</confidence>
|
88
|
+
<scheme>general</scheme>
|
89
|
+
<name>Nvidia</name>
|
90
|
+
</keyword><keyword>
|
91
|
+
<confidence>0.069183</confidence>
|
92
|
+
<scheme>general</scheme>
|
93
|
+
<name>Macintosh</name>
|
94
|
+
</keyword><keyword>
|
95
|
+
<confidence>0.050487</confidence>
|
96
|
+
<scheme>general</scheme>
|
97
|
+
<name>Desktops</name>
|
98
|
+
</keyword><keyword>
|
99
|
+
<confidence>0.039205</confidence>
|
100
|
+
<scheme>general</scheme>
|
101
|
+
<name>AllInOne</name>
|
102
|
+
</keyword>
|
103
|
+
</keywords><rid>3695b55a-d68e-4328-bf86-c3f01be8b2da</rid>
|
104
|
+
</rsp>
|
data/test/fixtures/zemanta2.xml
ADDED
@@ -0,0 +1,104 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<rsp>
|
3
|
+
<status>ok</status>
|
4
|
+
<articles>
|
5
|
+
<article>
|
6
|
+
<url>http://www.crunchgear.com/2009/02/24/nvidia-based-imacs-coming-soon/</url>
|
7
|
+
<confidence>0.033153</confidence>
|
8
|
+
<published_datetime>2009-02-24T18:00:45Z</published_datetime>
|
9
|
+
<zemified>0</zemified>
|
10
|
+
<title>NVIDIA-based iMacs coming soon?</title>
|
11
|
+
</article><article>
|
12
|
+
<url>http://www.tuaw.com/2009/02/24/rumor-new-imacs-around-the-bend/</url>
|
13
|
+
<confidence>0.028595</confidence>
|
14
|
+
<published_datetime>2009-02-24T22:00:00Z</published_datetime>
|
15
|
+
<zemified>0</zemified>
|
16
|
+
<title>Rumor: New iMacs around the bend</title>
|
17
|
+
</article><article>
|
18
|
+
<url>http://www.engadget.com/2009/02/24/a-few-new-rumors-point-to-two-new-nvidia-packing-imacs/</url>
|
19
|
+
<confidence>0.027595</confidence>
|
20
|
+
<published_datetime>2009-02-24T13:03:00Z</published_datetime>
|
21
|
+
<zemified>0</zemified>
|
22
|
+
<title>A few new rumors point to two new NVIDIA-packing iMacs</title>
|
23
|
+
</article><article>
|
24
|
+
<url>http://www.techmeme.com/090126/p31</url>
|
25
|
+
<confidence>0.019056</confidence>
|
26
|
+
<published_datetime>2009-01-26T17:00:22Z</published_datetime>
|
27
|
+
<zemified>0</zemified>
|
28
|
+
<title>Chip complex delaying Apple's new iMac line, says analyst (Zach Spear/AppleInsider)</title>
|
29
|
+
</article><article>
|
30
|
+
<url>http://i.gizmodo.com/5143476/apple-warns-resellers-of-reduced-imac-availability-new-models-on-the-way</url>
|
31
|
+
<confidence>0.018611</confidence>
|
32
|
+
<published_datetime>2009-01-31T16:30:00Z</published_datetime>
|
33
|
+
<zemified>0</zemified>
|
34
|
+
<title>Apple Warns Resellers of Reduced iMac Availability: New Models On the Way? [Apple]</title>
|
35
|
+
</article><article>
|
36
|
+
<url>http://cultofmac.com/analyst-new-imacs-delayed-for-chips-snow-leopard/7549</url>
|
37
|
+
<confidence>0.01685</confidence>
|
38
|
+
<published_datetime>2009-01-26T16:43:44Z</published_datetime>
|
39
|
+
<zemified>0</zemified>
|
40
|
+
<title>Analyst: New iMacs Delayed For Chips, Snow Leopard</title>
|
41
|
+
</article><article>
|
42
|
+
<url>http://www.ubergizmo.com/15/archives/2008/12/new_imac_allinone_pc_confirmed.html</url>
|
43
|
+
<confidence>0.016311</confidence>
|
44
|
+
<published_datetime>2008-12-24T01:06:56Z</published_datetime>
|
45
|
+
<zemified>0</zemified>
|
46
|
+
<title>New iMac All-In-One PC Confirmed</title>
|
47
|
+
</article><article>
|
48
|
+
<url>http://www.labnol.org/gadgets/exchange-windows-pc-for-apple-mac/5775/</url>
|
49
|
+
<confidence>0.016238</confidence>
|
50
|
+
<published_datetime>2008-12-03T08:17:35Z</published_datetime>
|
51
|
+
<zemified>0</zemified>
|
52
|
+
<title>Exchange Your Windows PC or TV for an Apple iMac</title>
|
53
|
+
</article><article>
|
54
|
+
<url>http://cultofmac.com/class-action-lawsuit-over-imac-display-problems/6562</url>
|
55
|
+
<confidence>0.015608</confidence>
|
56
|
+
<published_datetime>2009-01-02T16:17:06Z</published_datetime>
|
57
|
+
<zemified>0</zemified>
|
58
|
+
<title>Class-Action Lawsuit Over iMac Display Problems</title>
|
59
|
+
</article><article>
|
60
|
+
<url>http://www.crunchgear.com/2008/12/29/new-imacs-to-include-hotter-components-new-cooling/</url>
|
61
|
+
<confidence>0.015093</confidence>
|
62
|
+
<published_datetime>2008-12-29T23:50:04Z</published_datetime>
|
63
|
+
<zemified>0</zemified>
|
64
|
+
<title>New iMacs to include hotter components, new cooling?</title>
|
65
|
+
</article>
|
66
|
+
</articles><markup>
|
67
|
+
<text>apple imac</text>
|
68
|
+
</markup><signature><div class="zemanta-pixie"><a class="zemanta-pixie-a" href="http://reblog.zemanta.com/zemified/3695b55a-d68e-4328-bf86-c3f01be8b2da/" title="Zemified by Zemanta"><img class="zemanta-pixie-img" src="http://img.zemanta.com/reblog_e.png?x-id=3695b55a-d68e-4328-bf86-c3f01be8b2da" alt="Reblog this post [with Zemanta]" /></a></div></signature>
|
69
|
+
<keywords>
|
70
|
+
<keyword>
|
71
|
+
<confidence>0.404748</confidence>
|
72
|
+
<scheme>general</scheme>
|
73
|
+
<name>Zapple</name>
|
74
|
+
</keyword><keyword>
|
75
|
+
<confidence>0.277753</confidence>
|
76
|
+
<scheme>general</scheme>
|
77
|
+
<name>AMac</name>
|
78
|
+
</keyword><keyword>
|
79
|
+
<confidence>0.094392</confidence>
|
80
|
+
<scheme>general</scheme>
|
81
|
+
<name>Tumor</name>
|
82
|
+
</keyword><keyword>
|
83
|
+
<confidence>0.076399</confidence>
|
84
|
+
<scheme>general</scheme>
|
85
|
+
<name>Shardware</name>
|
86
|
+
</keyword><keyword>
|
87
|
+
<confidence>0.072186</confidence>
|
88
|
+
<scheme>general</scheme>
|
89
|
+
<name>Mvidia</name>
|
90
|
+
</keyword><keyword>
|
91
|
+
<confidence>0.069183</confidence>
|
92
|
+
<scheme>general</scheme>
|
93
|
+
<name>Hackintosh</name>
|
94
|
+
</keyword><keyword>
|
95
|
+
<confidence>0.050487</confidence>
|
96
|
+
<scheme>general</scheme>
|
97
|
+
<name>Zesktops</name>
|
98
|
+
</keyword><keyword>
|
99
|
+
<confidence>0.039205</confidence>
|
100
|
+
<scheme>general</scheme>
|
101
|
+
<name>AllInNone</name>
|
102
|
+
</keyword>
|
103
|
+
</keywords><rid>3695b55a-d68e-4328-bf86-c3f01be8b2da</rid>
|
104
|
+
</rsp>
|
data/test/term_extraction_test.rb
ADDED
@@ -0,0 +1,62 @@
|
|
1
|
+
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
|
3
|
+
# Tests for the Yahoo! and Zemanta clients. Requests are answered from the
# files under test/fixtures, registered through the fake_uri helper.
class TermExtractionTest < Test::Unit::TestCase
  should 'return correct terms from Yahoo!' do
    yahoo = TermExtraction::Yahoo.new
    fake_uri(:get, yahoo.uri, 'yahoo.xml')
    # assert_equal takes the expected value first so failure messages read correctly.
    assert_equal correct_yahoo_terms, yahoo.terms
  end

  should 'return correct terms from Zemanta' do
    zemanta = TermExtraction::Zemanta.new
    fake_uri(:post, zemanta.uri, 'zemanta.xml')
    assert_equal correct_zemanta_terms, zemanta.terms
  end

  should 'be able to set the context after initialization' do
    yahoo = TermExtraction::Yahoo.new
    context = 'foo'
    yahoo.context = context
    assert_equal context, yahoo.context
  end

  should 'be able to set the api key after initialization' do
    zemanta = TermExtraction::Zemanta.new
    api_key = 'bar'
    # Exercise the api_key accessor, as the test name promises.
    zemanta.api_key = api_key
    assert_equal api_key, zemanta.api_key
  end

  should 'return different response on subsequent calls when different data is returned from Yahoo!' do
    yahoo = TermExtraction::Yahoo.new
    fake_uri(:get, yahoo.uri, 'yahoo.xml')
    original_terms = yahoo.terms
    fake_uri(:get, yahoo.uri, 'yahoo2.xml')
    assert_not_equal original_terms, yahoo.terms
  end

  should 'return different response on subsequent calls when different data is returned from Zemanta' do
    zemanta = TermExtraction::Zemanta.new
    fake_uri(:post, zemanta.uri, 'zemanta.xml')
    original_terms = zemanta.terms
    fake_uri(:post, zemanta.uri, 'zemanta2.xml')
    assert_not_equal original_terms, zemanta.terms
  end

  context 'Yahoo!' do
    should 'be able to handle a context with "%" in it' do
      yahoo = TermExtraction::Yahoo.new(:context => '%')
      fake_uri(:get, yahoo.uri, 'yahoo.xml')
      assert_nothing_thrown{ yahoo.terms }
    end
  end

  private

  # Terms contained in test/fixtures/yahoo.xml.
  def correct_yahoo_terms
    ['gears of war', 'gears']
  end

  # Keyword names contained in test/fixtures/zemanta.xml.
  def correct_zemanta_terms
    ['Apple', 'IMac', 'Rumor', 'Hardware', 'Nvidia', 'Macintosh', 'Desktops', 'AllInOne']
  end
end
|
data/test/test_helper.rb
ADDED
@@ -0,0 +1,22 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'test/unit'
|
3
|
+
require 'shoulda'
|
4
|
+
require 'mocha'
|
5
|
+
require 'fake_web'
|
6
|
+
|
7
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
8
|
+
require 'term_extraction'
|
9
|
+
|
10
|
+
# Shared helpers added to every test case.
class Test::Unit::TestCase
  # Answers requests for +uri+ with the content of a fixture file and blocks
  # all other network access while the fake is active.
  #
  # method  - HTTP method symbol passed to FakeWeb (e.g. :get or :post).
  # uri     - URI object or String; converted with to_s before registration.
  # fixture - file name inside the fixtures directory next to this file.
  def fake_uri(method, uri, fixture)
    FakeWeb.clean_registry
    FakeWeb.allow_net_connect = false
    # File.read closes the file itself; File.open(...).read left the handle
    # open until garbage collection.
    response = File.read(File.join(File.dirname(__FILE__), 'fixtures', fixture))
    FakeWeb.register_uri(method, uri.to_s, :string => response)
  end

  # Re-enables network access and drops all registered fakes after each test.
  def teardown
    FakeWeb.allow_net_connect = true
    FakeWeb.clean_registry
  end
end
|
metadata
ADDED
@@ -0,0 +1,80 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: term_extraction
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 0.1.4
|
5
|
+
platform: ruby
|
6
|
+
authors:
|
7
|
+
- alex
|
8
|
+
autorequire:
|
9
|
+
bindir: bin
|
10
|
+
cert_chain: []
|
11
|
+
|
12
|
+
date: 2009-12-20 00:00:00 -07:00
|
13
|
+
default_executable:
|
14
|
+
dependencies:
|
15
|
+
- !ruby/object:Gem::Dependency
|
16
|
+
name: nokogiri
|
17
|
+
type: :runtime
|
18
|
+
version_requirement:
|
19
|
+
version_requirements: !ruby/object:Gem::Requirement
|
20
|
+
requirements:
|
21
|
+
- - ">="
|
22
|
+
- !ruby/object:Gem::Version
|
23
|
+
version: 1.0.7
|
24
|
+
version:
|
25
|
+
description: Term extraction library
|
26
|
+
email: alexrabarts@gmail.com
|
27
|
+
executables: []
|
28
|
+
|
29
|
+
extensions: []
|
30
|
+
|
31
|
+
extra_rdoc_files:
|
32
|
+
- LICENSE
|
33
|
+
- README
|
34
|
+
files:
|
35
|
+
- .gitignore
|
36
|
+
- LICENSE
|
37
|
+
- README
|
38
|
+
- Rakefile
|
39
|
+
- VERSION.yml
|
40
|
+
- lib/term_extraction.rb
|
41
|
+
- lib/term_extraction/yahoo.rb
|
42
|
+
- lib/term_extraction/zemanta.rb
|
43
|
+
- term_extraction.gemspec
|
44
|
+
- test/fixtures/yahoo.xml
|
45
|
+
- test/fixtures/yahoo2.xml
|
46
|
+
- test/fixtures/zemanta.xml
|
47
|
+
- test/fixtures/zemanta2.xml
|
48
|
+
- test/term_extraction_test.rb
|
49
|
+
- test/test_helper.rb
|
50
|
+
has_rdoc: true
|
51
|
+
homepage: http://github.com/alexrabarts/term_extraction
|
52
|
+
licenses: []
|
53
|
+
|
54
|
+
post_install_message:
|
55
|
+
rdoc_options:
|
56
|
+
- --charset=UTF-8
|
57
|
+
require_paths:
|
58
|
+
- lib
|
59
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
60
|
+
requirements:
|
61
|
+
- - ">="
|
62
|
+
- !ruby/object:Gem::Version
|
63
|
+
version: "0"
|
64
|
+
version:
|
65
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
66
|
+
requirements:
|
67
|
+
- - ">="
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: "0"
|
70
|
+
version:
|
71
|
+
requirements: []
|
72
|
+
|
73
|
+
rubyforge_project:
|
74
|
+
rubygems_version: 1.3.5
|
75
|
+
signing_key:
|
76
|
+
specification_version: 3
|
77
|
+
summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta.
|
78
|
+
test_files:
|
79
|
+
- test/term_extraction_test.rb
|
80
|
+
- test/test_helper.rb
|