term-extraction 0.1.4 → 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +2 -1
- data/Gemfile.lock +3 -1
- data/README.rdoc +28 -0
- data/VERSION.yml +5 -4
- data/lib/term_extraction/yahoo.rb +3 -4
- data/lib/term_extraction/zemanta.rb +2 -2
- data/term-extraction.gemspec +13 -10
- data/test/fixtures/yahoo.xml +3 -3
- data/test/fixtures/yahoo2.xml +3 -3
- data/test/term_extraction_test.rb +15 -5
- metadata +92 -86
- data/README +0 -31
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,6 +1,7 @@
|
|
1
1
|
GEM
|
2
|
-
remote:
|
2
|
+
remote: https://rubygems.org/
|
3
3
|
specs:
|
4
|
+
addressable (2.3.5)
|
4
5
|
fakeweb (1.3.0)
|
5
6
|
git (1.2.5)
|
6
7
|
jeweler (1.6.4)
|
@@ -21,6 +22,7 @@ PLATFORMS
|
|
21
22
|
ruby
|
22
23
|
|
23
24
|
DEPENDENCIES
|
25
|
+
addressable
|
24
26
|
fakeweb
|
25
27
|
jeweler
|
26
28
|
mocha
|
data/README.rdoc
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
= term_extraction
|
2
|
+
|
3
|
+
== DESCRIPTION:
|
4
|
+
|
5
|
+
Provides access to term extraction APIs such as Yahoo! YQL and Zemanta.
|
6
|
+
|
7
|
+
== SYNOPSIS:
|
8
|
+
|
9
|
+
# Query Yahoo! for terms
|
10
|
+
yahoo = TermExtraction::Yahoo.new(:context => 'Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration')
|
11
|
+
yahoo.terms # => ["Italian sculptors", "the Virgin Mary"]
|
12
|
+
|
13
|
+
# Query Zemanta for terms
|
14
|
+
zemanta = TermExtraction::Zemanta.new(:api_key => 'myApiKey', :context => 'apple imac')
|
15
|
+
zemanta.terms # => ["Apple", "IMac", "Rumor", "Hardware", "Nvidia", "Macintosh", "Desktops", "AllInOne"]
|
16
|
+
|
17
|
+
== INSTALL:
|
18
|
+
|
19
|
+
gem install term-extraction # require 'term_extraction'
|
20
|
+
|
21
|
+
Or in your `Gemfile`:
|
22
|
+
|
23
|
+
gem 'term-extraction', :require => 'term_extraction'
|
24
|
+
|
25
|
+
COPYRIGHT
|
26
|
+
=========
|
27
|
+
|
28
|
+
Copyright (c) 2011 Stateless Systems (http://statelesssystems.com). See LICENSE for details.
|
data/VERSION.yml
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
---
|
2
|
-
:
|
3
|
-
:
|
4
|
-
:
|
1
|
+
---
|
2
|
+
:major: 1
|
3
|
+
:minor: 0
|
4
|
+
:patch: 0
|
5
|
+
:build:
|
@@ -1,4 +1,5 @@
|
|
1
1
|
require 'nokogiri'
|
2
|
+
require 'uri'
|
2
3
|
require 'addressable/uri'
|
3
4
|
require 'open-uri'
|
4
5
|
|
@@ -20,9 +21,7 @@ class TermExtraction
|
|
20
21
|
def uri
|
21
22
|
api_uri = Addressable::URI.parse(gateway)
|
22
23
|
api_uri.query_values = {
|
23
|
-
|
24
|
-
'output' => 'xml',
|
25
|
-
'context' => @context
|
24
|
+
'q' => "select * from search.termextract where context=\"#{@context}\""
|
26
25
|
}
|
27
26
|
api_uri
|
28
27
|
end
|
@@ -39,7 +38,7 @@ class TermExtraction
|
|
39
38
|
end
|
40
39
|
|
41
40
|
def gateway
|
42
|
-
'http://
|
41
|
+
'http://query.yahooapis.com/v1/public/yql'
|
43
42
|
end
|
44
43
|
|
45
44
|
def remote_xml
|
@@ -1,6 +1,6 @@
|
|
1
1
|
require 'net/http'
|
2
2
|
require 'nokogiri'
|
3
|
-
require '
|
3
|
+
require 'uri'
|
4
4
|
|
5
5
|
class TermExtraction
|
6
6
|
class Zemanta < TermExtraction
|
@@ -16,7 +16,7 @@ class TermExtraction
|
|
16
16
|
end
|
17
17
|
|
18
18
|
def uri
|
19
|
-
|
19
|
+
URI.parse gateway
|
20
20
|
end
|
21
21
|
|
22
22
|
def post_params
|
data/term-extraction.gemspec
CHANGED
@@ -4,23 +4,23 @@
|
|
4
4
|
# -*- encoding: utf-8 -*-
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
|
-
s.name =
|
8
|
-
s.version = "0.
|
7
|
+
s.name = "term-extraction"
|
8
|
+
s.version = "1.0.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["alex"]
|
12
|
-
s.date =
|
13
|
-
s.description =
|
14
|
-
s.email =
|
12
|
+
s.date = "2013-09-13"
|
13
|
+
s.description = "Term extraction library"
|
14
|
+
s.email = "alexrabarts@gmail.com"
|
15
15
|
s.extra_rdoc_files = [
|
16
16
|
"LICENSE",
|
17
|
-
"README"
|
17
|
+
"README.rdoc"
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
"Gemfile",
|
21
21
|
"Gemfile.lock",
|
22
22
|
"LICENSE",
|
23
|
-
"README",
|
23
|
+
"README.rdoc",
|
24
24
|
"Rakefile",
|
25
25
|
"VERSION.yml",
|
26
26
|
"lib/term_extraction.rb",
|
@@ -34,10 +34,10 @@ Gem::Specification.new do |s|
|
|
34
34
|
"test/term_extraction_test.rb",
|
35
35
|
"test/test_helper.rb"
|
36
36
|
]
|
37
|
-
s.homepage =
|
37
|
+
s.homepage = "http://github.com/alexrabarts/term_extraction"
|
38
38
|
s.require_paths = ["lib"]
|
39
|
-
s.rubygems_version =
|
40
|
-
s.summary =
|
39
|
+
s.rubygems_version = "1.8.25"
|
40
|
+
s.summary = "Provides access to term extraction APIs such as Yahoo! Term Extraction API and Zemanta."
|
41
41
|
|
42
42
|
if s.respond_to? :specification_version then
|
43
43
|
s.specification_version = 3
|
@@ -46,17 +46,20 @@ Gem::Specification.new do |s|
|
|
46
46
|
s.add_runtime_dependency(%q<rake>, [">= 0"])
|
47
47
|
s.add_runtime_dependency(%q<rdoc>, [">= 0"])
|
48
48
|
s.add_runtime_dependency(%q<nokogiri>, [">= 1.0.7"])
|
49
|
+
s.add_runtime_dependency(%q<addressable>, [">= 0"])
|
49
50
|
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
50
51
|
else
|
51
52
|
s.add_dependency(%q<rake>, [">= 0"])
|
52
53
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
53
54
|
s.add_dependency(%q<nokogiri>, [">= 1.0.7"])
|
55
|
+
s.add_dependency(%q<addressable>, [">= 0"])
|
54
56
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
55
57
|
end
|
56
58
|
else
|
57
59
|
s.add_dependency(%q<rake>, [">= 0"])
|
58
60
|
s.add_dependency(%q<rdoc>, [">= 0"])
|
59
61
|
s.add_dependency(%q<nokogiri>, [">= 1.0.7"])
|
62
|
+
s.add_dependency(%q<addressable>, [">= 0"])
|
60
63
|
s.add_dependency(%q<jeweler>, [">= 0"])
|
61
64
|
end
|
62
65
|
end
|
data/test/fixtures/yahoo.xml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
<?xml version="1.0"?>
|
2
|
-
<
|
3
|
-
<!--
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<query xmlns:yahoo="http://www.yahooapis.com/v1/base.rng" yahoo:count="2" yahoo:created="2013-09-13T12:16:37Z" yahoo:lang="en-US"><results><Result xmlns="urn:yahoo:cate">gears of war</Result><Result xmlns="urn:yahoo:cate">gears</Result></results></query><!-- total: 24 -->
|
3
|
+
<!-- engine2.yql.bf1.yahoo.com -->
|
data/test/fixtures/yahoo2.xml
CHANGED
@@ -1,3 +1,3 @@
|
|
1
|
-
<?xml version="1.0"?>
|
2
|
-
<
|
3
|
-
<!--
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<query xmlns:yahoo="http://www.yahooapis.com/v1/base.rng" yahoo:count="2" yahoo:created="2013-09-13T12:16:37Z" yahoo:lang="en-US"><results><Result xmlns="urn:yahoo:cate">fears of war</Result><Result xmlns="urn:yahoo:cate">fears</Result></results></query><!-- total: 24 -->
|
3
|
+
<!-- engine2.yql.bf1.yahoo.com -->
|
@@ -1,30 +1,40 @@
|
|
1
1
|
require File.dirname(__FILE__) + '/test_helper'
|
2
|
+
require 'addressable/uri'
|
2
3
|
|
3
4
|
class TermExtractionTest < Test::Unit::TestCase
|
4
5
|
should 'return correct terms from Yahoo!' do
|
5
6
|
yahoo = TermExtraction::Yahoo.new
|
6
7
|
fake_uri(:get, yahoo.uri, 'yahoo.xml')
|
7
|
-
assert_equal yahoo.terms
|
8
|
+
assert_equal correct_yahoo_terms, yahoo.terms
|
8
9
|
end
|
9
10
|
|
10
11
|
should 'return correct terms from Zemanta' do
|
11
12
|
zemanta = TermExtraction::Zemanta.new
|
12
13
|
fake_uri(:post, zemanta.uri, 'zemanta.xml')
|
13
|
-
assert_equal zemanta.terms
|
14
|
+
assert_equal correct_zemanta_terms, zemanta.terms
|
15
|
+
end
|
16
|
+
|
17
|
+
should 'set the correct API uri for Yahoo!' do
|
18
|
+
yahoo = TermExtraction::Yahoo.new(:context => 'the context')
|
19
|
+
uri = Addressable::URI.parse('http://query.yahooapis.com/v1/public/yql')
|
20
|
+
uri.query_values = {
|
21
|
+
'q' => "select * from search.termextract where context=\"the context\""
|
22
|
+
}
|
23
|
+
assert_equal uri.to_s, yahoo.uri.to_s
|
14
24
|
end
|
15
25
|
|
16
26
|
should 'be able to set the context after initialization' do
|
17
27
|
yahoo = TermExtraction::Yahoo.new
|
18
28
|
context = 'foo'
|
19
29
|
yahoo.context = context
|
20
|
-
assert_equal
|
30
|
+
assert_equal context, yahoo.context
|
21
31
|
end
|
22
32
|
|
23
33
|
should 'be able to set the api key after initialization' do
|
24
34
|
zemanta = TermExtraction::Zemanta.new
|
25
35
|
context = 'bar'
|
26
36
|
zemanta.context = context
|
27
|
-
assert_equal
|
37
|
+
assert_equal context, zemanta.context
|
28
38
|
end
|
29
39
|
|
30
40
|
should 'return different response on subsequent calls when different data is returned from Yahoo!' do
|
@@ -59,4 +69,4 @@ class TermExtractionTest < Test::Unit::TestCase
|
|
59
69
|
def correct_zemanta_terms
|
60
70
|
['Apple', 'IMac', 'Rumor', 'Hardware', 'Nvidia', 'Macintosh', 'Desktops', 'AllInOne']
|
61
71
|
end
|
62
|
-
end
|
72
|
+
end
|
metadata
CHANGED
@@ -1,95 +1,108 @@
|
|
1
|
-
--- !ruby/object:Gem::Specification
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
2
|
name: term-extraction
|
3
|
-
version: !ruby/object:Gem::Version
|
4
|
-
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.0
|
5
5
|
prerelease:
|
6
|
-
segments:
|
7
|
-
- 0
|
8
|
-
- 1
|
9
|
-
- 4
|
10
|
-
version: 0.1.4
|
11
6
|
platform: ruby
|
12
|
-
authors:
|
7
|
+
authors:
|
13
8
|
- alex
|
14
9
|
autorequire:
|
15
10
|
bindir: bin
|
16
11
|
cert_chain: []
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
dependencies:
|
21
|
-
- !ruby/object:Gem::Dependency
|
12
|
+
date: 2013-09-13 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
22
15
|
name: rake
|
23
|
-
|
24
|
-
requirement: &id001 !ruby/object:Gem::Requirement
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
25
17
|
none: false
|
26
|
-
requirements:
|
27
|
-
- -
|
28
|
-
- !ruby/object:Gem::Version
|
29
|
-
|
30
|
-
segments:
|
31
|
-
- 0
|
32
|
-
version: "0"
|
18
|
+
requirements:
|
19
|
+
- - '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: '0'
|
33
22
|
type: :runtime
|
34
|
-
version_requirements: *id001
|
35
|
-
- !ruby/object:Gem::Dependency
|
36
|
-
name: rdoc
|
37
23
|
prerelease: false
|
38
|
-
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: '0'
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: rdoc
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
39
33
|
none: false
|
40
|
-
requirements:
|
41
|
-
- -
|
42
|
-
- !ruby/object:Gem::Version
|
43
|
-
|
44
|
-
segments:
|
45
|
-
- 0
|
46
|
-
version: "0"
|
34
|
+
requirements:
|
35
|
+
- - '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: '0'
|
47
38
|
type: :runtime
|
48
|
-
|
49
|
-
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: '0'
|
46
|
+
- !ruby/object:Gem::Dependency
|
50
47
|
name: nokogiri
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 1.0.7
|
54
|
+
type: :runtime
|
51
55
|
prerelease: false
|
52
|
-
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
53
57
|
none: false
|
54
|
-
requirements:
|
55
|
-
- -
|
56
|
-
- !ruby/object:Gem::Version
|
57
|
-
hash: 25
|
58
|
-
segments:
|
59
|
-
- 1
|
60
|
-
- 0
|
61
|
-
- 7
|
58
|
+
requirements:
|
59
|
+
- - '>='
|
60
|
+
- !ruby/object:Gem::Version
|
62
61
|
version: 1.0.7
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: addressable
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
63
70
|
type: :runtime
|
64
|
-
version_requirements: *id003
|
65
|
-
- !ruby/object:Gem::Dependency
|
66
|
-
name: jeweler
|
67
71
|
prerelease: false
|
68
|
-
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: jeweler
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
69
81
|
none: false
|
70
|
-
requirements:
|
71
|
-
- -
|
72
|
-
- !ruby/object:Gem::Version
|
73
|
-
|
74
|
-
segments:
|
75
|
-
- 0
|
76
|
-
version: "0"
|
82
|
+
requirements:
|
83
|
+
- - '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
77
86
|
type: :development
|
78
|
-
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
79
94
|
description: Term extraction library
|
80
95
|
email: alexrabarts@gmail.com
|
81
96
|
executables: []
|
82
|
-
|
83
97
|
extensions: []
|
84
|
-
|
85
|
-
extra_rdoc_files:
|
98
|
+
extra_rdoc_files:
|
86
99
|
- LICENSE
|
87
|
-
- README
|
88
|
-
files:
|
100
|
+
- README.rdoc
|
101
|
+
files:
|
89
102
|
- Gemfile
|
90
103
|
- Gemfile.lock
|
91
104
|
- LICENSE
|
92
|
-
- README
|
105
|
+
- README.rdoc
|
93
106
|
- Rakefile
|
94
107
|
- VERSION.yml
|
95
108
|
- lib/term_extraction.rb
|
@@ -102,39 +115,32 @@ files:
|
|
102
115
|
- test/fixtures/zemanta2.xml
|
103
116
|
- test/term_extraction_test.rb
|
104
117
|
- test/test_helper.rb
|
105
|
-
has_rdoc: true
|
106
118
|
homepage: http://github.com/alexrabarts/term_extraction
|
107
119
|
licenses: []
|
108
|
-
|
109
120
|
post_install_message:
|
110
121
|
rdoc_options: []
|
111
|
-
|
112
|
-
require_paths:
|
122
|
+
require_paths:
|
113
123
|
- lib
|
114
|
-
required_ruby_version: !ruby/object:Gem::Requirement
|
124
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
115
125
|
none: false
|
116
|
-
requirements:
|
117
|
-
- -
|
118
|
-
- !ruby/object:Gem::Version
|
119
|
-
|
120
|
-
segments:
|
126
|
+
requirements:
|
127
|
+
- - '>='
|
128
|
+
- !ruby/object:Gem::Version
|
129
|
+
version: '0'
|
130
|
+
segments:
|
121
131
|
- 0
|
122
|
-
|
123
|
-
required_rubygems_version: !ruby/object:Gem::Requirement
|
132
|
+
hash: -731793109204494660
|
133
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
124
134
|
none: false
|
125
|
-
requirements:
|
126
|
-
- -
|
127
|
-
- !ruby/object:Gem::Version
|
128
|
-
|
129
|
-
segments:
|
130
|
-
- 0
|
131
|
-
version: "0"
|
135
|
+
requirements:
|
136
|
+
- - '>='
|
137
|
+
- !ruby/object:Gem::Version
|
138
|
+
version: '0'
|
132
139
|
requirements: []
|
133
|
-
|
134
140
|
rubyforge_project:
|
135
|
-
rubygems_version: 1.
|
141
|
+
rubygems_version: 1.8.25
|
136
142
|
signing_key:
|
137
143
|
specification_version: 3
|
138
|
-
summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API
|
144
|
+
summary: Provides access to term extraction APIs such as Yahoo! Term Extraction API
|
145
|
+
and Zemanta.
|
139
146
|
test_files: []
|
140
|
-
|
data/README
DELETED
@@ -1,31 +0,0 @@
|
|
1
|
-
= term_extraction
|
2
|
-
|
3
|
-
== DESCRIPTION:
|
4
|
-
|
5
|
-
Provides access to term extraction APIs such as Yahoo! Term Extraction API and
|
6
|
-
Zemanta.
|
7
|
-
|
8
|
-
== SYNOPSIS:
|
9
|
-
|
10
|
-
# Query Yahoo! for terms
|
11
|
-
yahoo = TermExtraction::Yahoo.new(:api_key => 'myApiKey', :context => 'xbox 360 gears of war')
|
12
|
-
yahoo.terms # => ["gears of war", "xbox 360", "gears", "xbox"]
|
13
|
-
|
14
|
-
# Query Zemanta for terms
|
15
|
-
zemanta = TermExtraction::Zemanta.new(:api_key => 'myApiKey', :context => 'apple imac')
|
16
|
-
zemanta.terms # => ["Apple", "IMac", "Rumor", "Hardware", "Nvidia", "Macintosh", "Desktops", "AllInOne"]
|
17
|
-
|
18
|
-
== INSTALL:
|
19
|
-
|
20
|
-
* Via git:
|
21
|
-
|
22
|
-
git clone git://github.com/alexrabarts/term_extraction.git
|
23
|
-
|
24
|
-
* Via gem:
|
25
|
-
|
26
|
-
gem install alexrabarts-term_extraction -s http://gems.github.com
|
27
|
-
|
28
|
-
COPYRIGHT
|
29
|
-
=========
|
30
|
-
|
31
|
-
Copyright (c) 2009 Stateless Systems (http://statelesssystems.com). See LICENSE for details.
|