sem_extractor 0.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE +20 -0
- data/README.rdoc +37 -0
- data/Rakefile +53 -0
- data/VERSION +1 -0
- data/lib/.DS_Store +0 -0
- data/lib/apis/textwise.rb +59 -0
- data/lib/apis/yahoo.rb +39 -0
- data/lib/apis/zemanta.rb +52 -0
- data/lib/sem_extractor.rb +13 -0
- data/pkg/sem_extractor-0.0.0.gem +0 -0
- data/sem_extractor.gemspec +57 -0
- data/test/helper.rb +10 -0
- data/test/test_sem_extractor.rb +7 -0
- metadata +94 -0
data/LICENSE
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
Copyright (c) 2009 apneadiving
|
2
|
+
|
3
|
+
Permission is hereby granted, free of charge, to any person obtaining
|
4
|
+
a copy of this software and associated documentation files (the
|
5
|
+
"Software"), to deal in the Software without restriction, including
|
6
|
+
without limitation the rights to use, copy, modify, merge, publish,
|
7
|
+
distribute, sublicense, and/or sell copies of the Software, and to
|
8
|
+
permit persons to whom the Software is furnished to do so, subject to
|
9
|
+
the following conditions:
|
10
|
+
|
11
|
+
The above copyright notice and this permission notice shall be
|
12
|
+
included in all copies or substantial portions of the Software.
|
13
|
+
|
14
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
|
15
|
+
EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
|
16
|
+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
|
17
|
+
NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
|
18
|
+
LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
|
19
|
+
OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
|
20
|
+
WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,37 @@
|
|
1
|
+
= sem_extractor
|
2
|
+
SemExtractor gathers, in a single place, wrappers for most of the semantic libraries:
|
3
|
+
- Zemanta
|
4
|
+
- Semantic Hacker from Textwise
|
5
|
+
- Yahoo Boss
|
6
|
+
|
7
|
+
Please tell me if there are more APIs to include!
|
8
|
+
|
9
|
+
After using the Term Extraction gem, I needed the score of the different tags returned by the different APIs, and I wanted to use Nokogiri for performance reasons.
|
10
|
+
Most of the methods below return an array of hashes with 'name' and 'score' keys.
|
11
|
+
|
12
|
+
Initialize:
|
13
|
+
- yahoo = SemExtractor::Yahoo.new(:api_key => your_key, :context => your_text)
|
14
|
+
- zemanta = SemExtractor::Zemanta.new(:api_key => your_key, :context => your_text)
|
15
|
+
- sem = SemExtractor::Textwise.new(:api_key => your_key, :context => your_text_or_url)
|
16
|
+
|
17
|
+
Get info:
|
18
|
+
- yahoo.terms
|
19
|
+
- zemanta.terms
|
20
|
+
- zemanta.categories
|
21
|
+
- sem.terms
|
22
|
+
- sem.categories
|
23
|
+
- sem.filter #filters the useful content of a web page, retrieves text
|
24
|
+
|
25
|
+
== Note on Patches/Pull Requests
|
26
|
+
|
27
|
+
* Fork the project.
|
28
|
+
* Make your feature addition or bug fix.
|
29
|
+
* Add tests for it. This is important so I don't break it in a
|
30
|
+
future version unintentionally.
|
31
|
+
* Commit, do not mess with rakefile, version, or history.
|
32
|
+
(if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
|
33
|
+
* Send me a pull request. Bonus points for topic branches.
|
34
|
+
|
35
|
+
== Copyright
|
36
|
+
|
37
|
+
Copyright (c) 2010 apneadiving. See LICENSE for details.
|
data/Rakefile
ADDED
@@ -0,0 +1,53 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'rake'
|
3
|
+
|
4
|
+
# Packaging tasks are provided by Jeweler when it is installed; without it the
# remaining tasks (test, rcov, rdoc) are still defined.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "sem_extractor"
    gem.summary = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
    gem.description = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
    gem.email = "apnea.diving.deep@gmail.com"
    gem.homepage = "http://github.com/apneadiving/sem_extractor"
    gem.authors = ["apneadiving"]
    gem.add_dependency "nokogiri", ">= 0"
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Runs every test/**/test_*.rb file with lib/ and test/ on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; replaced by a stub that explains how to install rcov when the
# library cannot be loaded.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined anywhere in this Rakefile (presumably it is
# supplied by Jeweler -- confirm). Depending on it unconditionally makes
# `rake test` abort when the task does not exist, so only wire it up when it does.
task :test => :check_dependencies if Rake::Task.task_defined?(:check_dependencies)

task :default => :test

# Prefer the RDoc-provided task; fall back to the legacy Rake location so that
# older toolchains keep working.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that would otherwise leak into the title.
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "sem_extractor #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
|
data/VERSION
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
0.0.1
|
data/lib/.DS_Store
ADDED
Binary file
|
@@ -0,0 +1,59 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
class SemExtractor
  # Wrapper around the Semantic Hacker (Textwise) HTTP API.
  #
  # Every public action selects an endpoint and a set of query parameters,
  # performs a GET request and returns either parsed entities or the raw body.
  # The endpoint chosen by one action is kept separate from @type (the value
  # given at construction time), so calling #categories or #filter no longer
  # changes what a later #terms call requests.
  class Textwise < SemExtractor
    # Returns an array of {"score" => ..., "name" => ...} hashes built from the
    # 'weight' and 'label' attributes of the response nodes. Uses the endpoint
    # given at construction time (@type), or 'concept' when none was given.
    def terms
      prepare(@type || 'concept', 'content' => @context)
      get_entity
    end

    # Same shape as #terms, but queries the 'category' endpoint with labels enabled.
    def categories
      prepare('category', 'content' => @context, 'showLabels' => 'true')
      get_entity
    end

    # Treats @context as a URL and returns the raw response body of the
    # 'filter/web' endpoint (nil when the request fails).
    def filter
      prepare('filter/web', 'uri' => @context)
      remote_xml
    end

    # Queries the 'match/rsscombined' endpoint, prints the raw response and
    # returns it (nil when the request fails). Earlier versions only printed it.
    def match
      prepare('match/rsscombined', 'content' => @context)
      xml = remote_xml
      puts xml
      xml
    end

    # Fetches the current request and maps every node matching the current
    # endpoint name to a {"score", "name"} hash. Returns [] when the request
    # fails or the response cannot be processed.
    def get_entity
      xml = remote_xml
      return [] if xml.nil?

      Nokogiri::XML(xml).css(endpoint).map do |node|
        { 'score' => node['weight'], 'name' => node['label'] }
      end
    rescue StandardError
      []
    end

    # Builds the request URI for the current endpoint and query parameters.
    # Keys and values are fully percent-encoded, so '&' and '=' inside the
    # content can no longer corrupt the query string.
    def uri
      api_uri = URI.parse(gateway)
      api_uri.query = (@options || {}).map { |key, value| "#{escape(key)}=#{escape(value)}" }.join('&')
      api_uri
    end

    private

    # Records the endpoint and query parameters used by the next request.
    def prepare(endpoint_name, options)
      @endpoint = endpoint_name
      @options = options
    end

    # Endpoint of the current request: the one chosen by the last action, else
    # the constructor-supplied type, else 'concept'.
    def endpoint
      @endpoint || @type || 'concept'
    end

    # Percent-encodes one query component; nil becomes an empty string.
    # Spaces are emitted as %20 (not '+'), as the previous URI.escape call did.
    def escape(component)
      URI.encode_www_form_component(component.to_s).gsub('+', '%20')
    end

    def gateway
      'http://api.semantichacker.com/' + @api_key + '/' + endpoint + '?'
    end

    # Performs the GET request and returns the body, or nil on any error.
    # The error is only reported when Ruby runs in verbose mode.
    def remote_xml
      # OpenURI adds #read to URI objects; Kernel#open no longer accepts URIs on Ruby 3.
      uri.read
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
|
data/lib/apis/yahoo.rb
ADDED
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'open-uri'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
class SemExtractor
  # Wrapper around the Yahoo term extraction HTTP service.
  class Yahoo < SemExtractor
    # Returns an array of {"name" => text} hashes, one per 'Result' node of the
    # response. Returns [] when the request fails or the response cannot be
    # processed.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      Nokogiri::XML(xml).css('Result').map { |node| { 'name' => node.content } }
    rescue StandardError
      []
    end

    # Builds the request URI. Keys and values are fully percent-encoded, so
    # '&' and '=' inside the context can no longer corrupt the query string.
    def uri
      api_uri = URI.parse(gateway)
      api_uri.query = query_params.map { |key, value| "#{escape(key)}=#{escape(value)}" }.join('&')
      api_uri
    end

    private

    # Query parameters sent with every request.
    def query_params
      {
        'appid' => @api_key,
        'output' => 'xml',
        'context' => @context
      }
    end

    # Percent-encodes one query component; nil becomes an empty string.
    # Spaces are emitted as %20 (not '+'), as the previous URI.escape call did.
    def escape(component)
      URI.encode_www_form_component(component.to_s).gsub('+', '%20')
    end

    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Performs the GET request and returns the body, or nil on any error.
    # The error is only reported when Ruby runs in verbose mode.
    def remote_xml
      # OpenURI adds #read to URI objects; Kernel#open no longer accepts URIs on Ruby 3.
      uri.read
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
|
data/lib/apis/zemanta.rb
ADDED
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
require 'net/http'
|
3
|
+
require 'uri'
|
4
|
+
|
5
|
+
class SemExtractor
  # Wrapper around the Zemanta 'zemanta.suggest' REST call.
  class Zemanta < SemExtractor
    # Returns an array of {"score" => confidence, "name" => name} hashes, one
    # per 'keyword' node of the response, and stores the same structure for the
    # 'category' nodes in @categories. The service is contacted once per call
    # and the response is parsed once. Returns [] when the request fails or
    # the response cannot be processed.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      document = Nokogiri::XML(xml)
      @categories = scored_entries(document, 'category')
      scored_entries(document, 'keyword')
    rescue StandardError
      []
    end

    # Returns the categories stored by the last successful #terms call,
    # triggering one when none are stored yet. Returns [] (never nil) when no
    # categories could be obtained.
    def categories
      terms if @categories.nil?
      @categories || []
    end

    # Endpoint the form is posted to.
    def uri
      URI.parse(gateway)
    end

    # Form fields sent with every request.
    def post_params
      {
        'method' => 'zemanta.suggest',
        'api_key' => @api_key,
        'return_images' => 0,
        'text' => @context,
        'format' => 'xml',
        'articles_limit' => 1,
        'return_categories' => 'dmoz'
      }
    end

    private

    # Maps every node matching +selector+ to a {"score", "name"} hash taken from
    # the content of its first 'confidence' and 'name' children.
    def scored_entries(document, selector)
      document.css(selector).map do |node|
        {
          'score' => node.css('confidence').first.content,
          'name' => node.css('name').first.content
        }
      end
    end

    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # Posts the form and returns the response body, or nil on any error.
    # The error is only reported when Ruby runs in verbose mode.
    def remote_xml
      Net::HTTP.post_form(uri, post_params).body
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
|
@@ -0,0 +1,13 @@
|
|
1
|
+
# Base class shared by the API wrappers: holds the text (or URL) to analyse,
# the API key and an optional request type.
class SemExtractor
  attr_accessor :context, :api_key, :categories

  # options - Hash read with the symbol keys :context, :api_key and :type;
  #           keys that are absent leave the matching value nil.
  def initialize(options = {})
    @context, @api_key, @type = options.values_at(:context, :api_key, :type)
    @categories = nil
  end
end
|
12
|
+
|
13
|
+
%w{yahoo zemanta textwise}.each{|t| require "apis/#{t}"}
|
Binary file
|
@@ -0,0 +1,57 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{sem_extractor}
|
8
|
+
s.version = "0.0.1"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["apneadiving"]
|
12
|
+
s.date = %q{2010-10-02}
|
13
|
+
s.description = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
|
14
|
+
s.email = %q{apnea.diving.deep@gmail.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
"LICENSE",
|
21
|
+
"README.rdoc",
|
22
|
+
"Rakefile",
|
23
|
+
"VERSION",
|
24
|
+
"lib/.DS_Store",
|
25
|
+
"lib/apis/textwise.rb",
|
26
|
+
"lib/apis/yahoo.rb",
|
27
|
+
"lib/apis/zemanta.rb",
|
28
|
+
"lib/sem_extractor.rb",
|
29
|
+
"pkg/sem_extractor-0.0.0.gem",
|
30
|
+
"sem_extractor.gemspec",
|
31
|
+
"test/helper.rb",
|
32
|
+
"test/test_sem_extractor.rb"
|
33
|
+
]
|
34
|
+
s.homepage = %q{http://github.com/apneadiving/sem_extractor}
|
35
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
36
|
+
s.require_paths = ["lib"]
|
37
|
+
s.rubygems_version = %q{1.3.7}
|
38
|
+
s.summary = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
|
39
|
+
s.test_files = [
|
40
|
+
"test/helper.rb",
|
41
|
+
"test/test_sem_extractor.rb"
|
42
|
+
]
|
43
|
+
|
44
|
+
if s.respond_to? :specification_version then
|
45
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
46
|
+
s.specification_version = 3
|
47
|
+
|
48
|
+
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
49
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
50
|
+
else
|
51
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
52
|
+
end
|
53
|
+
else
|
54
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
data/test/helper.rb
ADDED
metadata
ADDED
@@ -0,0 +1,94 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: sem_extractor
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
hash: 29
|
5
|
+
prerelease: false
|
6
|
+
segments:
|
7
|
+
- 0
|
8
|
+
- 0
|
9
|
+
- 1
|
10
|
+
version: 0.0.1
|
11
|
+
platform: ruby
|
12
|
+
authors:
|
13
|
+
- apneadiving
|
14
|
+
autorequire:
|
15
|
+
bindir: bin
|
16
|
+
cert_chain: []
|
17
|
+
|
18
|
+
date: 2010-10-02 00:00:00 +02:00
|
19
|
+
default_executable:
|
20
|
+
dependencies:
|
21
|
+
- !ruby/object:Gem::Dependency
|
22
|
+
name: nokogiri
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ">="
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
32
|
+
version: "0"
|
33
|
+
type: :runtime
|
34
|
+
version_requirements: *id001
|
35
|
+
description: Extracts data from semantics API like zemanta, textwise and yahoo
|
36
|
+
email: apnea.diving.deep@gmail.com
|
37
|
+
executables: []
|
38
|
+
|
39
|
+
extensions: []
|
40
|
+
|
41
|
+
extra_rdoc_files:
|
42
|
+
- LICENSE
|
43
|
+
- README.rdoc
|
44
|
+
files:
|
45
|
+
- LICENSE
|
46
|
+
- README.rdoc
|
47
|
+
- Rakefile
|
48
|
+
- VERSION
|
49
|
+
- lib/.DS_Store
|
50
|
+
- lib/apis/textwise.rb
|
51
|
+
- lib/apis/yahoo.rb
|
52
|
+
- lib/apis/zemanta.rb
|
53
|
+
- lib/sem_extractor.rb
|
54
|
+
- pkg/sem_extractor-0.0.0.gem
|
55
|
+
- sem_extractor.gemspec
|
56
|
+
- test/helper.rb
|
57
|
+
- test/test_sem_extractor.rb
|
58
|
+
has_rdoc: true
|
59
|
+
homepage: http://github.com/apneadiving/sem_extractor
|
60
|
+
licenses: []
|
61
|
+
|
62
|
+
post_install_message:
|
63
|
+
rdoc_options:
|
64
|
+
- --charset=UTF-8
|
65
|
+
require_paths:
|
66
|
+
- lib
|
67
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
68
|
+
none: false
|
69
|
+
requirements:
|
70
|
+
- - ">="
|
71
|
+
- !ruby/object:Gem::Version
|
72
|
+
hash: 3
|
73
|
+
segments:
|
74
|
+
- 0
|
75
|
+
version: "0"
|
76
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
77
|
+
none: false
|
78
|
+
requirements:
|
79
|
+
- - ">="
|
80
|
+
- !ruby/object:Gem::Version
|
81
|
+
hash: 3
|
82
|
+
segments:
|
83
|
+
- 0
|
84
|
+
version: "0"
|
85
|
+
requirements: []
|
86
|
+
|
87
|
+
rubyforge_project:
|
88
|
+
rubygems_version: 1.3.7
|
89
|
+
signing_key:
|
90
|
+
specification_version: 3
|
91
|
+
summary: Extracts data from semantics API like zemanta, textwise and yahoo
|
92
|
+
test_files:
|
93
|
+
- test/helper.rb
|
94
|
+
- test/test_sem_extractor.rb
|