sem_extractor 0.0.3 → 0.0.4
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/apis/calais.rb +19 -19
- data/lib/apis/textwise.rb +1 -1
- data/lib/apis/zemanta.rb +2 -2
- data/lib/sem_extractor.rb +1 -1
- data/sem_extractor.gemspec +3 -3
- metadata +5 -5
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ begin
|
|
8
8
|
gem.summary = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
9
9
|
gem.description = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
10
10
|
gem.email = "apnea.diving.deep@gmail.com"
|
11
|
-
gem.homepage = "http://github.com/apneadiving/
|
11
|
+
gem.homepage = "http://github.com/apneadiving/SemExtractor"
|
12
12
|
gem.authors = ["apneadiving"]
|
13
13
|
gem.add_dependency "nokogiri", ">= 0"
|
14
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
data/lib/apis/calais.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
class SemExtractor
|
2
2
|
class Calais < SemExtractor
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
4
|
+
def initialize(options={})
|
5
|
+
self.set(options)
|
6
|
+
Nokogiri::XML(remote_xml).xpath('//rdf:Description').map { |h|
|
7
|
+
node_type = h.xpath('rdf:type').first['resource']
|
8
|
+
if node_type.include?('/type/cat/')
|
9
|
+
@categories << { "name" => sanitize(h.xpath('c:categoryName')), "score"=> sanitize(h.xpath('c:score'))}
|
10
|
+
elsif node_type.include?('/type/em/')
|
11
|
+
@terms << { "name" => sanitize(h.xpath('c:name')), "score" => nil, "nationality" => sanitize(h.xpath('c:nationality')) }
|
12
|
+
elsif node_type.include?('/type/sys/InstanceInfo/')
|
13
|
+
#nothing to do, no info to take
|
14
|
+
elsif node_type.include?('/type/sys/RelevanceInfo/')
|
15
|
+
# I assume here, Open Calais will keep on giving information in the proper order, seems fair :)
|
16
|
+
@terms.last["score"] = sanitize(h.xpath('c:relevance'))
|
17
|
+
elsif node_type.include?('/type/er/Geo/')
|
18
|
+
@geos << { "name" => sanitize(h.xpath('c:name')) }
|
19
|
+
end
|
20
|
+
}
|
21
|
+
end
|
22
22
|
|
23
23
|
def uri
|
24
24
|
URI.parse(gateway + '?' + URI.escape(post_params.collect{ |k, v| "#{k}=#{v}" }.join('&')))
|
@@ -42,7 +42,7 @@ class SemExtractor
|
|
42
42
|
|
43
43
|
def remote_xml
|
44
44
|
begin
|
45
|
-
Net::HTTP.get_response((uri)).body
|
45
|
+
puts Net::HTTP.get_response((uri)).body
|
46
46
|
rescue => e
|
47
47
|
$stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
|
48
48
|
nil
|
data/lib/apis/textwise.rb
CHANGED
data/lib/apis/zemanta.rb
CHANGED
@@ -4,8 +4,8 @@ class SemExtractor
|
|
4
4
|
def initialize(options={})
|
5
5
|
self.set(options)
|
6
6
|
xml = remote_xml
|
7
|
-
@categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.
|
8
|
-
@terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.
|
7
|
+
@categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
|
8
|
+
@terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
|
9
9
|
end
|
10
10
|
|
11
11
|
def uri
|
data/lib/sem_extractor.rb
CHANGED
data/sem_extractor.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{sem_extractor}
|
8
|
-
s.version = "0.0.3"
|
8
|
+
s.version = "0.0.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["apneadiving"]
|
12
|
-
s.date = %q{2010-10-
|
12
|
+
s.date = %q{2010-10-10}
|
13
13
|
s.description = %q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
14
14
|
s.email = %q{apnea.diving.deep@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -34,7 +34,7 @@ Gem::Specification.new do |s|
|
|
34
34
|
"test/helper.rb",
|
35
35
|
"test/test_sem_extractor.rb"
|
36
36
|
]
|
37
|
-
s.homepage = %q{http://github.com/apneadiving/
|
37
|
+
s.homepage = %q{http://github.com/apneadiving/SemExtractor}
|
38
38
|
s.rdoc_options = ["--charset=UTF-8"]
|
39
39
|
s.require_paths = ["lib"]
|
40
40
|
s.rubygems_version = %q{1.3.7}
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sem_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 0.0.3
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- apneadiving
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-10-
|
18
|
+
date: 2010-10-10 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- test/helper.rb
|
60
60
|
- test/test_sem_extractor.rb
|
61
61
|
has_rdoc: true
|
62
|
-
homepage: http://github.com/apneadiving/
|
62
|
+
homepage: http://github.com/apneadiving/SemExtractor
|
63
63
|
licenses: []
|
64
64
|
|
65
65
|
post_install_message:
|