sem_extractor 0.0.3 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -1
- data/VERSION +1 -1
- data/lib/apis/calais.rb +19 -19
- data/lib/apis/textwise.rb +1 -1
- data/lib/apis/zemanta.rb +2 -2
- data/lib/sem_extractor.rb +1 -1
- data/sem_extractor.gemspec +3 -3
- metadata +5 -5
data/Rakefile
CHANGED
@@ -8,7 +8,7 @@ begin
|
|
8
8
|
gem.summary = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
9
9
|
gem.description = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
10
10
|
gem.email = "apnea.diving.deep@gmail.com"
|
11
|
-
gem.homepage = "http://github.com/apneadiving/
|
11
|
+
gem.homepage = "http://github.com/apneadiving/SemExtractor"
|
12
12
|
gem.authors = ["apneadiving"]
|
13
13
|
gem.add_dependency "nokogiri", ">= 0"
|
14
14
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.0.3
|
1
|
+
0.0.4
|
data/lib/apis/calais.rb
CHANGED
@@ -1,24 +1,24 @@
|
|
1
1
|
class SemExtractor
|
2
2
|
class Calais < SemExtractor
|
3
3
|
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
4
|
+
def initialize(options={})
|
5
|
+
self.set(options)
|
6
|
+
Nokogiri::XML(remote_xml).xpath('//rdf:Description').map { |h|
|
7
|
+
node_type = h.xpath('rdf:type').first['resource']
|
8
|
+
if node_type.include?('/type/cat/')
|
9
|
+
@categories << { "name" => sanitize(h.xpath('c:categoryName')), "score"=> sanitize(h.xpath('c:score'))}
|
10
|
+
elsif node_type.include?('/type/em/')
|
11
|
+
@terms << { "name" => sanitize(h.xpath('c:name')), "score" => nil, "nationality" => sanitize(h.xpath('c:nationality')) }
|
12
|
+
elsif node_type.include?('/type/sys/InstanceInfo/')
|
13
|
+
#nothing to do, no info to take
|
14
|
+
elsif node_type.include?('/type/sys/RelevanceInfo/')
|
15
|
+
# I assume here, Open Calais will keep on giving information in the proper order, seems fair :)
|
16
|
+
@terms.last["score"] = sanitize(h.xpath('c:relevance'))
|
17
|
+
elsif node_type.include?('/type/er/Geo/')
|
18
|
+
@geos << { "name" => sanitize(h.xpath('c:name')) }
|
19
|
+
end
|
20
|
+
}
|
21
|
+
end
|
22
22
|
|
23
23
|
def uri
|
24
24
|
URI.parse(gateway + '?' + URI.escape(post_params.collect{ |k, v| "#{k}=#{v}" }.join('&')))
|
@@ -42,7 +42,7 @@ class SemExtractor
|
|
42
42
|
|
43
43
|
def remote_xml
|
44
44
|
begin
|
45
|
-
Net::HTTP.get_response((uri)).body
|
45
|
+
puts Net::HTTP.get_response((uri)).body
|
46
46
|
rescue => e
|
47
47
|
$stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
|
48
48
|
nil
|
data/lib/apis/textwise.rb
CHANGED
data/lib/apis/zemanta.rb
CHANGED
@@ -4,8 +4,8 @@ class SemExtractor
|
|
4
4
|
def initialize(options={})
|
5
5
|
self.set(options)
|
6
6
|
xml = remote_xml
|
7
|
-
@categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.
|
8
|
-
@terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.
|
7
|
+
@categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
|
8
|
+
@terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
|
9
9
|
end
|
10
10
|
|
11
11
|
def uri
|
data/lib/sem_extractor.rb
CHANGED
data/sem_extractor.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{sem_extractor}
|
8
|
-
s.version = "0.0.3"
|
8
|
+
s.version = "0.0.4"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["apneadiving"]
|
12
|
-
s.date = %q{2010-10-
|
12
|
+
s.date = %q{2010-10-10}
|
13
13
|
s.description = %q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
|
14
14
|
s.email = %q{apnea.diving.deep@gmail.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -34,7 +34,7 @@ Gem::Specification.new do |s|
|
|
34
34
|
"test/helper.rb",
|
35
35
|
"test/test_sem_extractor.rb"
|
36
36
|
]
|
37
|
-
s.homepage = %q{http://github.com/apneadiving/
|
37
|
+
s.homepage = %q{http://github.com/apneadiving/SemExtractor}
|
38
38
|
s.rdoc_options = ["--charset=UTF-8"]
|
39
39
|
s.require_paths = ["lib"]
|
40
40
|
s.rubygems_version = %q{1.3.7}
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sem_extractor
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
8
|
- 0
|
9
|
-
- 3
|
10
|
-
version: 0.0.3
|
9
|
+
- 4
|
10
|
+
version: 0.0.4
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- apneadiving
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-10-
|
18
|
+
date: 2010-10-10 00:00:00 +02:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -59,7 +59,7 @@ files:
|
|
59
59
|
- test/helper.rb
|
60
60
|
- test/test_sem_extractor.rb
|
61
61
|
has_rdoc: true
|
62
|
-
homepage: http://github.com/apneadiving/
|
62
|
+
homepage: http://github.com/apneadiving/SemExtractor
|
63
63
|
licenses: []
|
64
64
|
|
65
65
|
post_install_message:
|