sem_extractor 0.0.3 → 0.0.4

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -8,7 +8,7 @@ begin
8
8
  gem.summary = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
9
9
  gem.description = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
10
10
  gem.email = "apnea.diving.deep@gmail.com"
11
- gem.homepage = "http://github.com/apneadiving/sem_extractor"
11
+ gem.homepage = "http://github.com/apneadiving/SemExtractor"
12
12
  gem.authors = ["apneadiving"]
13
13
  gem.add_dependency "nokogiri", ">= 0"
14
14
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.3
1
+ 0.0.4
data/lib/apis/calais.rb CHANGED
@@ -1,24 +1,24 @@
1
1
  class SemExtractor
2
2
  class Calais < SemExtractor
3
3
 
4
- def initialize(options={})
5
- self.set(options)
6
- Nokogiri::XML(remote_xml).xpath('//rdf:Description').map { |h|
7
- node_type = h.xpath('rdf:type').first['resource']
8
- if node_type.include?('/type/cat/')
9
- @categories << { "name" => sanitize(h.xpath('c:categoryName')), "score"=> sanitize(h.xpath('c:score'))}
10
- elsif node_type.include?('/type/em/')
11
- @terms << { "name" => sanitize(h.xpath('c:name')), "score" => nil, "nationality" => sanitize(h.xpath('c:nationality')) }
12
- elsif node_type.include?('/type/sys/InstanceInfo')
13
- #nothing to do, no info to take
14
- elsif node_type.include?('/type/sys/RelevanceInfo')
15
- # I assume here, Open Calais will keep on giving information in the proper order, seems fair :)
16
- @terms.last["score"] = sanitize(h.xpath('c:relevance'))
17
- elsif node_type.include?('/Geo/')
18
- @geos <<{ "name" => sanitize(h.xpath('c:name')) }
19
- end
20
- }
21
- end
4
+ def initialize(options={})
5
+ self.set(options)
6
+ Nokogiri::XML(remote_xml).xpath('//rdf:Description').map { |h|
7
+ node_type = h.xpath('rdf:type').first['resource']
8
+ if node_type.include?('/type/cat/')
9
+ @categories << { "name" => sanitize(h.xpath('c:categoryName')), "score"=> sanitize(h.xpath('c:score'))}
10
+ elsif node_type.include?('/type/em/')
11
+ @terms << { "name" => sanitize(h.xpath('c:name')), "score" => nil, "nationality" => sanitize(h.xpath('c:nationality')) }
12
+ elsif node_type.include?('/type/sys/InstanceInfo/')
13
+ #nothing to do, no info to take
14
+ elsif node_type.include?('/type/sys/RelevanceInfo/')
15
+ # I assume here, Open Calais will keep on giving information in the proper order, seems fair :)
16
+ @terms.last["score"] = sanitize(h.xpath('c:relevance'))
17
+ elsif node_type.include?('/type/er/Geo/')
18
+ @geos << { "name" => sanitize(h.xpath('c:name')) }
19
+ end
20
+ }
21
+ end
22
22
 
23
23
  def uri
24
24
  URI.parse(gateway + '?' + URI.escape(post_params.collect{ |k, v| "#{k}=#{v}" }.join('&')))
@@ -42,7 +42,7 @@ class SemExtractor
42
42
 
43
43
  def remote_xml
44
44
  begin
45
- Net::HTTP.get_response((uri)).body
45
+ puts Net::HTTP.get_response((uri)).body
46
46
  rescue => e
47
47
  $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
48
48
  nil
data/lib/apis/textwise.rb CHANGED
@@ -1,6 +1,6 @@
1
1
  class SemExtractor
2
2
  class Textwise < SemExtractor
3
- #I kept here methods instead of variables because,each one leads to a http call
3
+ # I kept methods here instead of variables because each one leads to an HTTP call
4
4
 
5
5
  def initialize(options={})
6
6
  self.set(options)
data/lib/apis/zemanta.rb CHANGED
@@ -4,8 +4,8 @@ class SemExtractor
4
4
  def initialize(options={})
5
5
  self.set(options)
6
6
  xml = remote_xml
7
- @categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
8
- @terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
7
+ @categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
8
+ @terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.at_css('confidence').content, "name" => h.at_css('name').content} }
9
9
  end
10
10
 
11
11
  def uri
data/lib/sem_extractor.rb CHANGED
@@ -6,7 +6,7 @@ require 'open-uri'
6
6
  class SemExtractor
7
7
  attr_accessor :context, :api_key, :categories, :terms, :geos
8
8
 
9
- def set(options={})
9
+ def set(options)
10
10
  @context = options[:context]
11
11
  @api_key = options[:api_key]
12
12
  @type = options[:type]
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{sem_extractor}
8
- s.version = "0.0.3"
8
+ s.version = "0.0.4"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["apneadiving"]
12
- s.date = %q{2010-10-02}
12
+ s.date = %q{2010-10-10}
13
13
  s.description = %q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
14
14
  s.email = %q{apnea.diving.deep@gmail.com}
15
15
  s.extra_rdoc_files = [
@@ -34,7 +34,7 @@ Gem::Specification.new do |s|
34
34
  "test/helper.rb",
35
35
  "test/test_sem_extractor.rb"
36
36
  ]
37
- s.homepage = %q{http://github.com/apneadiving/sem_extractor}
37
+ s.homepage = %q{http://github.com/apneadiving/SemExtractor}
38
38
  s.rdoc_options = ["--charset=UTF-8"]
39
39
  s.require_paths = ["lib"]
40
40
  s.rubygems_version = %q{1.3.7}
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sem_extractor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 25
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 3
10
- version: 0.0.3
9
+ - 4
10
+ version: 0.0.4
11
11
  platform: ruby
12
12
  authors:
13
13
  - apneadiving
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-10-02 00:00:00 +02:00
18
+ date: 2010-10-10 00:00:00 +02:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -59,7 +59,7 @@ files:
59
59
  - test/helper.rb
60
60
  - test/test_sem_extractor.rb
61
61
  has_rdoc: true
62
- homepage: http://github.com/apneadiving/sem_extractor
62
+ homepage: http://github.com/apneadiving/SemExtractor
63
63
  licenses: []
64
64
 
65
65
  post_install_message: