sem_extractor 0.0.1 → 0.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.rdoc CHANGED
@@ -1,18 +1,30 @@
1
- = sem_extractor
1
+ == SemExtractor
2
+
2
3
  SemExtractor gathers, in a single place, wrappers for most of the semantic libraries:
3
4
  - Zemanta
4
5
  - Semantic Hacker from Textwise
5
6
  - Yahoo Boss
7
+ - OpenCalais
8
+
9
+ = H2 Please tell me if there are more API's to include!
10
+
11
+ <em>After using the Term Extraction gem, I needed the scores of the tags returned by the different APIs, and I wanted to use Nokogiri for performance reasons.
12
+ I thank alexrabarts, because his work gave me the idea to create my first gem</em>
6
13
 
7
- Please tell me if there are more API's to include!
14
+ == Installation
8
15
 
9
- After using Term Extraction gem, I happened to need the score of the different tags I got from the different APIS + I wanted to use Nokogiri for performance concerns.
16
+ To install (I strongly recommend you use RVM):
17
+ gem install sem_extractor
18
+
19
+
20
+ == Usage
10
21
  Most of the methods below retrieve a hash with 'name' and 'score'
11
22
 
12
23
  Initialize:
13
24
  - yahoo = SemExtractor::Yahoo.new(:api_key => your_key, :context => your_text)
14
25
  - zemanta = SemExtractor::Zemanta.new(:api_key => your_key, :context => your_text)
15
26
  - sem = SemExtractor::Textwise.new(:api_key => your_key, :context => your_text_or_url)
27
+ - calais = SemExtractor::Calais.new(:api_key => CALAIS, :context => REQUEST)
16
28
 
17
29
  Get info:
18
30
  - yahoo.terms
@@ -21,6 +33,9 @@ Get info:
21
33
  - sem.terms
22
34
  - sem.categories
23
35
  - sem.filter #filters the useful content of a web page, retrieves text
36
+ - calais.terms
37
+ - calais.categories
38
+ - calais.geos
24
39
 
25
40
  == Note on Patches/Pull Requests
26
41
 
@@ -32,6 +47,9 @@ Get info:
32
47
  (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
33
48
  * Send me a pull request. Bonus points for topic branches.
34
49
 
50
+ == Todo
51
+ I still have to write the tests for this gem... :)
52
+
35
53
  == Copyright
36
54
 
37
55
  Copyright (c) 2010 apneadiving. See LICENSE for details.
data/Rakefile CHANGED
@@ -5,8 +5,8 @@ begin
5
5
  require 'jeweler'
6
6
  Jeweler::Tasks.new do |gem|
7
7
  gem.name = "sem_extractor"
8
- gem.summary = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
9
- gem.description = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
8
+ gem.summary = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
9
+ gem.description = %Q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
10
10
  gem.email = "apnea.diving.deep@gmail.com"
11
11
  gem.homepage = "http://github.com/apneadiving/sem_extractor"
12
12
  gem.authors = ["apneadiving"]
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.0.1
1
+ 0.0.2
@@ -0,0 +1,49 @@
1
+ class SemExtractor
2
+ class Calais < SemExtractor
3
+
4
+ def initialize(options={})
5
+ self.set(options)
6
+ Nokogiri::XML(remote_xml).xpath('//rdf:Description').map { |h|
7
+ node_type = h.xpath('rdf:type').first['resource']
8
+ if node_type.include?('/type/cat/')
9
+ @categories << { "name" => h.xpath('c:categoryName').first.content, "score"=>h.xpath('c:score').first.content}
10
+ elsif node_type.include?('/type/em/')
11
+ nationality = h.xpath('c:nationality').first.nil? ? 'N/A' : h.xpath('c:nationality').first.content
12
+ @terms << { "name" => h.xpath('c:name').first.content, "score" => nil, "nationality" => nationality }
13
+ elsif node_type.include?('/type/sys/InstanceInfo')
14
+ #nothing to do, no info to take
15
+ elsif node_type.include?('/type/sys/RelevanceInfo')
16
+ # I assume here, Open Calais will keep on giving information in the proper order, seems fair :)
17
+ @terms.last["score"] = h.xpath('c:relevance').first.content
18
+ elsif node_type.include?('/Geo/')
19
+ @geos <<{ "name" => h.xpath('c:name').first.content }
20
+ end
21
+ }
22
+ end
23
+
24
+ def uri
25
+ URI.parse(gateway + '?' + URI.escape(post_params.collect{ |k, v| "#{k}=#{v}" }.join('&')))
26
+ end
27
+
28
+ def post_params
29
+ {
30
+ 'licenseID' => @api_key,
31
+ 'content' => @context
32
+ }
33
+ end
34
+
35
+ private
36
+ def gateway
37
+ 'http://api.opencalais.com/enlighten/rest/'
38
+ end
39
+
40
+ def remote_xml
41
+ begin
42
+ Net::HTTP.get_response((uri)).body
43
+ rescue => e
44
+ $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
45
+ nil
46
+ end
47
+ end
48
+ end
49
+ end
data/lib/apis/textwise.rb CHANGED
@@ -1,9 +1,11 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'uri'
4
-
5
1
  class SemExtractor
6
2
  class Textwise < SemExtractor
3
+ # These are kept as methods rather than precomputed variables because each one triggers its own HTTP call
4
+
5
+ def initialize(options={})
6
+ self.set(options)
7
+ end
8
+
7
9
  def terms
8
10
  @options = { 'content' => @context }
9
11
  get_entity
@@ -24,7 +26,6 @@ class SemExtractor
24
26
  def match
25
27
  @type = 'match/rsscombined'
26
28
  @options = {'content' => @context }
27
- puts remote_xml
28
29
  end
29
30
 
30
31
  def get_entity
data/lib/apis/yahoo.rb CHANGED
@@ -1,15 +1,8 @@
1
- require 'nokogiri'
2
- require 'open-uri'
3
- require 'uri'
4
-
5
1
  class SemExtractor
6
2
  class Yahoo < SemExtractor
7
- def terms
8
- begin
9
- Nokogiri::XML(remote_xml).css('Result').map { |h| {"name" => h.content} }
10
- rescue
11
- []
12
- end
3
+ def initialize(options={})
4
+ self.set(options)
5
+ @terms = Nokogiri::XML(remote_xml).css('Result').map { |h| {"name" => h.content} }
13
6
  end
14
7
 
15
8
  def uri
data/lib/apis/zemanta.rb CHANGED
@@ -1,24 +1,13 @@
1
- require 'nokogiri'
2
- require 'net/http'
3
- require 'uri'
4
-
5
1
  class SemExtractor
6
2
  class Zemanta < SemExtractor
7
3
 
8
- def terms
9
- begin
10
- @categories = Nokogiri::XML(remote_xml).css('category').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
11
- Nokogiri::XML(remote_xml).css('keyword').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
12
- rescue
13
- []
14
- end
4
+ def initialize(options={})
5
+ self.set(options)
6
+ xml = remote_xml
7
+ @categories = Nokogiri::XML(xml).css('category').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
8
+ @terms = Nokogiri::XML(xml).css('keyword').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
15
9
  end
16
10
 
17
- def categories
18
- terms if @categories == nil
19
- return @categories
20
- end
21
-
22
11
  def uri
23
12
  URI.parse(gateway)
24
13
  end
data/lib/sem_extractor.rb CHANGED
@@ -1,13 +1,18 @@
1
+ require 'nokogiri'
2
+ require 'uri'
3
+ require 'net/http'
4
+ require 'open-uri'
5
+
1
6
  class SemExtractor
2
- attr_accessor :context, :api_key, :categories
7
+ attr_accessor :context, :api_key, :categories, :terms, :geos
3
8
 
4
- def initialize(options={})
9
+ def set(options={})
5
10
  @context = options[:context]
6
11
  @api_key = options[:api_key]
7
12
  @type = options[:type]
8
- @categories = nil
13
+ @categories, @terms, @geos, @relations = Array.new, Array.new, Array.new, Array.new
9
14
  end
10
15
 
11
16
  end
12
17
 
13
- %w{yahoo zemanta textwise}.each{|t| require "apis/#{t}"}
18
+ %w{yahoo zemanta textwise calais}.each{|t| require "apis/#{t}"}
Binary file
@@ -5,12 +5,12 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{sem_extractor}
8
- s.version = "0.0.1"
8
+ s.version = "0.0.2"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["apneadiving"]
12
12
  s.date = %q{2010-10-02}
13
- s.description = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
13
+ s.description = %q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
14
14
  s.email = %q{apnea.diving.deep@gmail.com}
15
15
  s.extra_rdoc_files = [
16
16
  "LICENSE",
@@ -22,11 +22,13 @@ Gem::Specification.new do |s|
22
22
  "Rakefile",
23
23
  "VERSION",
24
24
  "lib/.DS_Store",
25
+ "lib/apis/calais.rb",
25
26
  "lib/apis/textwise.rb",
26
27
  "lib/apis/yahoo.rb",
27
28
  "lib/apis/zemanta.rb",
28
29
  "lib/sem_extractor.rb",
29
30
  "pkg/sem_extractor-0.0.0.gem",
31
+ "pkg/sem_extractor-0.0.1.gem",
30
32
  "sem_extractor.gemspec",
31
33
  "test/helper.rb",
32
34
  "test/test_sem_extractor.rb"
@@ -35,7 +37,7 @@ Gem::Specification.new do |s|
35
37
  s.rdoc_options = ["--charset=UTF-8"]
36
38
  s.require_paths = ["lib"]
37
39
  s.rubygems_version = %q{1.3.7}
38
- s.summary = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
40
+ s.summary = %q{Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo}
39
41
  s.test_files = [
40
42
  "test/helper.rb",
41
43
  "test/test_sem_extractor.rb"
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sem_extractor
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 27
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
8
  - 0
9
- - 1
10
- version: 0.0.1
9
+ - 2
10
+ version: 0.0.2
11
11
  platform: ruby
12
12
  authors:
13
13
  - apneadiving
@@ -32,7 +32,7 @@ dependencies:
32
32
  version: "0"
33
33
  type: :runtime
34
34
  version_requirements: *id001
35
- description: Extracts data from semantics API like zemanta, textwise and yahoo
35
+ description: "Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo"
36
36
  email: apnea.diving.deep@gmail.com
37
37
  executables: []
38
38
 
@@ -47,11 +47,13 @@ files:
47
47
  - Rakefile
48
48
  - VERSION
49
49
  - lib/.DS_Store
50
+ - lib/apis/calais.rb
50
51
  - lib/apis/textwise.rb
51
52
  - lib/apis/yahoo.rb
52
53
  - lib/apis/zemanta.rb
53
54
  - lib/sem_extractor.rb
54
55
  - pkg/sem_extractor-0.0.0.gem
56
+ - pkg/sem_extractor-0.0.1.gem
55
57
  - sem_extractor.gemspec
56
58
  - test/helper.rb
57
59
  - test/test_sem_extractor.rb
@@ -88,7 +90,7 @@ rubyforge_project:
88
90
  rubygems_version: 1.3.7
89
91
  signing_key:
90
92
  specification_version: 3
91
- summary: Extracts data from semantics API like zemanta, textwise and yahoo
93
+ summary: "Extracts data from semantics APIs: zemanta, textwise, opencalais and yahoo"
92
94
  test_files:
93
95
  - test/helper.rb
94
96
  - test/test_sem_extractor.rb