sem_extractor 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 apneadiving
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,37 @@
1
+ = sem_extractor
2
+ SemExtractor gathers, in a single place, wrappers for several semantic APIs:
3
+ - Zemanta
4
+ - Semantic Hacker from Textwise
5
+ - Yahoo Boss
6
+
7
+ Please tell me if there are more APIs to include!
8
+
9
+ After using the Term Extraction gem, I needed the scores of the tags returned by the different APIs, and I wanted to use Nokogiri for performance reasons.
10
+ Most of the methods below return an array of hashes with 'name' and 'score' keys.
11
+
12
+ Initialize:
13
+ - yahoo = SemExtractor::Yahoo.new(:api_key => your_key, :context => your_text)
14
+ - zemanta = SemExtractor::Zemanta.new(:api_key => your_key, :context => your_text)
15
+ - sem = SemExtractor::Textwise.new(:api_key => your_key, :context => your_text_or_url)
16
+
17
+ Get info:
18
+ - yahoo.terms
19
+ - zemanta.terms
20
+ - zemanta.categories
21
+ - sem.terms
22
+ - sem.categories
23
+ - sem.filter #filters the useful content of a web page, retrieves text
24
+
25
+ == Note on Patches/Pull Requests
26
+
27
+ * Fork the project.
28
+ * Make your feature addition or bug fix.
29
+ * Add tests for it. This is important so I don't break it in a
30
+ future version unintentionally.
31
+ * Commit, do not mess with rakefile, version, or history.
32
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
33
+ * Send me a pull request. Bonus points for topic branches.
34
+
35
+ == Copyright
36
+
37
+ Copyright (c) 2010 apneadiving. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
require 'rubygems'
require 'rake'

# Gem packaging tasks. Jeweler is optional: when it cannot be loaded we only
# print a hint and keep the remaining tasks usable.
begin
  require 'jeweler'
  Jeweler::Tasks.new do |gem|
    gem.name = "sem_extractor"
    gem.summary = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
    gem.description = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
    gem.email = "apnea.diving.deep@gmail.com"
    gem.homepage = "http://github.com/apneadiving/sem_extractor"
    gem.authors = ["apneadiving"]
    gem.add_dependency "nokogiri", ">= 0"
    # List the packaged files explicitly so that editor/OS residue
    # (lib/.DS_Store) and previously built gems (pkg/*.gem) are not shipped.
    gem.files = FileList['LICENSE', 'README.rdoc', 'Rakefile', 'VERSION',
                         'lib/**/*.rb', 'test/**/*.rb'].to_a
    # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
  end
  Jeweler::GemcutterTasks.new
rescue LoadError
  puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
end

# Unit tests: every test/**/test_*.rb file, with lib and test on the load path.
require 'rake/testtask'
Rake::TestTask.new(:test) do |test|
  test.libs << 'lib' << 'test'
  test.pattern = 'test/**/test_*.rb'
  test.verbose = true
end

# Coverage task; replaced by an explanatory stub when rcov is not installed.
begin
  require 'rcov/rcovtask'
  Rcov::RcovTask.new do |test|
    test.libs << 'test'
    test.pattern = 'test/**/test_*.rb'
    test.verbose = true
  end
rescue LoadError
  task :rcov do
    abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
  end
end

# :check_dependencies is not defined in this file (presumably it comes from
# the Jeweler tasks above). Only depend on it when it exists, otherwise
# `rake test` would abort with an unknown-task error.
task :test => :check_dependencies if Rake::Task.task_defined?('check_dependencies')

task :default => :test

# 'rake/rdoctask' was removed from recent Rake releases; prefer the task
# shipped with RDoc and fall back to the legacy one on old installations.
begin
  require 'rdoc/task'
  rdoc_task_class = RDoc::Task
rescue LoadError
  require 'rake/rdoctask'
  rdoc_task_class = Rake::RDocTask
end

rdoc_task_class.new do |rdoc|
  # strip: the VERSION file ends with a newline that must not leak into the title
  version = File.exist?('VERSION') ? File.read('VERSION').strip : ""

  rdoc.rdoc_dir = 'rdoc'
  rdoc.title = "sem_extractor #{version}"
  rdoc.rdoc_files.include('README*')
  rdoc.rdoc_files.include('lib/**/*.rb')
end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,59 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
class SemExtractor
  # Wrapper for the Semantic Hacker API (api.semantichacker.com).
  #
  # Each query method selects an endpoint type and a set of query parameters,
  # then fetches the corresponding URL. The endpoint chosen for one request is
  # kept apart from the @type given at construction, so calling #categories,
  # #filter or #match no longer changes what a later #terms call queries.
  class Textwise < SemExtractor
    # Endpoint used by #terms when no type was configured on the instance.
    DEFAULT_TYPE = 'concept'.freeze

    # Queries the configured endpoint type (or 'concept') with the context.
    #
    # Returns an Array of { "score" => ..., "name" => ... } hashes,
    # or [] when the request or the parsing fails.
    def terms
      select_endpoint(@type || DEFAULT_TYPE, 'content' => @context)
      get_entity
    end

    # Queries the 'category' endpoint with labels enabled.
    #
    # Returns an Array of { "score" => ..., "name" => ... } hashes,
    # or [] when the request or the parsing fails.
    def categories
      select_endpoint('category', 'content' => @context, 'showLabels' => 'true')
      get_entity
    end

    # Queries the 'filter/web' endpoint, sending the context as the 'uri'
    # parameter.
    #
    # Returns the raw response body, or nil when the request fails.
    def filter
      select_endpoint('filter/web', 'uri' => @context)
      remote_xml
    end

    # Queries the 'match/rsscombined' endpoint with the context.
    #
    # Returns the raw response body, or nil when the request fails. (It used
    # to print the body to stdout and return nil, which made the result
    # unusable by callers.)
    def match
      select_endpoint('match/rsscombined', 'content' => @context)
      remote_xml
    end

    # Fetches the currently selected endpoint and extracts, for every element
    # named after the endpoint type, its 'weight' and 'label' attributes.
    #
    # Returns an Array of { "score" => weight, "name" => label } hashes,
    # or [] when nothing could be fetched or parsed.
    def get_entity
      xml = remote_xml
      return [] if xml.nil?

      Nokogiri::XML(xml).css(request_type).map do |node|
        { 'score' => node['weight'], 'name' => node['label'] }
      end
    rescue StandardError
      []
    end

    # Builds the request URI for the currently selected endpoint.
    #
    # Keys and values are form-encoded, so characters such as '&' and '='
    # inside the context can no longer break the query string. nil values are
    # sent as empty strings; spaces are sent as '+'.
    def uri
      api_uri = URI.parse(gateway)
      pairs = (@options || {}).map { |key, value| [key.to_s, value.to_s] }
      api_uri.query = URI.encode_www_form(pairs)
      api_uri
    end

    private

    # Records the endpoint type and query parameters of the next request.
    def select_endpoint(type, options)
      @request_type = type
      @options = options
    end

    # Endpoint type of the current request, falling back to the configured
    # type and finally to DEFAULT_TYPE.
    def request_type
      @request_type || @type || DEFAULT_TYPE
    end

    # Endpoint URL (without query parameters) for the current request.
    def gateway
      "http://api.semantichacker.com/#{@api_key}/#{request_type}?"
    end

    # Performs the HTTP GET. Returns the body, or nil on any failure (the
    # error is reported on stderr only when $VERBOSE is set).
    def remote_xml
      # OpenURI::OpenRead#read: Kernel#open no longer opens URIs on Ruby 3.
      uri.read
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
data/lib/apis/yahoo.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
class SemExtractor
  # Wrapper for the Yahoo term extraction service.
  class Yahoo < SemExtractor
    # Fetches the terms extracted from the context.
    #
    # Returns an Array of { "name" => term } hashes (one per 'Result'
    # element), or [] when the request or the parsing fails.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      Nokogiri::XML(xml).css('Result').map { |node| { 'name' => node.content } }
    rescue StandardError
      []
    end

    # Builds the request URI carrying the application id, the output format
    # and the context.
    #
    # Values are form-encoded, so characters such as '&' and '=' inside the
    # context can no longer break the query string. nil values are sent as
    # empty strings; spaces are sent as '+'.
    def uri
      api_uri = URI.parse(gateway)
      api_uri.query = URI.encode_www_form(
        [
          ['appid', @api_key.to_s],
          ['output', 'xml'],
          ['context', @context.to_s]
        ]
      )
      api_uri
    end

    private

    # Endpoint URL of the term extraction service.
    def gateway
      'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
    end

    # Performs the HTTP GET. Returns the body, or nil on any failure (the
    # error is reported on stderr only when $VERBOSE is set).
    def remote_xml
      # OpenURI::OpenRead#read: Kernel#open no longer opens URIs on Ruby 3.
      uri.read
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
@@ -0,0 +1,52 @@
1
+ require 'nokogiri'
2
+ require 'net/http'
3
+ require 'uri'
4
+
5
class SemExtractor
  # Wrapper for the Zemanta 'zemanta.suggest' REST call.
  class Zemanta < SemExtractor
    # Fetches the suggestions once and extracts both keywords and categories
    # from the same response (the response used to be requested twice).
    #
    # Side effect: stores the parsed categories in @categories.
    #
    # Returns an Array of { "score" => confidence, "name" => name } hashes
    # built from the 'keyword' elements, or [] when the request or the
    # parsing fails.
    def terms
      xml = remote_xml
      return [] if xml.nil?

      document = Nokogiri::XML(xml)
      @categories = scored_entries(document, 'category')
      scored_entries(document, 'keyword')
    rescue StandardError
      []
    end

    # Returns the categories parsed by #terms, calling it first when nothing
    # has been parsed yet. Returns [] (instead of nil) when they could not be
    # retrieved, so callers can always iterate the result.
    def categories
      terms if @categories.nil?
      @categories || []
    end

    # URI of the Zemanta REST gateway.
    def uri
      URI.parse(gateway)
    end

    # Form parameters posted to the gateway.
    def post_params
      {
        'method' => 'zemanta.suggest',
        'api_key' => @api_key,
        'return_images' => 0,
        'text' => @context,
        'format' => 'xml',
        'articles_limit' => 1,
        'return_categories' => 'dmoz'
      }
    end

    private

    # Maps every +tag+ element of +document+ to a hash holding the content of
    # its first 'confidence' and 'name' descendants. Raises when one of them
    # is missing; #terms turns that into an empty result.
    def scored_entries(document, tag)
      document.css(tag).map do |node|
        {
          'score' => node.at_css('confidence').content,
          'name' => node.at_css('name').content
        }
      end
    end

    # Endpoint URL of the Zemanta REST service.
    def gateway
      'http://api.zemanta.com/services/rest/0.0/'
    end

    # Performs the HTTP POST. Returns the body, or nil on any failure (the
    # error is reported on stderr only when $VERBOSE is set).
    def remote_xml
      Net::HTTP.post_form(uri, post_params).body
    rescue StandardError => e
      $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
      nil
    end
  end
end
@@ -0,0 +1,13 @@
1
# Base class of the API wrappers: holds the text (or URL) to analyse and the
# API key used for the requests.
class SemExtractor
  attr_accessor :context, :api_key, :categories

  # options - Hash read through the keys :context, :api_key and :type;
  #           missing keys leave the matching attribute nil.
  def initialize(options = {})
    @context, @api_key, @type = options.values_at(:context, :api_key, :type)
    @categories = nil
  end
end
12
+
13
+ %w{yahoo zemanta textwise}.each{|t| require "apis/#{t}"}
Binary file
@@ -0,0 +1,57 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{sem_extractor}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["apneadiving"]
12
+ s.date = %q{2010-10-02}
13
+ s.description = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
14
+ s.email = %q{apnea.diving.deep@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ "LICENSE",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "lib/.DS_Store",
25
+ "lib/apis/textwise.rb",
26
+ "lib/apis/yahoo.rb",
27
+ "lib/apis/zemanta.rb",
28
+ "lib/sem_extractor.rb",
29
+ "pkg/sem_extractor-0.0.0.gem",
30
+ "sem_extractor.gemspec",
31
+ "test/helper.rb",
32
+ "test/test_sem_extractor.rb"
33
+ ]
34
+ s.homepage = %q{http://github.com/apneadiving/sem_extractor}
35
+ s.rdoc_options = ["--charset=UTF-8"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = %q{1.3.7}
38
+ s.summary = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
39
+ s.test_files = [
40
+ "test/helper.rb",
41
+ "test/test_sem_extractor.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<nokogiri>, [">= 0"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<nokogiri>, [">= 0"])
55
+ end
56
+ end
57
+
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'sem_extractor'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
# Offline tests: nothing here performs a network request.
class TestSemExtractor < Test::Unit::TestCase
  should "store the context and api key given at initialization" do
    extractor = SemExtractor.new(:context => 'some text', :api_key => 'key')

    assert_equal 'some text', extractor.context
    assert_equal 'key', extractor.api_key
  end

  should "start without categories" do
    assert_nil SemExtractor.new.categories
  end

  should "build the Zemanta request parameters from the context and api key" do
    zemanta = SemExtractor::Zemanta.new(:context => 'some text', :api_key => 'key')
    params = zemanta.post_params

    assert_equal 'zemanta.suggest', params['method']
    assert_equal 'key', params['api_key']
    assert_equal 'some text', params['text']
    assert_equal 'xml', params['format']
  end

  should "target the Zemanta REST gateway" do
    assert_equal 'http://api.zemanta.com/services/rest/0.0/', SemExtractor::Zemanta.new.uri.to_s
  end
end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sem_extractor
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - apneadiving
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-10-02 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Extracts data from semantics API like zemanta, textwise and yahoo
36
+ email: apnea.diving.deep@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ - Rakefile
48
+ - VERSION
49
+ - lib/.DS_Store
50
+ - lib/apis/textwise.rb
51
+ - lib/apis/yahoo.rb
52
+ - lib/apis/zemanta.rb
53
+ - lib/sem_extractor.rb
54
+ - pkg/sem_extractor-0.0.0.gem
55
+ - sem_extractor.gemspec
56
+ - test/helper.rb
57
+ - test/test_sem_extractor.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/apneadiving/sem_extractor
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.3.7
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Extracts data from semantics API like zemanta, textwise and yahoo
92
+ test_files:
93
+ - test/helper.rb
94
+ - test/test_sem_extractor.rb