sem_extractor 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/LICENSE ADDED
@@ -0,0 +1,20 @@
1
+ Copyright (c) 2009 apneadiving
2
+
3
+ Permission is hereby granted, free of charge, to any person obtaining
4
+ a copy of this software and associated documentation files (the
5
+ "Software"), to deal in the Software without restriction, including
6
+ without limitation the rights to use, copy, modify, merge, publish,
7
+ distribute, sublicense, and/or sell copies of the Software, and to
8
+ permit persons to whom the Software is furnished to do so, subject to
9
+ the following conditions:
10
+
11
+ The above copyright notice and this permission notice shall be
12
+ included in all copies or substantial portions of the Software.
13
+
14
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
15
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
16
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
17
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
18
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
19
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
20
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.rdoc ADDED
@@ -0,0 +1,37 @@
1
+ = sem_extractor
2
+ SemExtractor gathers, in a single place, wrappers for most of the semantic libraries:
3
+ - Zemanta
4
+ - Semantic Hacker from Textwise
5
+ - Yahoo Boss
6
+
7
+ Please tell me if there are more APIs to include!
8
+
9
+ After using the Term Extraction gem, I found I needed the score of the different tags returned by the different APIs, and I wanted to use Nokogiri for performance reasons.
10
+ Most of the methods below return an array of hashes with 'name' and 'score' keys
11
+
12
+ Initialize:
13
+ - yahoo = SemExtractor::Yahoo.new(:api_key => your_key, :context => your_text)
14
+ - zemanta = SemExtractor::Zemanta.new(:api_key => your_key, :context => your_text)
15
+ - sem = SemExtractor::Textwise.new(:api_key => your_key, :context => your_text_or_url)
16
+
17
+ Get info:
18
+ - yahoo.terms
19
+ - zemanta.terms
20
+ - zemanta.categories
21
+ - sem.terms
22
+ - sem.categories
23
+ - sem.filter #filters the useful content of a web page, retrieves text
24
+
25
+ == Note on Patches/Pull Requests
26
+
27
+ * Fork the project.
28
+ * Make your feature addition or bug fix.
29
+ * Add tests for it. This is important so I don't break it in a
30
+ future version unintentionally.
31
+ * Commit, do not mess with rakefile, version, or history.
32
+ (if you want to have your own version, that is fine but bump version in a commit by itself I can ignore when I pull)
33
+ * Send me a pull request. Bonus points for topic branches.
34
+
35
+ == Copyright
36
+
37
+ Copyright (c) 2010 apneadiving. See LICENSE for details.
data/Rakefile ADDED
@@ -0,0 +1,53 @@
1
+ require 'rubygems'
2
+ require 'rake'
3
+
4
+ begin
5
+ require 'jeweler'
6
+ Jeweler::Tasks.new do |gem|
7
+ gem.name = "sem_extractor"
8
+ gem.summary = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
9
+ gem.description = %Q{Extracts data from semantics API like zemanta, textwise and yahoo}
10
+ gem.email = "apnea.diving.deep@gmail.com"
11
+ gem.homepage = "http://github.com/apneadiving/sem_extractor"
12
+ gem.authors = ["apneadiving"]
13
+ gem.add_dependency "nokogiri", ">= 0"
14
+ # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
15
+ end
16
+ Jeweler::GemcutterTasks.new
17
+ rescue LoadError
18
+ puts "Jeweler (or a dependency) not available. Install it with: gem install jeweler"
19
+ end
20
+
21
+ require 'rake/testtask'
22
+ Rake::TestTask.new(:test) do |test|
23
+ test.libs << 'lib' << 'test'
24
+ test.pattern = 'test/**/test_*.rb'
25
+ test.verbose = true
26
+ end
27
+
28
+ begin
29
+ require 'rcov/rcovtask'
30
+ Rcov::RcovTask.new do |test|
31
+ test.libs << 'test'
32
+ test.pattern = 'test/**/test_*.rb'
33
+ test.verbose = true
34
+ end
35
+ rescue LoadError
36
+ task :rcov do
37
+ abort "RCov is not available. In order to run rcov, you must: sudo gem install spicycode-rcov"
38
+ end
39
+ end
40
+
41
+ task :test => :check_dependencies
42
+
43
+ task :default => :test
44
+
45
+ require 'rake/rdoctask'
46
+ Rake::RDocTask.new do |rdoc|
47
+ version = File.exist?('VERSION') ? File.read('VERSION') : ""
48
+
49
+ rdoc.rdoc_dir = 'rdoc'
50
+ rdoc.title = "sem_extractor #{version}"
51
+ rdoc.rdoc_files.include('README*')
52
+ rdoc.rdoc_files.include('lib/**/*.rb')
53
+ end
data/VERSION ADDED
@@ -0,0 +1 @@
1
+ 0.0.1
data/lib/.DS_Store ADDED
Binary file
@@ -0,0 +1,59 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ class SemExtractor
6
+ class Textwise < SemExtractor
7
+ def terms
8
+ @options = { 'content' => @context }
9
+ get_entity
10
+ end
11
+
12
+ def categories
13
+ @options = {'content' => @context, 'showLabels' => "true" }
14
+ @type = 'category'
15
+ get_entity
16
+ end
17
+
18
+ def filter
19
+ @options = {'uri' => @context }
20
+ @type = 'filter/web'
21
+ remote_xml
22
+ end
23
+
24
+ def match
25
+ @type = 'match/rsscombined'
26
+ @options = {'content' => @context }
27
+ puts remote_xml
28
+ end
29
+
30
+ def get_entity
31
+ begin
32
+ Nokogiri::XML(remote_xml).css(@type).map { |h| {"score" => h['weight'], "name" => h['label']} }
33
+ rescue
34
+ []
35
+ end
36
+ end
37
+
38
+ def uri
39
+ api_uri = URI.parse(gateway)
40
+ api_uri.query = @options.map { |k,v| "#{URI.escape(k || '')}=#{URI.escape(v || '')}" }.join('&')
41
+ api_uri
42
+ end
43
+
44
+ private
45
+ def gateway
46
+ @type ||= 'concept'
47
+ 'http://api.semantichacker.com/' + @api_key + '/' + @type + '?'
48
+ end
49
+
50
+ def remote_xml
51
+ begin
52
+ open(uri).read
53
+ rescue => e
54
+ $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
55
+ nil
56
+ end
57
+ end
58
+ end
59
+ end
data/lib/apis/yahoo.rb ADDED
@@ -0,0 +1,39 @@
1
+ require 'nokogiri'
2
+ require 'open-uri'
3
+ require 'uri'
4
+
5
+ class SemExtractor
6
+ class Yahoo < SemExtractor
7
+ def terms
8
+ begin
9
+ Nokogiri::XML(remote_xml).css('Result').map { |h| {"name" => h.content} }
10
+ rescue
11
+ []
12
+ end
13
+ end
14
+
15
+ def uri
16
+ api_uri = URI.parse(gateway)
17
+ api_uri.query = {
18
+ 'appid' => @api_key,
19
+ 'output' => 'xml',
20
+ 'context' => @context
21
+ }.map { |k,v| "#{URI.escape(k || '')}=#{URI.escape(v || '')}" }.join('&')
22
+ api_uri
23
+ end
24
+
25
+ private
26
+ def gateway
27
+ 'http://search.yahooapis.com/ContentAnalysisService/V1/termExtraction'
28
+ end
29
+
30
+ def remote_xml
31
+ begin
32
+ open(uri).read
33
+ rescue => e
34
+ $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
35
+ nil
36
+ end
37
+ end
38
+ end
39
+ end
@@ -0,0 +1,52 @@
1
+ require 'nokogiri'
2
+ require 'net/http'
3
+ require 'uri'
4
+
5
+ class SemExtractor
6
+ class Zemanta < SemExtractor
7
+
8
+ def terms
9
+ begin
10
+ @categories = Nokogiri::XML(remote_xml).css('category').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
11
+ Nokogiri::XML(remote_xml).css('keyword').map { |h| {"score" => h.css('confidence').first.content, "name" => h.css('name').first.content} }
12
+ rescue
13
+ []
14
+ end
15
+ end
16
+
17
+ def categories
18
+ terms if @categories == nil
19
+ return @categories
20
+ end
21
+
22
+ def uri
23
+ URI.parse(gateway)
24
+ end
25
+
26
+ def post_params
27
+ {
28
+ 'method' =>'zemanta.suggest',
29
+ 'api_key' => @api_key,
30
+ 'return_images' => 0,
31
+ 'text' => @context,
32
+ 'format' => 'xml',
33
+ 'articles_limit' => 1,
34
+ 'return_categories' => 'dmoz'
35
+ }
36
+ end
37
+
38
+ private
39
+ def gateway
40
+ 'http://api.zemanta.com/services/rest/0.0/'
41
+ end
42
+
43
+ def remote_xml
44
+ begin
45
+ Net::HTTP.post_form(uri, post_params).body
46
+ rescue => e
47
+ $stderr.puts "Couldn't fetch from API: #{e.message}" if $VERBOSE
48
+ nil
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,13 @@
1
+ class SemExtractor
2
+ attr_accessor :context, :api_key, :categories
3
+
4
+ def initialize(options={})
5
+ @context = options[:context]
6
+ @api_key = options[:api_key]
7
+ @type = options[:type]
8
+ @categories = nil
9
+ end
10
+
11
+ end
12
+
13
+ %w{yahoo zemanta textwise}.each{|t| require "apis/#{t}"}
Binary file
@@ -0,0 +1,57 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{sem_extractor}
8
+ s.version = "0.0.1"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["apneadiving"]
12
+ s.date = %q{2010-10-02}
13
+ s.description = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
14
+ s.email = %q{apnea.diving.deep@gmail.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ "LICENSE",
21
+ "README.rdoc",
22
+ "Rakefile",
23
+ "VERSION",
24
+ "lib/.DS_Store",
25
+ "lib/apis/textwise.rb",
26
+ "lib/apis/yahoo.rb",
27
+ "lib/apis/zemanta.rb",
28
+ "lib/sem_extractor.rb",
29
+ "pkg/sem_extractor-0.0.0.gem",
30
+ "sem_extractor.gemspec",
31
+ "test/helper.rb",
32
+ "test/test_sem_extractor.rb"
33
+ ]
34
+ s.homepage = %q{http://github.com/apneadiving/sem_extractor}
35
+ s.rdoc_options = ["--charset=UTF-8"]
36
+ s.require_paths = ["lib"]
37
+ s.rubygems_version = %q{1.3.7}
38
+ s.summary = %q{Extracts data from semantics API like zemanta, textwise and yahoo}
39
+ s.test_files = [
40
+ "test/helper.rb",
41
+ "test/test_sem_extractor.rb"
42
+ ]
43
+
44
+ if s.respond_to? :specification_version then
45
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
46
+ s.specification_version = 3
47
+
48
+ if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
49
+ s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
50
+ else
51
+ s.add_dependency(%q<nokogiri>, [">= 0"])
52
+ end
53
+ else
54
+ s.add_dependency(%q<nokogiri>, [">= 0"])
55
+ end
56
+ end
57
+
data/test/helper.rb ADDED
@@ -0,0 +1,10 @@
1
+ require 'rubygems'
2
+ require 'test/unit'
3
+ require 'shoulda'
4
+
5
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
6
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
7
+ require 'sem_extractor'
8
+
9
+ class Test::Unit::TestCase
10
+ end
@@ -0,0 +1,7 @@
1
+ require 'helper'
2
+
3
+ class TestSemExtractor < Test::Unit::TestCase
4
+ should "probably rename this file and start testing for real" do
5
+ flunk "hey buddy, you should probably rename this file and start testing for real"
6
+ end
7
+ end
metadata ADDED
@@ -0,0 +1,94 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sem_extractor
3
+ version: !ruby/object:Gem::Version
4
+ hash: 29
5
+ prerelease: false
6
+ segments:
7
+ - 0
8
+ - 0
9
+ - 1
10
+ version: 0.0.1
11
+ platform: ruby
12
+ authors:
13
+ - apneadiving
14
+ autorequire:
15
+ bindir: bin
16
+ cert_chain: []
17
+
18
+ date: 2010-10-02 00:00:00 +02:00
19
+ default_executable:
20
+ dependencies:
21
+ - !ruby/object:Gem::Dependency
22
+ name: nokogiri
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
26
+ requirements:
27
+ - - ">="
28
+ - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
32
+ version: "0"
33
+ type: :runtime
34
+ version_requirements: *id001
35
+ description: Extracts data from semantics API like zemanta, textwise and yahoo
36
+ email: apnea.diving.deep@gmail.com
37
+ executables: []
38
+
39
+ extensions: []
40
+
41
+ extra_rdoc_files:
42
+ - LICENSE
43
+ - README.rdoc
44
+ files:
45
+ - LICENSE
46
+ - README.rdoc
47
+ - Rakefile
48
+ - VERSION
49
+ - lib/.DS_Store
50
+ - lib/apis/textwise.rb
51
+ - lib/apis/yahoo.rb
52
+ - lib/apis/zemanta.rb
53
+ - lib/sem_extractor.rb
54
+ - pkg/sem_extractor-0.0.0.gem
55
+ - sem_extractor.gemspec
56
+ - test/helper.rb
57
+ - test/test_sem_extractor.rb
58
+ has_rdoc: true
59
+ homepage: http://github.com/apneadiving/sem_extractor
60
+ licenses: []
61
+
62
+ post_install_message:
63
+ rdoc_options:
64
+ - --charset=UTF-8
65
+ require_paths:
66
+ - lib
67
+ required_ruby_version: !ruby/object:Gem::Requirement
68
+ none: false
69
+ requirements:
70
+ - - ">="
71
+ - !ruby/object:Gem::Version
72
+ hash: 3
73
+ segments:
74
+ - 0
75
+ version: "0"
76
+ required_rubygems_version: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ">="
80
+ - !ruby/object:Gem::Version
81
+ hash: 3
82
+ segments:
83
+ - 0
84
+ version: "0"
85
+ requirements: []
86
+
87
+ rubyforge_project:
88
+ rubygems_version: 1.3.7
89
+ signing_key:
90
+ specification_version: 3
91
+ summary: Extracts data from semantics API like zemanta, textwise and yahoo
92
+ test_files:
93
+ - test/helper.rb
94
+ - test/test_sem_extractor.rb