yahoo_content_analysis 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .key
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in yahoo_content_analysis.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andrew Kuklewicz
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # YahooContentAnalysis
2
+
3
+ Use the Yahoo! Content Analysis API to extract topics and entities.
4
+
5
+ OAuth access is implemented, though not necessary (the docs imply higher limits if requests are signed). This should be a useful example for those wanting to do 2-legged OAuth access to Yahoo APIs using Faraday and the OAuth Faraday Middleware.
6
+
7
+ N.B. - I am not seeing the additional metadata nor related entities returned as the Yahoo docs claim they should.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'yahoo_content_analysis'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install yahoo_content_analysis
22
+
23
+ ## Usage
24
+ ```ruby
25
+ require 'yahoo_content_analysis'
26
+
27
+ YahooContentAnalysis.configure{|y|
28
+ y.api_key = ENV['YAHOO_API_KEY']
29
+ y.api_secret = ENV['YAHOO_API_SECRET']
30
+ }
31
+
32
+ r = YahooContentAnalysis::Client.new.analyze('Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration.')
33
+
34
+ names = r.entities.collect{|e| e[:name]}
35
+ ```
36
+ ## Contributing
37
+
38
+ 1. Fork it
39
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
40
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
41
+ 4. Push to the branch (`git push origin my-new-feature`)
42
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,69 @@
1
+ # encoding: utf-8
2
+
3
+ module YahooContentAnalysis
4
+ class Client
5
+
6
+ include Connection
7
+
8
+ attr_reader *YahooContentAnalysis::Configuration.keys
9
+
10
+ attr_accessor :current_options
11
+
12
+ class_eval do
13
+ YahooContentAnalysis::Configuration.keys.each do |key|
14
+ define_method "#{key}=" do |arg|
15
+ self.instance_variable_set("@#{key}", arg)
16
+ self.current_options.merge!({:"#{key}" => arg})
17
+ end
18
+ end
19
+ end
20
+
21
+ def initialize(options={}, &block)
22
+ setup(options)
23
+ yield(self) if block_given?
24
+ end
25
+
26
+ def setup(options={})
27
+ options = YahooContentAnalysis.options.merge(options)
28
+ self.current_options = options
29
+ Configuration.keys.each do |key|
30
+ send("#{key}=", options[key])
31
+ end
32
+ end
33
+
34
+ def analyze(content, opts={})
35
+ raise 'Specify a value for the content' unless content
36
+ response = connection.post do |request|
37
+ request.params = options(content)
38
+ end
39
+ YahooContentAnalysis::Response.new(response)
40
+ end
41
+
42
+ def query(content)
43
+ "SELECT * FROM contentanalysis.analyze WHERE related_entities = \"true\" and #{condition(content)}"
44
+ end
45
+
46
+ def condition(content)
47
+ content.is_a?(URI) ? url(content) : text(content)
48
+ end
49
+
50
+ def url(content)
51
+ %{ url = "#{content}" }
52
+ end
53
+
54
# Builds the YQL condition that submits literal text for analysis.
#
# Backslashes and double quotes are both backslash-escaped so the content
# cannot end the quoted YQL string early (an unescaped trailing backslash
# would otherwise escape the closing quote).
#
# @param content [#to_s] the text to analyze
# @return [String] a fragment of the form ` text = "..." `, padded with one
#   space on each side
def text(content)
  # Block form of gsub: the replacement is used literally, so no
  # backslash-reference processing interferes with the escaping.
  escaped = content.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }
  %{ text = "#{escaped}" }
end
57
+
58
# Builds the request parameters sent with an analysis call.
#
# `max`, `related_entities` and `show_metadata` are taken from this client's
# configuration readers; when a reader returns nil the previous hard-coded
# value ('50' / 'true' / 'true') is used. 'format' stays fixed at 'json'.
#
# @param content [String, URI] the content handed to #query
# @return [Hash{String => String}] the request parameters
def options(content)
  # nil means "not configured"; false is a legitimate setting and is kept.
  setting = lambda { |value, fallback| value.nil? ? fallback : value.to_s }

  {
    'q' => query(content),
    'format' => 'json',
    'max' => setting.call(max, '50'),
    'related_entities' => setting.call(related_entities, 'true'),
    'show_metadata' => setting.call(show_metadata, 'true')
  }
end
67
+
68
+ end
69
+ end
@@ -0,0 +1,103 @@
1
+ # encoding: utf-8
2
+
3
+ # these are additional params from the docs, figure out how to use them - some don't seem to do squat.
4
+ # related_entities boolean: true (default), false Whether or not to include related entities/concepts in the response
5
+ # show_metadata boolean: true (default), false Whether or not to include entity/concept metadata in the response
6
+ # enable_categorizer boolean: true (default), false Whether or not to include document category information in the response
7
+ # unique boolean: true, false (default) Whether or not to detect only one occurrence of an entity or a concept that may appear multiple times
8
+ # max integer: 100 (default) Maximum number of entities/concepts to detect
9
+
10
+ module YahooContentAnalysis
11
+
12
+ module Configuration
13
+
14
+ VALID_OPTIONS_KEYS = [
15
+ :api_key,
16
+ :api_secret,
17
+ :adapter,
18
+ :endpoint,
19
+ :user_agent,
20
+ :format,
21
+ :max,
22
+ :related_entities,
23
+ :show_metadata,
24
+ :enable_categorizer,
25
+ :unique
26
+ ].freeze
27
+
28
+ VALID_PARAMS = [
29
+ :format,
30
+ :max,
31
+ :related_entities,
32
+ :show_metadata,
33
+ :enable_categorizer,
34
+ :unique
35
+ ]
36
+
37
+ # this you need to get from yahoo - go register an app!
38
+ DEFAULT_API_KEY = nil
39
+
40
+ DEFAULT_API_SECRET = nil
41
+
42
+ # Adapters are whatever Faraday supports - I like excon a lot, so I'm making it the default
43
+ DEFAULT_ADAPTER = :excon
44
+
45
+ # The api endpoint for YQL
46
+ DEFAULT_ENDPOINT = 'http://query.yahooapis.com/v1/public/yql'.freeze
47
+
48
+ # The value sent in the http header for 'User-Agent' if none is set
49
+ DEFAULT_USER_AGENT = "YahooContentAnalysis Ruby Gem #{YahooContentAnalysis::VERSION}".freeze
50
+
51
+ DEFAULT_FORMAT = :json
52
+
53
+ DEFAULT_MAX = 50
54
+
55
+ DEFAULT_RELATED_ENTITIES = 'true'
56
+
57
+ DEFAULT_SHOW_METADATA = 'true'
58
+
59
+ DEFAULT_ENABLE_CATEGORIZER = 'true'
60
+
61
+ DEFAULT_UNIQUE = 'true'
62
+
63
+ attr_accessor *VALID_OPTIONS_KEYS
64
+
65
+ # Convenience method to allow for global setting of configuration options
66
+ def configure
67
+ yield self
68
+ end
69
+
70
+ def self.extended(base)
71
+ base.reset!
72
+ end
73
+
74
+ class << self
75
+ def keys
76
+ VALID_OPTIONS_KEYS
77
+ end
78
+ end
79
+
80
+ def options
81
+ options = {}
82
+ VALID_OPTIONS_KEYS.each { |k| options[k] = send(k) }
83
+ options
84
+ end
85
+
86
+ # Reset configuration options to their defaults
87
+ def reset!
88
+ self.api_key = DEFAULT_API_KEY
89
+ self.api_secret = DEFAULT_API_SECRET
90
+ self.adapter = DEFAULT_ADAPTER
91
+ self.endpoint = DEFAULT_ENDPOINT
92
+ self.user_agent = DEFAULT_USER_AGENT
93
+ self.format = DEFAULT_FORMAT
94
+ self.max = DEFAULT_MAX
95
+ self.related_entities = DEFAULT_RELATED_ENTITIES
96
+ self.show_metadata = DEFAULT_SHOW_METADATA
97
+ self.enable_categorizer = DEFAULT_ENABLE_CATEGORIZER
98
+ self.unique = DEFAULT_UNIQUE
99
+ self
100
+ end
101
+
102
+ end
103
+ end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+
3
+ require 'faraday_middleware'
4
+
5
+ module YahooContentAnalysis
6
+ module Connection
7
+
8
+ ALLOWED_OPTIONS = [
9
+ :headers,
10
+ :url,
11
+ :params,
12
+ :request,
13
+ :ssl
14
+ ].freeze
15
+
16
# Combines the caller's connection options with this object's defaults.
#
# The caller's hash is copied first, so it is never modified. Values in
# `opts` override the defaults; entries under :headers are merged into the
# default headers rather than replacing them.
#
# @param opts [Hash] connection options; may include :headers
# @return [Hash] a new hash containing :consumer_key, :consumer_secret,
#   :headers, :ssl and :url plus anything supplied in `opts`
def add_default_options(opts={})
  overrides = opts.dup
  extra_headers = overrides.delete(:headers) || {}

  defaults = {
    :consumer_key => api_key,
    :consumer_secret => api_secret,
    :headers => {
      # generic http headers
      'User-Agent' => user_agent,
      'Accept' => "application/json;charset=utf-8"
    },
    # NOTE(review): certificate verification is switched off by default;
    # confirm this is intended before pointing `endpoint` at an https URL.
    :ssl => {:verify => false},
    :url => endpoint
  }

  merged = defaults.merge(overrides)
  merged[:headers] = merged[:headers].merge(extra_headers)
  merged
end
32
+
33
# Builds a Faraday connection for the given options.
#
# The options are first completed by #add_default_options; #oauth_opts then
# pulls the consumer credentials out of them before the remainder is handed
# to Faraday. The :oauth request middleware is registered only when
# #oauth_opts returns credentials, and the :logger response middleware only
# when ENV['DEBUG'] is set.
#
# @param options [Hash] connection options, see #add_default_options
# @return [Faraday::Connection]
def connection(options={})
  settings = add_default_options(options)
  credentials = oauth_opts(settings)

  Faraday::Connection.new(settings) do |builder|
    builder.request(:oauth, credentials) if credentials
    builder.request(:url_encoded)
    builder.response(:mashify)
    builder.response(:logger) if ENV['DEBUG']
    builder.response(:json)
    builder.adapter(adapter)
  end
end
45
+
46
# Extracts the OAuth consumer credentials from a connection options hash.
#
# Both :consumer_key and :consumer_secret are always removed from `opts`
# (the hash is modified in place). Credentials are returned only when both
# values are actually set, so request signing is skipped when no key or
# secret has been configured.
#
# @param opts [Hash] connection options; modified in place
# @return [Hash, nil] {:consumer_secret => ..., :consumer_key => ...}, or nil
#   when either value is nil or an empty string
def oauth_opts(opts)
  key = opts.delete(:consumer_key)
  secret = opts.delete(:consumer_secret)
  return nil if key.to_s.empty? || secret.to_s.empty?

  {:consumer_secret => secret, :consumer_key => key}
end
53
+
54
+ end
55
+ end
@@ -0,0 +1,58 @@
1
+ # encoding: utf-8
2
+ require 'language_list'
3
+
4
+ module YahooContentAnalysis
5
+ class Response
6
+ attr_accessor :raw, :language, :topics, :tags, :entities, :relations, :locations
7
+
8
+ def initialize(response)
9
+ @raw = response
10
+
11
+ @language = nil
12
+ @topics = []
13
+ @tags = []
14
+ @entities = []
15
+ @relations = []
16
+ @locations = []
17
+
18
+ parse(response)
19
+ end
20
+
21
+ def parse(response)
22
+ r = response.body['query']['results'] || {}
23
+ @language = get_language(r['lang'])
24
+
25
+ @topics = Array((r['yctCategories'] || {})['yctCategory']).collect do |cat|
26
+ {:name => cat['content'], :score => cat['score']}
27
+ end
28
+
29
+ @entities = Array((r['entities'] || {})['entity']).collect do |ent|
30
+ type = extract_type(ent['types'])
31
+
32
+ entity = {:name => ent['text']['content'], :score => ent['score']}
33
+ entity[:type] = type if type
34
+ entity[:wikipedia_url] = ent['wiki_url'] if ent['wiki_url']
35
+
36
+ ## these aren't showing up in any results, so not worrying about them
37
+ # if cat['related_entities'] && cat['related_entities']['wikipedia']
38
+ # end
39
+
40
+ entity
41
+ end
42
+
43
+ end
44
+
45
+ def get_language(lang)
46
+ return nil unless lang
47
+ l = LanguageList::LanguageInfo.find(lang.split('-')[0].downcase)
48
+ l.name
49
+ end
50
+
51
+ def extract_type(h)
52
+ return nil unless (h && h['type'])
53
+ type = h['type'].is_a?(Array) ? h['type'].first : h['type']
54
+ (type['content'] || '').sub(/^\/(.*)/, '')
55
+ end
56
+
57
+ end
58
+ end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
+ module YahooContentAnalysis
4
+ VERSION = "0.0.1"
5
+ end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'yahoo_content_analysis/version'
5
+ require 'yahoo_content_analysis/configuration'
6
+ require 'yahoo_content_analysis/connection'
7
+ require 'yahoo_content_analysis/response'
8
+ require 'yahoo_content_analysis/client'
9
+
10
+ module YahooContentAnalysis
11
+ extend Configuration
12
+ end
@@ -0,0 +1,7 @@
1
+ require 'yahoo_content_analysis'
2
+ YahooContentAnalysis.configure{|y|
3
+ y.api_key = ENV['YAHOO_API_KEY']
4
+ y.api_secret = ENV['YAHOO_API_SECRET']
5
+ }
6
+
7
+ r = YahooContentAnalysis::Client.new.analyze('Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration.')
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
# Make lib/ loadable so the version constant can be read below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'yahoo_content_analysis/version'

Gem::Specification.new do |gem|
  gem.name          = "yahoo_content_analysis"
  gem.version       = YahooContentAnalysis::VERSION
  gem.authors       = ["Andrew Kuklewicz"]
  gem.email         = ["andrew@prx.org"]
  gem.description   = %q{Use the Yahoo! Content Analysis API to extract topics and entities.}
  gem.summary       = %q{Use the Yahoo! Content Analysis API to extract topics and entities.}
  gem.homepage      = "https://github.com/PRX/yahoo_content_analysis"
  # Declared so the packaged metadata matches the MIT text in LICENSE.txt.
  gem.licenses      = ["MIT"]

  # Package exactly the files tracked by git.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency('faraday', ['>= 0.7.4', '< 0.9'])
  gem.add_runtime_dependency('faraday_middleware', '~> 0.9')
  gem.add_runtime_dependency('multi_json', '>= 1.0.3', '~> 1.0')
  gem.add_runtime_dependency('excon')
  gem.add_runtime_dependency('hashie', '>= 0.4.0')
  gem.add_runtime_dependency('simple_oauth')
  gem.add_runtime_dependency('language_list')

  gem.add_development_dependency('rake')
  gem.add_development_dependency('minitest')
end
metadata ADDED
@@ -0,0 +1,221 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yahoo_content_analysis
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Kuklewicz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: faraday
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.7.4
22
+ - - <
23
+ - !ruby/object:Gem::Version
24
+ version: '0.9'
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.4
33
+ - - <
34
+ - !ruby/object:Gem::Version
35
+ version: '0.9'
36
+ - !ruby/object:Gem::Dependency
37
+ name: faraday_middleware
38
+ requirement: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: '0.9'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ~>
50
+ - !ruby/object:Gem::Version
51
+ version: '0.9'
52
+ - !ruby/object:Gem::Dependency
53
+ name: multi_json
54
+ requirement: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: 1.0.3
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ version: '1.0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: 1.0.3
71
+ - - ~>
72
+ - !ruby/object:Gem::Version
73
+ version: '1.0'
74
+ - !ruby/object:Gem::Dependency
75
+ name: excon
76
+ requirement: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ - !ruby/object:Gem::Dependency
91
+ name: hashie
92
+ requirement: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: 0.4.0
98
+ type: :runtime
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: 0.4.0
106
+ - !ruby/object:Gem::Dependency
107
+ name: simple_oauth
108
+ requirement: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ type: :runtime
115
+ prerelease: false
116
+ version_requirements: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ - !ruby/object:Gem::Dependency
123
+ name: language_list
124
+ requirement: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ! '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ type: :runtime
131
+ prerelease: false
132
+ version_requirements: !ruby/object:Gem::Requirement
133
+ none: false
134
+ requirements:
135
+ - - ! '>='
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rake
140
+ requirement: !ruby/object:Gem::Requirement
141
+ none: false
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ - !ruby/object:Gem::Dependency
155
+ name: minitest
156
+ requirement: !ruby/object:Gem::Requirement
157
+ none: false
158
+ requirements:
159
+ - - ! '>='
160
+ - !ruby/object:Gem::Version
161
+ version: '0'
162
+ type: :development
163
+ prerelease: false
164
+ version_requirements: !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - ! '>='
168
+ - !ruby/object:Gem::Version
169
+ version: '0'
170
+ description: Use the Yahoo! Content Analysis API to extract topics and entities.
171
+ email:
172
+ - andrew@prx.org
173
+ executables: []
174
+ extensions: []
175
+ extra_rdoc_files: []
176
+ files:
177
+ - .gitignore
178
+ - Gemfile
179
+ - LICENSE.txt
180
+ - README.md
181
+ - Rakefile
182
+ - lib/yahoo_content_analysis.rb
183
+ - lib/yahoo_content_analysis/client.rb
184
+ - lib/yahoo_content_analysis/configuration.rb
185
+ - lib/yahoo_content_analysis/connection.rb
186
+ - lib/yahoo_content_analysis/response.rb
187
+ - lib/yahoo_content_analysis/version.rb
188
+ - test/client_test.rb
189
+ - yahoo_content_analysis.gemspec
190
+ homepage: https://github.com/PRX/yahoo_content_analysis
191
+ licenses: []
192
+ post_install_message:
193
+ rdoc_options: []
194
+ require_paths:
195
+ - lib
196
+ required_ruby_version: !ruby/object:Gem::Requirement
197
+ none: false
198
+ requirements:
199
+ - - ! '>='
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ segments:
203
+ - 0
204
+ hash: 2699003970576411257
205
+ required_rubygems_version: !ruby/object:Gem::Requirement
206
+ none: false
207
+ requirements:
208
+ - - ! '>='
209
+ - !ruby/object:Gem::Version
210
+ version: '0'
211
+ segments:
212
+ - 0
213
+ hash: 2699003970576411257
214
+ requirements: []
215
+ rubyforge_project:
216
+ rubygems_version: 1.8.23
217
+ signing_key:
218
+ specification_version: 3
219
+ summary: Use the Yahoo! Content Analysis API to extract topics and entities.
220
+ test_files:
221
+ - test/client_test.rb