yahoo_content_analysis 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/.gitignore ADDED
@@ -0,0 +1,18 @@
1
+ *.gem
2
+ *.rbc
3
+ .bundle
4
+ .config
5
+ .key
6
+ .yardoc
7
+ Gemfile.lock
8
+ InstalledFiles
9
+ _yardoc
10
+ coverage
11
+ doc/
12
+ lib/bundler/man
13
+ pkg
14
+ rdoc
15
+ spec/reports
16
+ test/tmp
17
+ test/version_tmp
18
+ tmp
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in yahoo_content_analysis.gemspec
4
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,22 @@
1
+ Copyright (c) 2013 Andrew Kuklewicz
2
+
3
+ MIT License
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining
6
+ a copy of this software and associated documentation files (the
7
+ "Software"), to deal in the Software without restriction, including
8
+ without limitation the rights to use, copy, modify, merge, publish,
9
+ distribute, sublicense, and/or sell copies of the Software, and to
10
+ permit persons to whom the Software is furnished to do so, subject to
11
+ the following conditions:
12
+
13
+ The above copyright notice and this permission notice shall be
14
+ included in all copies or substantial portions of the Software.
15
+
16
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
17
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
19
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
20
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
21
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
22
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,42 @@
1
+ # YahooContentAnalysis
2
+
3
+ Use the Yahoo! Content Analysis API to extract topics and entities.
4
+
5
+ OAuth access is implemented, though not necessary (the docs imply higher limits if requests are signed). This should be a useful example for those wanting to do 2-legged OAuth access to Yahoo APIs using Faraday and the OAuth Faraday Middleware.
6
+
7
+ N.B. - I am not seeing the additional metadata nor related entities returned as the Yahoo docs claim they should.
8
+
9
+ ## Installation
10
+
11
+ Add this line to your application's Gemfile:
12
+
13
+ gem 'yahoo_content_analysis'
14
+
15
+ And then execute:
16
+
17
+ $ bundle
18
+
19
+ Or install it yourself as:
20
+
21
+ $ gem install yahoo_content_analysis
22
+
23
+ ## Usage
24
+ ```ruby
25
+ require 'yahoo_content_analysis'
26
+
27
+ YahooContentAnalysis.configure{|y|
28
+ y.api_key = ENV['YAHOO_API_KEY']
29
+ y.api_secret = ENV['YAHOO_API_SECRET']
30
+ }
31
+
32
+ r = YahooContentAnalysis::Client.new.analyze('Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration.')
33
+
34
+ names = r.entities.collect{|e| e.name}
35
+ ```
36
+ ## Contributing
37
+
38
+ 1. Fork it
39
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
40
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
41
+ 4. Push to the branch (`git push origin my-new-feature`)
42
+ 5. Create new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
@@ -0,0 +1,69 @@
1
+ # encoding: utf-8
2
+
3
module YahooContentAnalysis
  # Client for the Yahoo! Content Analysis table, queried through YQL.
  #
  # A reader and a writer are generated for every key in Configuration.keys.
  # Each writer stores the value in the matching instance variable and also
  # records it in +current_options+.
  class Client

    include Connection

    attr_reader(*YahooContentAnalysis::Configuration.keys)

    attr_accessor :current_options

    YahooContentAnalysis::Configuration.keys.each do |key|
      define_method("#{key}=") do |value|
        instance_variable_set("@#{key}", value)
        current_options.merge!(key.to_sym => value)
      end
    end

    # Builds a client from the module-level options overridden by +options+.
    # When a block is given, the new client is yielded to it after setup.
    def initialize(options={}, &block)
      setup(options)
      yield(self) if block_given?
    end

    # Merges +options+ over YahooContentAnalysis.options and assigns every
    # configuration key through its writer.
    def setup(options={})
      options = YahooContentAnalysis.options.merge(options)
      self.current_options = options
      Configuration.keys.each do |key|
        send("#{key}=", options[key])
      end
    end

    # Posts the content to the endpoint and wraps the HTTP response.
    #
    # content - a String of text, or a URI whose document should be analyzed.
    # opts    - optional request parameters (e.g. :max => 10) that override
    #           the defaults returned by #options. Keys and values are sent
    #           as strings.
    #
    # Returns a YahooContentAnalysis::Response.
    # Raises RuntimeError when content is nil or false.
    def analyze(content, opts={})
      raise 'Specify a value for the content' unless content
      params = options(content).merge(stringify_params(opts))
      response = connection.post do |request|
        request.params = params
      end
      YahooContentAnalysis::Response.new(response)
    end

    # Returns the YQL statement used to analyze +content+.
    def query(content)
      "SELECT * FROM contentanalysis.analyze WHERE related_entities = \"true\" and #{condition(content)}"
    end

    # Chooses the url condition for URI objects and the text condition for
    # everything else.
    def condition(content)
      content.is_a?(URI) ? url(content) : text(content)
    end

    # Returns the YQL condition selecting a document by URL.
    def url(content)
      %{ url = "#{escape_yql(content)}" }
    end

    # Returns the YQL condition for literal text.
    def text(content)
      %{ text = "#{escape_yql(content)}" }
    end

    # Returns the default request parameters for analyzing +content+.
    def options(content)
      {
        'q'                => query(content),
        'format'           => 'json',
        'max'              => '50',
        'related_entities' => 'true',
        'show_metadata'    => 'true'
      }
    end

    private

    # Escapes backslashes and double quotes so the value cannot terminate the
    # double-quoted YQL string it is interpolated into. The block form of gsub
    # is used so the replacement is not re-interpreted as a backreference.
    def escape_yql(value)
      value.to_s.gsub(/[\\"]/) { |char| "\\#{char}" }
    end

    # Converts an options hash to string keys and string values.
    def stringify_params(opts)
      (opts || {}).each_with_object({}) do |(key, value), params|
        params[key.to_s] = value.to_s
      end
    end

  end
end
@@ -0,0 +1,103 @@
1
+ # encoding: utf-8
2
+
3
+ # these are additional params from the docs, figure out how to use them - some don't seem to do squat.
4
+ # related_entities boolean: true (default), false Whether or not to include related entities/concepts in the response
5
+ # show_metadata boolean: true (default), false Whether or not to include entity/concept metadata in the response
6
+ # enable_categorizer boolean: true (default), false Whether or not to include document category information in the response
7
+ # unique boolean: true, false (default) Whether or not to detect only one occurrence of an entity or a concept that may appear multiple times
8
+ # max integer: 100 (default) Maximum number of entities/concepts to detect
9
+
10
module YahooContentAnalysis

  # Mixin that gives the extending module one accessor per option, plus
  # +configure+, +options+ and +reset!+. Extending the mixin immediately
  # resets every option to its default (see .extended).
  module Configuration

    # Every option that can be set globally or per client.
    VALID_OPTIONS_KEYS = [
      :api_key,
      :api_secret,
      :adapter,
      :endpoint,
      :user_agent,
      :format,
      :max,
      :related_entities,
      :show_metadata,
      :enable_categorizer,
      :unique
    ].freeze

    # The subset of options that are request parameters rather than
    # connection settings.
    VALID_PARAMS = [
      :format,
      :max,
      :related_entities,
      :show_metadata,
      :enable_categorizer,
      :unique
    ].freeze

    # this you need to get from yahoo - go register an app!
    DEFAULT_API_KEY = nil

    DEFAULT_API_SECRET = nil

    # Adapters are whatever Faraday supports - I like excon a lot, so I'm defaulting it
    DEFAULT_ADAPTER = :excon

    # The api endpoint for YQL
    DEFAULT_ENDPOINT = 'http://query.yahooapis.com/v1/public/yql'.freeze

    # The value sent in the http header for 'User-Agent' if none is set
    DEFAULT_USER_AGENT = "YahooContentAnalysis Ruby Gem #{YahooContentAnalysis::VERSION}".freeze

    DEFAULT_FORMAT = :json

    DEFAULT_MAX = 50

    DEFAULT_RELATED_ENTITIES = 'true'

    DEFAULT_SHOW_METADATA = 'true'

    DEFAULT_ENABLE_CATEGORIZER = 'true'

    DEFAULT_UNIQUE = 'true'

    attr_accessor(*VALID_OPTIONS_KEYS)

    # Convenience method to allow for global setting of configuration options.
    # Yields the receiver so options can be assigned inside the block.
    def configure
      yield self
    end

    # Hook run when a module extends Configuration: start from the defaults.
    def self.extended(base)
      base.reset!
    end

    class << self
      # Returns the list of valid option names.
      def keys
        VALID_OPTIONS_KEYS
      end
    end

    # Returns a new Hash mapping each option name to its current value.
    def options
      VALID_OPTIONS_KEYS.each_with_object({}) do |key, current|
        current[key] = send(key)
      end
    end

    # Reset configuration options to their defaults. Returns self.
    def reset!
      self.api_key            = DEFAULT_API_KEY
      self.api_secret         = DEFAULT_API_SECRET
      self.adapter            = DEFAULT_ADAPTER
      self.endpoint           = DEFAULT_ENDPOINT
      self.user_agent         = DEFAULT_USER_AGENT
      self.format             = DEFAULT_FORMAT
      self.max                = DEFAULT_MAX
      self.related_entities   = DEFAULT_RELATED_ENTITIES
      self.show_metadata      = DEFAULT_SHOW_METADATA
      self.enable_categorizer = DEFAULT_ENABLE_CATEGORIZER
      self.unique             = DEFAULT_UNIQUE
      self
    end

  end
end
@@ -0,0 +1,55 @@
1
+ # encoding: utf-8
2
+
3
+ require 'faraday_middleware'
4
+
5
module YahooContentAnalysis
  # Builds the Faraday connection used for requests. The including object is
  # expected to respond to +api_key+, +api_secret+, +user_agent+, +endpoint+
  # and +adapter+.
  module Connection

    ALLOWED_OPTIONS = [
      :headers,
      :url,
      :params,
      :request,
      :ssl
    ].freeze

    # Returns a new options Hash: the defaults below overridden by +opts+,
    # with any caller-supplied :headers merged over the default headers.
    # The caller's Hash is not modified.
    def add_default_options(opts={})
      opts = opts.dup
      headers = opts.delete(:headers) || {}
      defaults = {
        :consumer_key    => api_key,
        :consumer_secret => api_secret,
        :headers => {
          # generic http headers
          'User-Agent' => user_agent,
          'Accept'     => "application/json;charset=utf-8"
        },
        # NOTE(review): certificate verification is disabled; this only
        # matters when an https endpoint is configured - confirm intent.
        :ssl => {:verify => false},
        :url => endpoint
      }
      options = defaults.merge(opts)
      options[:headers] = options[:headers].merge(headers)
      options
    end

    # Returns a Faraday::Connection configured with url-encoded requests,
    # JSON parsing, mashified bodies, the configured adapter, and OAuth
    # signing when credentials are available. Response logging is enabled
    # when ENV['DEBUG'] is set.
    def connection(options={})
      opts = add_default_options(options)
      oauth = oauth_opts(opts)
      Faraday::Connection.new(opts) do |conn|
        conn.request :oauth, oauth if oauth
        conn.request :url_encoded
        conn.response :mashify
        conn.response :logger if ENV['DEBUG']
        conn.response :json
        conn.adapter(adapter)
      end
    end

    # Removes the OAuth credentials from +opts+ (they are not connection
    # options) and returns them as a Hash. Returns nil when either credential
    # is missing or blank, so unsigned requests are made without the OAuth
    # middleware.
    def oauth_opts(opts)
      key    = opts.delete(:consumer_key)
      secret = opts.delete(:consumer_secret)
      return nil if key.to_s.empty? || secret.to_s.empty?
      {:consumer_secret => secret, :consumer_key => key}
    end

  end
end
@@ -0,0 +1,58 @@
1
+ # encoding: utf-8
2
+ require 'language_list'
3
+
4
module YahooContentAnalysis
  # Parses a content-analysis HTTP response into plain Ruby values.
  #
  # raw      - the response object as given to the constructor
  # language - language name looked up from the result's 'lang' code, or nil
  # topics   - Array of {:name, :score} hashes from 'yctCategories'
  # entities - Array of {:name, :score[, :type][, :wikipedia_url]} hashes
  # tags, relations, locations - always empty arrays; nothing fills them yet
  class Response
    attr_accessor :raw, :language, :topics, :tags, :entities, :relations, :locations

    # response - any object whose +body+ is the parsed JSON hash.
    def initialize(response)
      @raw = response

      @language  = nil
      @topics    = []
      @tags      = []
      @entities  = []
      @relations = []
      @locations = []

      parse(response)
    end

    # Fills language, topics and entities from response.body. A missing
    # 'query' or 'results' section leaves everything empty.
    def parse(response)
      body = response.body || {}
      results = (body['query'] || {})['results'] || {}
      @language = get_language(results['lang'])

      @topics = wrap((results['yctCategories'] || {})['yctCategory']).collect do |cat|
        {:name => cat['content'], :score => cat['score']}
      end

      @entities = wrap((results['entities'] || {})['entity']).collect do |ent|
        entity = {:name => (ent['text'] || {})['content'], :score => ent['score']}

        type = extract_type(ent['types'])
        entity[:type] = type if type
        entity[:wikipedia_url] = ent['wiki_url'] if ent['wiki_url']

        ## related entities aren't showing up in any results, so they are not parsed

        entity
      end
    end

    # Maps a language tag such as "en-US" to a language name via LanguageList.
    # Returns nil when the tag is missing, empty, or unknown.
    def get_language(lang)
      return nil unless lang
      code = lang.to_s.split('-').first
      return nil unless code
      info = LanguageList::LanguageInfo.find(code.downcase)
      info && info.name
    end

    # Returns the first type of an entity with its leading slash removed
    # ("/person" becomes "person"), or nil when no type is present.
    #
    # h - the entity's 'types' value: a hash whose 'type' is one type or an
    #     Array of types; each type is a hash with 'content' or a plain String.
    def extract_type(h)
      return nil unless h && h['type']
      type = h['type'].is_a?(Array) ? h['type'].first : h['type']
      content = type.is_a?(Hash) ? type['content'] : type
      name = content.to_s.sub(/\A\//, '')
      name.empty? ? nil : name
    end

    private

    # Like Array(), but keeps a single Hash as one element; Array(hash) would
    # explode it into key/value pairs.
    def wrap(value)
      return [] if value.nil?
      value.is_a?(Array) ? value : [value]
    end

  end
end
@@ -0,0 +1,5 @@
1
+ # encoding: utf-8
2
+
3
module YahooContentAnalysis
  # Gem release number; read by the gemspec and embedded in the default
  # User-Agent string.
  VERSION = '0.0.1'
end
@@ -0,0 +1,12 @@
1
+ # encoding: utf-8
2
+
3
+ require 'rubygems'
4
+ require 'yahoo_content_analysis/version'
5
+ require 'yahoo_content_analysis/configuration'
6
+ require 'yahoo_content_analysis/connection'
7
+ require 'yahoo_content_analysis/response'
8
+ require 'yahoo_content_analysis/client'
9
+
10
# Top-level namespace. Extending Configuration gives the module itself the
# configure, options and reset! methods and one accessor per option.
module YahooContentAnalysis
  extend Configuration
end
@@ -0,0 +1,7 @@
1
+ require 'yahoo_content_analysis'
2
# Smoke script: configures the gem from the environment and performs one
# analyze call against the configured endpoint. It makes no assertions.
YahooContentAnalysis.configure do |config|
  config.api_key    = ENV['YAHOO_API_KEY']
  config.api_secret = ENV['YAHOO_API_SECRET']
end

sample_text = 'Italian sculptors and painters of the renaissance favored the Virgin Mary for inspiration.'
r = YahooContentAnalysis::Client.new.analyze(sample_text)
@@ -0,0 +1,30 @@
1
+ # -*- encoding: utf-8 -*-
2
# Put lib/ on the load path so the version file can be required below.
lib = File.expand_path('../lib', __FILE__)
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
require 'yahoo_content_analysis/version'

Gem::Specification.new do |gem|
  gem.name          = "yahoo_content_analysis"
  gem.version       = YahooContentAnalysis::VERSION
  gem.authors       = ["Andrew Kuklewicz"]
  gem.email         = ["andrew@prx.org"]
  gem.description   = %q{Use the Yahoo! Content Analysis API to extract topics and entities.}
  gem.summary       = %q{Use the Yahoo! Content Analysis API to extract topics and entities.}
  gem.homepage      = "https://github.com/PRX/yahoo_content_analysis"
  # Declared to match the bundled LICENSE.txt.
  gem.license       = "MIT"

  # Package exactly what git tracks; executables and test files are derived
  # from that list.
  gem.files         = `git ls-files`.split($/)
  gem.executables   = gem.files.grep(%r{^bin/}).map{ |f| File.basename(f) }
  gem.test_files    = gem.files.grep(%r{^(test|spec|features)/})
  gem.require_paths = ["lib"]

  gem.add_runtime_dependency('faraday', ['>= 0.7.4', '< 0.9'])
  gem.add_runtime_dependency('faraday_middleware', '~> 0.9')
  gem.add_runtime_dependency('multi_json', '>= 1.0.3', '~> 1.0')
  gem.add_runtime_dependency('excon')
  gem.add_runtime_dependency('hashie', '>= 0.4.0')
  gem.add_runtime_dependency('simple_oauth')
  gem.add_runtime_dependency('language_list')

  gem.add_development_dependency('rake')
  gem.add_development_dependency('minitest')
end
metadata ADDED
@@ -0,0 +1,221 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: yahoo_content_analysis
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.0.1
5
+ prerelease:
6
+ platform: ruby
7
+ authors:
8
+ - Andrew Kuklewicz
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+ date: 2013-04-09 00:00:00.000000000 Z
13
+ dependencies:
14
+ - !ruby/object:Gem::Dependency
15
+ name: faraday
16
+ requirement: !ruby/object:Gem::Requirement
17
+ none: false
18
+ requirements:
19
+ - - ! '>='
20
+ - !ruby/object:Gem::Version
21
+ version: 0.7.4
22
+ - - <
23
+ - !ruby/object:Gem::Version
24
+ version: '0.9'
25
+ type: :runtime
26
+ prerelease: false
27
+ version_requirements: !ruby/object:Gem::Requirement
28
+ none: false
29
+ requirements:
30
+ - - ! '>='
31
+ - !ruby/object:Gem::Version
32
+ version: 0.7.4
33
+ - - <
34
+ - !ruby/object:Gem::Version
35
+ version: '0.9'
36
+ - !ruby/object:Gem::Dependency
37
+ name: faraday_middleware
38
+ requirement: !ruby/object:Gem::Requirement
39
+ none: false
40
+ requirements:
41
+ - - ~>
42
+ - !ruby/object:Gem::Version
43
+ version: '0.9'
44
+ type: :runtime
45
+ prerelease: false
46
+ version_requirements: !ruby/object:Gem::Requirement
47
+ none: false
48
+ requirements:
49
+ - - ~>
50
+ - !ruby/object:Gem::Version
51
+ version: '0.9'
52
+ - !ruby/object:Gem::Dependency
53
+ name: multi_json
54
+ requirement: !ruby/object:Gem::Requirement
55
+ none: false
56
+ requirements:
57
+ - - ! '>='
58
+ - !ruby/object:Gem::Version
59
+ version: 1.0.3
60
+ - - ~>
61
+ - !ruby/object:Gem::Version
62
+ version: '1.0'
63
+ type: :runtime
64
+ prerelease: false
65
+ version_requirements: !ruby/object:Gem::Requirement
66
+ none: false
67
+ requirements:
68
+ - - ! '>='
69
+ - !ruby/object:Gem::Version
70
+ version: 1.0.3
71
+ - - ~>
72
+ - !ruby/object:Gem::Version
73
+ version: '1.0'
74
+ - !ruby/object:Gem::Dependency
75
+ name: excon
76
+ requirement: !ruby/object:Gem::Requirement
77
+ none: false
78
+ requirements:
79
+ - - ! '>='
80
+ - !ruby/object:Gem::Version
81
+ version: '0'
82
+ type: :runtime
83
+ prerelease: false
84
+ version_requirements: !ruby/object:Gem::Requirement
85
+ none: false
86
+ requirements:
87
+ - - ! '>='
88
+ - !ruby/object:Gem::Version
89
+ version: '0'
90
+ - !ruby/object:Gem::Dependency
91
+ name: hashie
92
+ requirement: !ruby/object:Gem::Requirement
93
+ none: false
94
+ requirements:
95
+ - - ! '>='
96
+ - !ruby/object:Gem::Version
97
+ version: 0.4.0
98
+ type: :runtime
99
+ prerelease: false
100
+ version_requirements: !ruby/object:Gem::Requirement
101
+ none: false
102
+ requirements:
103
+ - - ! '>='
104
+ - !ruby/object:Gem::Version
105
+ version: 0.4.0
106
+ - !ruby/object:Gem::Dependency
107
+ name: simple_oauth
108
+ requirement: !ruby/object:Gem::Requirement
109
+ none: false
110
+ requirements:
111
+ - - ! '>='
112
+ - !ruby/object:Gem::Version
113
+ version: '0'
114
+ type: :runtime
115
+ prerelease: false
116
+ version_requirements: !ruby/object:Gem::Requirement
117
+ none: false
118
+ requirements:
119
+ - - ! '>='
120
+ - !ruby/object:Gem::Version
121
+ version: '0'
122
+ - !ruby/object:Gem::Dependency
123
+ name: language_list
124
+ requirement: !ruby/object:Gem::Requirement
125
+ none: false
126
+ requirements:
127
+ - - ! '>='
128
+ - !ruby/object:Gem::Version
129
+ version: '0'
130
+ type: :runtime
131
+ prerelease: false
132
+ version_requirements: !ruby/object:Gem::Requirement
133
+ none: false
134
+ requirements:
135
+ - - ! '>='
136
+ - !ruby/object:Gem::Version
137
+ version: '0'
138
+ - !ruby/object:Gem::Dependency
139
+ name: rake
140
+ requirement: !ruby/object:Gem::Requirement
141
+ none: false
142
+ requirements:
143
+ - - ! '>='
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ none: false
150
+ requirements:
151
+ - - ! '>='
152
+ - !ruby/object:Gem::Version
153
+ version: '0'
154
+ - !ruby/object:Gem::Dependency
155
+ name: minitest
156
+ requirement: !ruby/object:Gem::Requirement
157
+ none: false
158
+ requirements:
159
+ - - ! '>='
160
+ - !ruby/object:Gem::Version
161
+ version: '0'
162
+ type: :development
163
+ prerelease: false
164
+ version_requirements: !ruby/object:Gem::Requirement
165
+ none: false
166
+ requirements:
167
+ - - ! '>='
168
+ - !ruby/object:Gem::Version
169
+ version: '0'
170
+ description: Use the Yahoo! Content Analysis API to extract topics and entities.
171
+ email:
172
+ - andrew@prx.org
173
+ executables: []
174
+ extensions: []
175
+ extra_rdoc_files: []
176
+ files:
177
+ - .gitignore
178
+ - Gemfile
179
+ - LICENSE.txt
180
+ - README.md
181
+ - Rakefile
182
+ - lib/yahoo_content_analysis.rb
183
+ - lib/yahoo_content_analysis/client.rb
184
+ - lib/yahoo_content_analysis/configuration.rb
185
+ - lib/yahoo_content_analysis/connection.rb
186
+ - lib/yahoo_content_analysis/response.rb
187
+ - lib/yahoo_content_analysis/version.rb
188
+ - test/client_test.rb
189
+ - yahoo_content_analysis.gemspec
190
+ homepage: https://github.com/PRX/yahoo_content_analysis
191
+ licenses: []
192
+ post_install_message:
193
+ rdoc_options: []
194
+ require_paths:
195
+ - lib
196
+ required_ruby_version: !ruby/object:Gem::Requirement
197
+ none: false
198
+ requirements:
199
+ - - ! '>='
200
+ - !ruby/object:Gem::Version
201
+ version: '0'
202
+ segments:
203
+ - 0
204
+ hash: 2699003970576411257
205
+ required_rubygems_version: !ruby/object:Gem::Requirement
206
+ none: false
207
+ requirements:
208
+ - - ! '>='
209
+ - !ruby/object:Gem::Version
210
+ version: '0'
211
+ segments:
212
+ - 0
213
+ hash: 2699003970576411257
214
+ requirements: []
215
+ rubyforge_project:
216
+ rubygems_version: 1.8.23
217
+ signing_key:
218
+ specification_version: 3
219
+ summary: Use the Yahoo! Content Analysis API to extract topics and entities.
220
+ test_files:
221
+ - test/client_test.rb