sema_api_ruby 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5da03aaaec70b3e9e5bf3195c9c9552e348c9689
4
+ data.tar.gz: 3b2b8317183a67ee898ba0735bc67d569b8a8780
5
+ SHA512:
6
+ metadata.gz: 5826f9f85f98f38747a69adf8981de0c70d57b1d12e7c60bb4df0856ee47fc89e9e1cce7ee224f4ac14eb86ce5f91e86ef437d2388ec5c52fc38e1b23202b8b4
7
+ data.tar.gz: c5a40bbacc6a83e769ee7de2489283ce3295ab4c2a49121515a182bef315c5a56c66a691a9f70354a88126b313d9e61a3355b3064f2fa4fd787a86b44ec0c8b3
data/.env.sample ADDED
@@ -0,0 +1 @@
1
+ TEST_ACCESS_TOKEN=testaccesstoken
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .env
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sema_api_ruby.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # SemaApiRuby
2
+
3
+ Ruby client for Sema Media Data's image OCR API. It is used to estimate an image's text-to-image ratio so that images can be validated before they are submitted to Facebook Ads.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'sema_api_ruby'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install sema_api_ruby
20
+
21
+ ## Usage
22
+
23
+ Configure the client by providing an API token. Also tweak the configuration for any of the OCR settings.
24
+
25
+ ```ruby
26
+ SemaApiRuby.configure do |config|
27
+ config.access_token = 'myaccesstokenhere'
28
+
29
+ #optional - below are the defaults
30
+ config.ocr_settings = {
31
+ lang: 'en', # english language
32
+ outform: 'json', # output as json
33
+ sp: 'True', # perform spellcheck
34
+ mh: 'True', # multi-hypothesis... increases accuracy at slight performance cost
35
+ df: 'True', # dictionary based word filtering
36
+ noempty: 'True' # only return detected objects that are not empty
37
+ }
38
+ end
39
+
40
+ ```
41
+
42
+ Post a request to the ocr endpoint with the path to an image to get back a raw response from the api (post made using Faraday gem)
43
+ ```ruby
44
+ client = SemaApiRuby.new
45
+ file_path = '/path/to/your/image.jpg'
46
+
47
+ client.post_ocr_image(file_path)
48
+ # => #<Faraday::Response:0x007fcb641591e0 ...
49
+ ```
50
+
51
+ Use this method to both post to the OCR endpoint and return a text-to-image ratio
52
+ ```ruby
53
+ client = SemaApiRuby.new
54
+ file_path = '/path/to/your/image.jpg'
55
+
56
+ client.text_to_image_ratio(file_path)
57
+ # => 0.2123
58
+ ```
59
+
60
+ ## Testing
61
+
62
+ Copy the sample dotenv file to .env
63
+ ```
64
+ $> cp .env.sample .env
65
+ ```
66
+
67
+ Update it with a testing API token if you want to run specs against the live API rather than against the network stubs
68
+
69
+ Run the specs
70
+ ```
71
+ $> bundle exec rspec spec
72
+ ```
73
+
74
+ This gem uses a testing gem called VCR to record and replay HTTP requests. If the actual API has changed, you will need to delete all "cassettes" located in ```spec/cassettes/``` so that requests will run against the live API.
75
+
76
+ ## Contributing
77
+
78
+ 1. Fork it ( https://github.com/[my-github-username]/sema_api_ruby/fork )
79
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
80
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
81
+ 4. Push to the branch (`git push origin my-new-feature`)
82
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "sema_api_ruby"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,44 @@
1
+ require 'json'
2
+
3
module SemaApiRuby

  class Client

    # OCR endpoint helpers, mixed into SemaApiRuby::Client.
    #
    # The including class must provide +ocr_settings+, +api_version+,
    # +conn_with_auth+, +mime_type+ and +image_size+.
    module Ocr

      # Posts the image to the OCR endpoint and converts the first frame of
      # the response into a text-to-image ratio.
      #
      # @param _file_path [String] path to the image file
      # @return [Float] result of SemaApiRuby::GetTextToImageRatio.call, or
      #   0.0 when the response contains no frames
      # @raise [SemaApiRuby::Client::EmptyResponse] when the parsed body is
      #   not a JSON object or has no 'frames' key
      # @raise [JSON::ParserError] when the body is not valid JSON (other than
      #   the known malformed "no text" response handled below)
      def text_to_image_ratio(_file_path)
        response = post_ocr_image(_file_path)
        body = JSON.parse(response.body)

        # A body that parses to something other than an object (e.g. `[]`)
        # carries no frames either; report it the same way instead of
        # failing with a TypeError on the key lookup.
        unless body.is_a?(Hash) && body['frames']
          raise SemaApiRuby::Client::EmptyResponse
        end

        return 0.0 if body['frames'].empty?

        # A frame without a 'results' key means no detected rectangles.
        rectangles = body['frames'].first['results'] || []

        SemaApiRuby::GetTextToImageRatio.call(image_size(_file_path), rectangles)

      # workaround for bug in sema media api where they are returning invalid json
      # when an image does not have any text results. Remove when they start returning
      # valid json
      rescue JSON::ParserError => e
        raise unless e.message =~ /unexpected token at '\]\}'/

        0.0
      end

      # Uploads the image as a multipart form field named +file+, together
      # with the configured OCR settings.
      #
      # @param _file_path [String] path to the image file
      # @return [Object] the value returned by +conn_with_auth.post+
      def post_ocr_image(_file_path)
        upload = Faraday::UploadIO.new(_file_path, mime_type(_file_path))
        post_body = ocr_settings.merge(file: upload)

        conn_with_auth.post("/api/#{api_version}/ocr/", post_body)
      end

    end

  end

end
@@ -0,0 +1,61 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+ require 'fastimage'
4
+ require 'sema_api_ruby/client/ocr'
5
+
6
module SemaApiRuby

  # API client. Holds a per-instance copy of every configuration key and
  # builds the Faraday connection used by the Ocr mixin.
  class Client

    include SemaApiRuby::Client::Ocr

    # setup accessors on this client for all config keys
    attr_accessor(*Configuration::VALID_CONFIG_KEYS)

    # @param options [Hash] per-client overrides, keyed by the symbols in
    #   SemaApiRuby::Configuration::VALID_CONFIG_KEYS; other keys are ignored
    # @raise [NoTokenError] when no usable access token is available
    def initialize(options = {})
      # Use options passed in, but fall back to config module defaults
      merged_options = SemaApiRuby.options.merge(options)

      # Copy the merged values to this client and ignore those
      # not part of our configuration
      SemaApiRuby::Configuration::VALID_CONFIG_KEYS.each do |key|
        public_send("#{key}=", merged_options[key])
      end

      # An empty or whitespace-only token (e.g. from an unset env var) is as
      # unusable as a missing one, so reject it here rather than at the API.
      if access_token.nil? || access_token.to_s.strip.empty?
        raise NoTokenError, 'you must provide an access token'
      end
    end

    private

    # Memoized Faraday connection to +api_endpoint+ with multipart and
    # url-encoded request middleware.
    def conn
      @conn ||= Faraday.new(url: api_endpoint) do |faraday|
        faraday.request :multipart
        faraday.request :url_encoded
        # Response JSON parsing is left disabled; Ocr#text_to_image_ratio
        # parses the raw body itself.
        #faraday.response :json

        faraday.adapter Faraday.default_adapter
      end
    end

    # The same connection as +conn+, with the API key, user agent and
    # content type headers set on it (done once, then memoized).
    def conn_with_auth
      @conn_with_auth ||= begin
        conn.headers['X-api-key'] = access_token
        conn.headers['User-Agent'] = user_agent
        conn.headers['Content-Type'] = content_type
        conn
      end
    end

    # Builds an "image/<type>" string from the type FastImage reports.
    def mime_type(_file_path)
      "image/#{FastImage.type(_file_path)}"
    end

    # Returns whatever FastImage.size reports for the file.
    def image_size(_file_path)
      FastImage.size(_file_path)
    end

  end

end
@@ -0,0 +1,56 @@
1
module SemaApiRuby

  # Configuration accessors and defaults. Meant to be added to a module or
  # object with +extend+; the +extended+ hook immediately applies the defaults.
  module Configuration

    VALID_CONFIG_KEYS = [
      :access_token,
      :api_endpoint,
      :api_version,
      :user_agent,
      :content_type,
      :ocr_settings,
      :debug_output
    ].freeze

    DEFAULT_API_ENDPOINT = 'https://semamediadata.com/api'.freeze # 1.0/ocr/
    DEFAULT_API_VERSION  = 1.0
    DEFAULT_USER_AGENT   = "SemaApiRuby Gem #{SemaApiRuby::VERSION}".freeze
    DEFAULT_CONTENT_TYPE = 'multipart/form-data'
    DEFAULT_DEBUG_OUTPUT = false # $stdout

    # Frozen so the defaults cannot be altered through a configured copy.
    DEFAULT_OCR_SETTINGS = {
      lang: 'en',       # english language
      outform: 'json',  # output as json
      sp: 'True',       # perform spellcheck
      mh: 'True',       # multi-hypothesis... increases accuracy at slight performance cost
      df: 'True',       # dictionary based word filtering
      noempty: 'True'   # only return detected objects that are not empty
    }.freeze

    attr_accessor(*VALID_CONFIG_KEYS)

    # Yields the receiver so settings can be assigned in a block.
    def configure
      yield self
    end

    # Applies the defaults as soon as the module is extended.
    def self.extended(base)
      base.reset
    end

    # Current settings as a Hash keyed by VALID_CONFIG_KEYS.
    #
    # Built from key/value pairs rather than a flattened argument list so
    # that an Array-valued setting is kept intact.
    #
    # @return [Hash{Symbol => Object}]
    def options
      Hash[VALID_CONFIG_KEYS.map { |key| [key, public_send(key)] }]
    end

    # Restores every setting to its default; the access token becomes nil.
    def reset
      self.access_token = nil
      self.api_endpoint = DEFAULT_API_ENDPOINT
      self.api_version  = DEFAULT_API_VERSION
      self.user_agent   = DEFAULT_USER_AGENT
      self.content_type = DEFAULT_CONTENT_TYPE
      self.debug_output = DEFAULT_DEBUG_OUTPUT
      # Hand out a copy so in-place edits of ocr_settings never reach the
      # shared default hash and a later reset really restores the defaults.
      self.ocr_settings = DEFAULT_OCR_SETTINGS.dup
    end

  end

end
@@ -0,0 +1,11 @@
1
module SemaApiRuby

  class Client

    # Raised by Client#initialize when no access token is available.
    NoTokenError = Class.new(StandardError)

    # Raised by Ocr#text_to_image_ratio when the response has no 'frames'.
    EmptyResponse = Class.new(StandardError)

  end

end
@@ -0,0 +1,34 @@
1
module SemaApiRuby

  # Computes the share of an image's area that is covered by rectangles
  # containing text.
  #
  # Each rectangle is a Hash with 'text', 'width' and 'height' keys;
  # rectangles whose 'text' is nil or empty are ignored. Areas are simply
  # summed, so overlapping rectangles are counted more than once.
  class GetTextToImageRatio

    attr_accessor :rectangles, :image_size

    # Convenience wrapper around +new(...).call+.
    #
    # @param image_size [Array(Numeric, Numeric)] image width and height
    # @param rectangles [Array<Hash>, nil] detected rectangles
    # @return [Float] ratio rounded to 4 decimal places
    def self.call(image_size, rectangles)
      new(image_size, rectangles).call
    end

    def initialize(image_size, rectangles)
      @rectangles = rectangles
      @image_size = image_size
    end

    # @return [Float] summed text-rectangle area divided by the image area,
    #   rounded to 4 decimal places; 0.0 when there are no rectangles
    # @raise [ArgumentError] when rectangles are present but the image size
    #   is missing or has no positive area
    def call
      # nil is treated like an empty list: nothing was detected.
      return 0.0 if rectangles.nil? || rectangles.none?

      width, height = image_size
      # Without a real area the division would fail on nil or produce
      # Infinity/NaN, so fail loudly with a clear message instead.
      unless width && height && (width * height) > 0
        raise ArgumentError, 'image_size must be [width, height] with a positive area'
      end

      text_area = rectangles.inject(0.0) do |sum, rect|
        text = rect['text']
        if text.nil? || text.empty?
          sum
        else
          sum + (rect['width'] * rect['height']).to_f
        end
      end

      (text_area / (width * height).to_f).round(4)
    end

  end

end
@@ -0,0 +1,3 @@
1
module SemaApiRuby
  # Gem version; read by the gemspec and used in the default User-Agent string.
  VERSION = '0.1.0'
end
@@ -0,0 +1,30 @@
1
+ require 'sema_api_ruby/version'
2
+ require 'sema_api_ruby/configuration'
3
+ require 'sema_api_ruby/exceptions'
4
+ require 'sema_api_ruby/get_text_to_image_ratio'
5
+ require 'sema_api_ruby/client'
6
+
7
# Top-level namespace. Extending Configuration gives the module its own
# settings accessors (+configure+, +options+, +reset+, ...).
module SemaApiRuby

  extend Configuration

  # Shortcut for SemaApiRuby::Client.new.
  #
  # @param options [Hash] per-client configuration overrides
  # @return [SemaApiRuby::Client]
  def self.new(options = {})
    SemaApiRuby::Client.new(options)
  end

  # Parent directory of the directory containing this file.
  def self.root
    File.dirname(__dir__)
  end

  # A fresh, mutable copy of the default OCR settings.
  #
  # Derived from Configuration::DEFAULT_OCR_SETTINGS so the two cannot drift
  # apart; values are duplicated so callers can edit the result freely.
  #
  # @return [Hash{Symbol => String}]
  def self.default_ocr_options
    Configuration::DEFAULT_OCR_SETTINGS.each_with_object({}) do |(key, value), copy|
      copy[key] = value.dup
    end
  end

end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sema_api_ruby/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'sema_api_ruby'
8
+ spec.version = SemaApiRuby::VERSION
9
+ spec.authors = ['Jrod']
10
+ spec.email = ['jarrod@jtms.net']
11
+
12
+ spec.summary = %q{Ruby client for Sema Media Data image OCR API}
13
+ spec.description = spec.summary
14
+ spec.homepage = 'https://github.com/jspillers/sema_api_ruby'
15
+
16
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
+ # delete this section to allow pushing this gem to any host.
18
+ #if spec.respond_to?(:metadata)
19
+ # spec.metadata['allowed_push_host'] = 'TODO: Set to "http://mygemserver.com"'
20
+ #else
21
+ # raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
22
+ #end
23
+
24
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ spec.bindir = 'exe'
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ['lib']
28
+
29
+ spec.add_dependency 'faraday', '~> 0.9.1'
30
+ spec.add_dependency 'faraday_middleware', '~> 0.10.0'
31
+ spec.add_dependency 'fastimage', '~> 1.7.0'
32
+
33
+ spec.add_development_dependency 'bundler', '~> 1.9'
34
+ spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'pry', '~> 0.10.1'
36
+ spec.add_development_dependency 'vcr', '~> 2.9.3'
37
+
38
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sema_api_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jrod
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday_middleware
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.10.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.10.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: fastimage
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.7.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.7.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.10.1
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.10.1
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 2.9.3
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 2.9.3
111
+ description: Ruby client for Sema Media Data image OCR API
112
+ email:
113
+ - jarrod@jtms.net
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".env.sample"
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - Gemfile
122
+ - README.md
123
+ - Rakefile
124
+ - bin/console
125
+ - bin/setup
126
+ - lib/sema_api_ruby.rb
127
+ - lib/sema_api_ruby/client.rb
128
+ - lib/sema_api_ruby/client/ocr.rb
129
+ - lib/sema_api_ruby/configuration.rb
130
+ - lib/sema_api_ruby/exceptions.rb
131
+ - lib/sema_api_ruby/get_text_to_image_ratio.rb
132
+ - lib/sema_api_ruby/version.rb
133
+ - sema_api_ruby.gemspec
134
+ homepage: https://github.com/jspillers/sema_api_ruby
135
+ licenses: []
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.4.5
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: Ruby client for Sema Media Data image OCR API
157
+ test_files: []