sema_api_ruby 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 5da03aaaec70b3e9e5bf3195c9c9552e348c9689
4
+ data.tar.gz: 3b2b8317183a67ee898ba0735bc67d569b8a8780
5
+ SHA512:
6
+ metadata.gz: 5826f9f85f98f38747a69adf8981de0c70d57b1d12e7c60bb4df0856ee47fc89e9e1cce7ee224f4ac14eb86ce5f91e86ef437d2388ec5c52fc38e1b23202b8b4
7
+ data.tar.gz: c5a40bbacc6a83e769ee7de2489283ce3295ab4c2a49121515a182bef315c5a56c66a691a9f70354a88126b313d9e61a3355b3064f2fa4fd787a86b44ec0c8b3
data/.env.sample ADDED
@@ -0,0 +1 @@
1
+ TEST_ACCESS_TOKEN=testaccesstoken
data/.gitignore ADDED
@@ -0,0 +1,10 @@
1
+ /.bundle/
2
+ /.yardoc
3
+ /Gemfile.lock
4
+ /_yardoc/
5
+ /coverage/
6
+ /doc/
7
+ /pkg/
8
+ /spec/reports/
9
+ /tmp/
10
+ .env
data/.rspec ADDED
@@ -0,0 +1,2 @@
1
+ --format documentation
2
+ --color
data/Gemfile ADDED
@@ -0,0 +1,4 @@
1
+ source 'https://rubygems.org'
2
+
3
+ # Specify your gem's dependencies in sema_api_ruby.gemspec
4
+ gemspec
data/README.md ADDED
@@ -0,0 +1,82 @@
1
+ # SemaApiRuby
2
+
3
+ Ruby client for Sema Media Data's image OCR API. Used for getting an estimated text to image ratio for validating images before being submitted to Facebook Ads.
4
+
5
+ ## Installation
6
+
7
+ Add this line to your application's Gemfile:
8
+
9
+ ```ruby
10
+ gem 'sema_api_ruby'
11
+ ```
12
+
13
+ And then execute:
14
+
15
+ $ bundle
16
+
17
+ Or install it yourself as:
18
+
19
+ $ gem install sema_api_ruby
20
+
21
+ ## Usage
22
+
23
+ Configure the client by providing an API token. You can optionally override any of the OCR settings as well.
24
+
25
+ ```ruby
26
+ SemaApiRuby.configure do |config|
27
+ config.access_token = 'myaccesstokenhere'
28
+
29
+ #optional - below are the defaults
30
+ config.ocr_settings = {
31
+ lang: 'en', # english language
32
+ outform: 'json', # output as json
33
+ sp: 'True', # perform spellcheck
34
+ mh: 'True', # multi-hypothesis... increases accuracy at slight performance cost
35
+ df: 'True', # dictionary based word filtering
36
+ noempty: 'True' # only return detected objects that are not empty
37
+ }
38
+ end
39
+
40
+ ```
41
+
42
+ Post a request to the ocr endpoint with the path to an image to get back a raw response from the api (post made using Faraday gem)
43
+ ```ruby
44
+ client = SemaApiRuby.new
45
+ file_path = '/path/to/your/image.jpg'
46
+
47
+ client.post_ocr_image(file_path)
48
+ # => #<Faraday::Response:0x007fcb641591e0 ...
49
+ ```
50
+
51
+ Use this method to both post to the OCR endpoint and return a text-to-image ratio
52
+ ```ruby
53
+ client = SemaApiRuby.new
54
+ file_path = '/path/to/your/image.jpg'
55
+
56
+ client.text_to_image_ratio(file_path)
57
+ # => 0.2123
58
+ ```
59
+
60
+ ## Testing
61
+
62
+ Copy the sample dotenv file to .env
63
+ ```
64
+ $> cp .env.sample .env
65
+ ```
66
+
67
+ Update it with a testing API token if you want to run specs against the live API rather than against the network stubs
68
+
69
+ Run the specs
70
+ ```
71
+ $> bundle exec rspec spec
72
+ ```
73
+
74
+ This gem uses a testing gem called VCR to record and replay HTTP requests. If the actual API has changed, you will need to delete all "cassettes" located in ```spec/cassettes/``` so that requests will run against the live API.
75
+
76
+ ## Contributing
77
+
78
+ 1. Fork it ( https://github.com/[my-github-username]/sema_api_ruby/fork )
79
+ 2. Create your feature branch (`git checkout -b my-new-feature`)
80
+ 3. Commit your changes (`git commit -am 'Add some feature'`)
81
+ 4. Push to the branch (`git push origin my-new-feature`)
82
+ 5. Create a new Pull Request
data/Rakefile ADDED
@@ -0,0 +1 @@
1
+ require "bundler/gem_tasks"
data/bin/console ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require "bundler/setup"
4
+ require "sema_api_ruby"
5
+
6
+ # You can add fixtures and/or initialization code here to make experimenting
7
+ # with your gem easier. You can also use a different console, if you like.
8
+
9
+ # (If you use this, don't forget to add pry to your Gemfile!)
10
+ # require "pry"
11
+ # Pry.start
12
+
13
+ require "irb"
14
+ IRB.start
data/bin/setup ADDED
@@ -0,0 +1,7 @@
1
+ #!/bin/bash
2
+ set -euo pipefail
3
+ IFS=$'\n\t'
4
+
5
+ bundle install
6
+
7
+ # Do any other automated setup that you need to do here
@@ -0,0 +1,44 @@
1
+ require 'json'
2
+
3
module SemaApiRuby

  class Client

    # OCR endpoint helpers, mixed into SemaApiRuby::Client.
    #
    # The including class must provide +ocr_settings+, +api_version+,
    # +conn_with_auth+, +mime_type+ and +image_size+.
    module Ocr

      # Posts the image to the OCR endpoint and returns the share of the
      # image area covered by detected text.
      #
      # @param _file_path [String] path to the image file
      # @return [Float] the text to image ratio; 0.0 when the API reports no
      #   frames or no results
      # @raise [SemaApiRuby::Client::EmptyResponse] when the parsed response
      #   is not a JSON object with a "frames" entry
      # @raise [JSON::ParserError] when the response is invalid JSON other
      #   than the known "no text results" case handled below
      def text_to_image_ratio(_file_path)
        response = post_ocr_image(_file_path)

        body = JSON.parse(response.body)

        # A non-object payload (array, null, ...) carries no frames either.
        raise SemaApiRuby::Client::EmptyResponse unless body.is_a?(Hash) && body['frames']

        return 0.0 if body['frames'].empty?

        rectangles = body['frames'].first['results']

        # A frame without results has no text; skip reading the image size.
        return 0.0 if rectangles.nil? || rectangles.empty?

        SemaApiRuby::GetTextToImageRatio.call(image_size(_file_path), rectangles)

      # workaround for bug in sema media api where they are returning invalid json
      # when an image does not have any text results. Remove when they start returning
      # valid json
      rescue JSON::ParserError => e
        # A bare raise re-raises the original error with its backtrace intact.
        raise unless e.message =~ /unexpected token at '\]\}'/

        0.0
      end

      # Uploads the image to the OCR endpoint together with the configured
      # OCR settings and returns the raw response.
      #
      # @param _file_path [String] path to the image file
      # @return [Object] whatever +conn_with_auth.post+ returns
      def post_ocr_image(_file_path)
        upload = Faraday::UploadIO.new(_file_path, mime_type(_file_path))

        conn_with_auth.post("/api/#{api_version}/ocr/", ocr_settings.merge(file: upload))
      ensure
        # Release the file handle opened for the upload once the request is done.
        upload.close if upload && upload.respond_to?(:close)
      end

    end

  end

end
@@ -0,0 +1,61 @@
1
+ require 'faraday'
2
+ require 'faraday_middleware'
3
+ require 'fastimage'
4
+ require 'sema_api_ruby/client/ocr'
5
+
6
module SemaApiRuby

  # API client. Each instance carries its own copy of the configuration
  # values listed in Configuration::VALID_CONFIG_KEYS.
  class Client

    include SemaApiRuby::Client::Ocr

    # setup accessors on this client for all config keys
    attr_accessor(*Configuration::VALID_CONFIG_KEYS)

    # @param options [Hash] per-client overrides for any key in
    #   Configuration::VALID_CONFIG_KEYS; other keys are ignored
    # @raise [NoTokenError] when no (or only a blank) access token is
    #   available after merging with the module-level configuration
    def initialize(options = {})
      # Use options passed in, but fall back to config module defaults
      merged_options = SemaApiRuby.options.merge(options)

      # Copy the merged values to this client and ignore those
      # not part of our configuration
      SemaApiRuby::Configuration::VALID_CONFIG_KEYS.each do |key|
        public_send("#{key}=", merged_options[key])
      end

      # A blank token is as unusable as a missing one; fail early.
      if access_token.nil? || access_token.to_s.strip.empty?
        raise NoTokenError, 'you must provide an access token'
      end
    end

    private

    # Memoized Faraday connection to +api_endpoint+ with multipart and
    # url-encoded request middleware.
    def conn
      @conn ||= Faraday.new(url: api_endpoint) do |conn|
        conn.request :multipart
        conn.request :url_encoded
        #conn.response :json

        conn.adapter Faraday.default_adapter
      end
    end

    # The same connection as +conn+, with the API key, user agent and
    # content type headers set on it (headers are set once, then memoized).
    def conn_with_auth
      @conn_with_auth ||= begin
        conn.headers['X-api-key'] = access_token
        conn.headers['User-Agent'] = user_agent
        conn.headers['Content-Type'] = content_type
        conn
      end
    end

    # MIME type derived from the image type FastImage detects.
    # Falls back to a generic binary type instead of the malformed "image/"
    # when FastImage returns nothing.
    def mime_type(_file_path)
      type = FastImage.type(_file_path)
      type ? "image/#{type}" : 'application/octet-stream'
    end

    # Image dimensions as reported by FastImage.size.
    def image_size(_file_path)
      FastImage.size(_file_path)
    end

  end

end
@@ -0,0 +1,56 @@
1
module SemaApiRuby

  # Configuration mixin. Extending an object with this module gives it one
  # accessor per VALID_CONFIG_KEYS entry and resets them to the defaults.
  module Configuration

    VALID_CONFIG_KEYS = [
      :access_token,
      :api_endpoint,
      :api_version,
      :user_agent,
      :content_type,
      :ocr_settings,
      :debug_output
    ].freeze

    DEFAULT_API_ENDPOINT = 'https://semamediadata.com/api'.freeze # 1.0/ocr/
    DEFAULT_API_VERSION  = 1.0
    DEFAULT_USER_AGENT   = "SemaApiRuby Gem #{SemaApiRuby::VERSION}".freeze
    DEFAULT_CONTENT_TYPE = 'multipart/form-data'
    DEFAULT_DEBUG_OUTPUT = false # $stdout

    # Frozen so the defaults cannot be altered through a configured copy.
    DEFAULT_OCR_SETTINGS = {
      lang: 'en',     # english language
      outform: 'json', # output as json
      sp: 'True',     # perform spellcheck
      mh: 'True',     # multi-hypothesis... increases accuracy at slight performance cost
      df: 'True',     # dictionary based word filtering
      noempty: 'True' # only return detected objects that are not empty
    }.freeze

    attr_accessor(*VALID_CONFIG_KEYS)

    # Yields the extended object so settings can be assigned in a block.
    def configure
      yield self
    end

    # Populate the defaults as soon as an object is extended with this module.
    def self.extended(base)
      base.reset
    end

    # @return [Hash] current value of every key in VALID_CONFIG_KEYS
    def options
      # Built key by key so Array-valued settings are not flattened away.
      VALID_CONFIG_KEYS.each_with_object({}) do |key, opts|
        opts[key] = public_send(key)
      end
    end

    # Restores every setting to its default (the access token to nil).
    def reset
      self.access_token = nil
      self.api_endpoint = DEFAULT_API_ENDPOINT
      self.api_version  = DEFAULT_API_VERSION
      self.user_agent   = DEFAULT_USER_AGENT
      self.content_type = DEFAULT_CONTENT_TYPE
      self.debug_output = DEFAULT_DEBUG_OUTPUT
      # Hand out a mutable copy; in-place edits must not leak into the defaults.
      self.ocr_settings = DEFAULT_OCR_SETTINGS.dup
    end

  end

end
@@ -0,0 +1,11 @@
1
module SemaApiRuby

  class Client

    # Error class for a client that has no access token.
    class NoTokenError < StandardError
    end

    # Error class for an API response that lacks the expected content.
    class EmptyResponse < StandardError
    end

  end

end
@@ -0,0 +1,34 @@
1
module SemaApiRuby

  # Computes the fraction of an image's area that is covered by rectangles
  # containing text.
  class GetTextToImageRatio

    attr_accessor :rectangles, :image_size

    # Convenience wrapper around +new(...).call+.
    #
    # @param image_size [Array<Numeric>] image width and height, in that order
    # @param rectangles [Array<Hash>, nil] rectangles with 'text', 'width'
    #   and 'height' keys
    # @return [Float] ratio rounded to four decimal places
    def self.call(image_size, rectangles)
      new(image_size, rectangles).call
    end

    def initialize(image_size, rectangles)
      @rectangles = rectangles
      @image_size = image_size
    end

    # @return [Float] summed area of all rectangles with non-empty text,
    #   divided by the image area and rounded to four decimal places;
    #   0.0 when there are no rectangles
    # @raise [ArgumentError] when there are rectangles but the image size is
    #   missing or does not describe a positive area
    def call
      rects = rectangles || []
      return 0.0 unless rects.any?

      width, height = image_size
      unless width && height && (width * height) > 0
        raise ArgumentError, "invalid image size: #{image_size.inspect}"
      end

      # Rectangles without text do not count towards the text area.
      text_area = rects.inject(0.0) do |memo, rect|
        text = rect['text']
        next memo if text.nil? || text.empty?

        memo + (rect['width'] * rect['height']).to_f
      end

      (text_area / (width * height).to_f).round(4)
    end

  end

end
@@ -0,0 +1,3 @@
1
module SemaApiRuby
  # Version string of the gem.
  VERSION = '0.1.0'
end
@@ -0,0 +1,30 @@
1
+ require 'sema_api_ruby/version'
2
+ require 'sema_api_ruby/configuration'
3
+ require 'sema_api_ruby/exceptions'
4
+ require 'sema_api_ruby/get_text_to_image_ratio'
5
+ require 'sema_api_ruby/client'
6
+
7
# Top-level namespace. Extending Configuration gives the module itself the
# configuration accessors plus +configure+, +options+ and +reset+.
module SemaApiRuby

  extend Configuration

  # Shortcut for SemaApiRuby::Client.new.
  #
  # @param options [Hash] per-client configuration overrides
  # @return [SemaApiRuby::Client]
  def self.new(options = {})
    SemaApiRuby::Client.new(options)
  end

  # @return [String] the parent directory of the directory holding this file
  def self.root
    File.dirname __dir__
  end

  # @return [Hash] a fresh, mutable copy of the default OCR settings
  def self.default_ocr_options
    # Derived from the configuration constant so the two cannot drift apart.
    Configuration::DEFAULT_OCR_SETTINGS.dup
  end

end
@@ -0,0 +1,38 @@
1
+ # coding: utf-8
2
+ lib = File.expand_path('../lib', __FILE__)
3
+ $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
+ require 'sema_api_ruby/version'
5
+
6
+ Gem::Specification.new do |spec|
7
+ spec.name = 'sema_api_ruby'
8
+ spec.version = SemaApiRuby::VERSION
9
+ spec.authors = ['Jrod']
10
+ spec.email = ['jarrod@jtms.net']
11
+
12
+ spec.summary = %q{Ruby client for Sema Media Data image OCR API}
13
+ spec.description = spec.summary
14
+ spec.homepage = 'https://github.com/jspillers/sema_api_ruby'
15
+
16
+ # Prevent pushing this gem to RubyGems.org by setting 'allowed_push_host', or
17
+ # delete this section to allow pushing this gem to any host.
18
+ #if spec.respond_to?(:metadata)
19
+ # spec.metadata['allowed_push_host'] = 'TODO: Set to "http://mygemserver.com"'
20
+ #else
21
+ # raise 'RubyGems 2.0 or newer is required to protect against public gem pushes.'
22
+ #end
23
+
24
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
+ spec.bindir = 'exe'
26
+ spec.executables = spec.files.grep(%r{^exe/}) { |f| File.basename(f) }
27
+ spec.require_paths = ['lib']
28
+
29
+ spec.add_dependency 'faraday', '~> 0.9.1'
30
+ spec.add_dependency 'faraday_middleware', '~> 0.10.0'
31
+ spec.add_dependency 'fastimage', '~> 1.7.0'
32
+
33
+ spec.add_development_dependency 'bundler', '~> 1.9'
34
+ spec.add_development_dependency 'rake', '~> 10.0'
35
+ spec.add_development_dependency 'pry', '~> 0.10.1'
36
+ spec.add_development_dependency 'vcr', '~> 2.9.3'
37
+
38
+ end
metadata ADDED
@@ -0,0 +1,157 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: sema_api_ruby
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Jrod
8
+ autorequire:
9
+ bindir: exe
10
+ cert_chain: []
11
+ date: 2015-08-13 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: faraday
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 0.9.1
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 0.9.1
27
+ - !ruby/object:Gem::Dependency
28
+ name: faraday_middleware
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.10.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.10.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: fastimage
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 1.7.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 1.7.0
55
+ - !ruby/object:Gem::Dependency
56
+ name: bundler
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '1.9'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '1.9'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '10.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '10.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: pry
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: 0.10.1
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: 0.10.1
97
+ - !ruby/object:Gem::Dependency
98
+ name: vcr
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 2.9.3
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 2.9.3
111
+ description: Ruby client for Sema Media Data image OCR API
112
+ email:
113
+ - jarrod@jtms.net
114
+ executables: []
115
+ extensions: []
116
+ extra_rdoc_files: []
117
+ files:
118
+ - ".env.sample"
119
+ - ".gitignore"
120
+ - ".rspec"
121
+ - Gemfile
122
+ - README.md
123
+ - Rakefile
124
+ - bin/console
125
+ - bin/setup
126
+ - lib/sema_api_ruby.rb
127
+ - lib/sema_api_ruby/client.rb
128
+ - lib/sema_api_ruby/client/ocr.rb
129
+ - lib/sema_api_ruby/configuration.rb
130
+ - lib/sema_api_ruby/exceptions.rb
131
+ - lib/sema_api_ruby/get_text_to_image_ratio.rb
132
+ - lib/sema_api_ruby/version.rb
133
+ - sema_api_ruby.gemspec
134
+ homepage: https://github.com/jspillers/sema_api_ruby
135
+ licenses: []
136
+ metadata: {}
137
+ post_install_message:
138
+ rdoc_options: []
139
+ require_paths:
140
+ - lib
141
+ required_ruby_version: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - ">="
144
+ - !ruby/object:Gem::Version
145
+ version: '0'
146
+ required_rubygems_version: !ruby/object:Gem::Requirement
147
+ requirements:
148
+ - - ">="
149
+ - !ruby/object:Gem::Version
150
+ version: '0'
151
+ requirements: []
152
+ rubyforge_project:
153
+ rubygems_version: 2.4.5
154
+ signing_key:
155
+ specification_version: 4
156
+ summary: Ruby client for Sema Media Data image OCR API
157
+ test_files: []