embulk-filter-azure_computer_vision_api 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml ADDED
@@ -0,0 +1,7 @@
1
+ ---
2
+ SHA1:
3
+ metadata.gz: 86ec03fac808603ea5cd40ad2e7be014252d1964
4
+ data.tar.gz: 26cb69047971f9a1ddef8b64a2f2d43cf9fa0c2e
5
+ SHA512:
6
+ metadata.gz: 4f2a2459e990a675dfe0714a6ef17fbf5d29c3654bcc7416e45f043dfde6a625daf4e680fe486b069e168a7e2d9805395590f44d2859ff99171a1b70ffbd9e29
7
+ data.tar.gz: c3fff88c00c015faf52f5648fe479ac2f4bb0242ebeda93e3ae031d8f53f0715bd1a3962966a8720e00dd28440c19906bc21d0aac565758adbf3eb12cd750b8b
data/.gitignore ADDED
@@ -0,0 +1,5 @@
1
+ *~
2
+ /pkg/
3
+ /tmp/
4
+ /.bundle/
5
+ /Gemfile.lock
data/.ruby-version ADDED
@@ -0,0 +1 @@
1
+ jruby-9.1.5.0
data/Gemfile ADDED
@@ -0,0 +1,2 @@
1
+ source 'https://rubygems.org/'
2
+ gemspec
data/LICENSE.txt ADDED
@@ -0,0 +1,21 @@
1
+
2
+ MIT License
3
+
4
+ Permission is hereby granted, free of charge, to any person obtaining
5
+ a copy of this software and associated documentation files (the
6
+ "Software"), to deal in the Software without restriction, including
7
+ without limitation the rights to use, copy, modify, merge, publish,
8
+ distribute, sublicense, and/or sell copies of the Software, and to
9
+ permit persons to whom the Software is furnished to do so, subject to
10
+ the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be
13
+ included in all copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16
+ EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17
+ MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18
+ NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE
19
+ LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION
20
+ OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION
21
+ WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
data/README.md ADDED
@@ -0,0 +1,35 @@
1
+ # Azure Computer Vision Api filter plugin for Embulk
2
+
3
+
4
+ ## Overview
5
+
6
+ * **Plugin type**: filter
7
+
8
+ ## Configuration
9
+
10
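+ - **api_type**: name of the Computer Vision API operation to call; it is appended to the endpoint URL, e.g. `ocr` (string, required)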
11
+ - **out_key_name**: name of the JSON column added to the output schema to hold the API response (string, required)
12
+ - **image_path_key_name**: name of the input column that holds the image location, either an `http(s)://` URL or a local file path (string, required)
13
+ - **params**: query parameters appended to the API request URL (hash, default: {})
14
+ - **delay**: seconds to sleep after each record is processed (integer, default: 0)
15
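+ - **retry_wait**: seconds to wait before retrying after an error response when the API does not say how long to wait (integer, default: 10)
+ - **read_timeout**: HTTP read timeout in seconds (integer, default: 60)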
16
+ - **subscription_key**: Azure subscription key, sent in the `Ocp-Apim-Subscription-Key` request header (string, required)
17
+
18
+ ## Example
19
+
20
+ ```yaml
21
+ - type: azure_computer_vision_api
22
+ api_type: ocr
23
+ image_path_key_name: image_path
24
+ out_key_name: image_info
25
+ # params:
26
+ # language: "ja"
27
+ # detectOrientation: true
28
+ subscription_key: XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX
29
+ ```
30
+
31
+ ## Build
32
+
33
+ ```
34
+ $ rake
35
+ ```
data/Rakefile ADDED
@@ -0,0 +1,3 @@
1
+ require "bundler/gem_tasks"
2
+
3
+ task default: :build
@@ -0,0 +1,20 @@
1
+
2
# Gem specification for the Embulk filter plugin that calls the
# Azure Computer Vision API.
Gem::Specification.new do |gem|
  # Identity
  gem.name        = 'embulk-filter-azure_computer_vision_api'
  gem.version     = '0.1.0'
  gem.summary     = 'Azure Computer Vision Api filter plugin for Embulk'
  gem.description = 'Azure Computer Vision Api'
  gem.homepage    = 'https://github.com/toyama0919/embulk-filter-azure_computer_vision_api'
  gem.licenses    = %w[MIT]

  # Maintainer
  gem.authors = %w[toyama0919]
  gem.email   = %w[toyama0919@gmail.com]

  # Packaged files: everything tracked by git plus any bundled jars.
  tracked_files = `git ls-files`.split("\n")
  bundled_jars  = Dir['classpath/*.jar']
  gem.files         = tracked_files + bundled_jars
  gem.test_files    = gem.files.grep(%r{^(test|spec)/})
  gem.require_paths = %w[lib]

  # Runtime dependency
  gem.add_dependency 'jruby-openssl'

  # Development dependencies, declared in the same order as before.
  [
    ['embulk',  '>= 0.8.15'],
    ['bundler', '>= 1.10.6'],
    ['rake',    '>= 10.0']
  ].each do |dep_name, requirement|
    gem.add_development_dependency dep_name, [requirement]
  end
end
@@ -0,0 +1,58 @@
1
+ require_relative 'azure_computer_vision_api/computer_vision_client'
2
+
3
+ module Embulk
4
+ module Filter
5
# Embulk filter plugin that sends the image referenced by each record to the
# Computer Vision API (through ComputerVisionClient) and appends the parsed
# response to the record as an extra JSON column.
class AzureComputerVisionApi < FilterPlugin
  Plugin.register_filter("azure_computer_vision_api", self)

  # Reads the plugin configuration into a task hash and declares the output
  # schema: the input columns followed by one JSON column named after
  # "out_key_name".
  #
  # @param config    configuration object; only its #param method is used here
  # @param in_schema input schema; must support `+` with an array of columns
  # @yield [task, out_columns] hands the task hash and output columns to the caller
  # @return whatever the block returns
  def self.transaction(config, in_schema, &control)
    task = {
      "api_type" => config.param("api_type", :string),
      "out_key_name" => config.param("out_key_name", :string),
      "image_path_key_name" => config.param("image_path_key_name", :string),
      "params" => config.param("params", :hash, default: {}),
      "delay" => config.param("delay", :integer, default: 0),
      "retry_wait" => config.param("retry_wait", :integer, default: 10),
      "read_timeout" => config.param("read_timeout", :integer, default: 60),
      "subscription_key" => config.param("subscription_key", :string),
    }

    add_columns = [
      Column.new(nil, task["out_key_name"], :json)
    ]

    out_columns = in_schema + add_columns

    yield(task, out_columns)
  end

  # Caches the task settings used per record and builds the API client.
  def init
    @image_path_key_name = task['image_path_key_name']
    @delay = task['delay']
    @client = ComputerVisionClient.new(
      params: task['params'],
      subscription_key: task['subscription_key'],
      retry_wait: task['retry_wait'],
      read_timeout: task['read_timeout'],
      api_type: task['api_type']
    )
  end

  # Nothing to release.
  def close
  end

  # Processes one page: for every record, looks up the image path column,
  # calls the API and emits the original values plus the response.
  #
  # The response is appended to the record itself rather than to a
  # name-keyed Hash of its values, so rows keep their full width even when
  # the input schema contains duplicate column names.
  #
  # NOTE(review): assumes each record yielded by page.each is an ordered
  # list of values aligned with in_schema.names - confirm against Embulk.
  #
  # @param page enumerable of records
  def add(page)
    # The column position does not change within a page, so resolve it once.
    # rindex keeps the previous "last column wins" lookup for duplicate names.
    path_index = in_schema.names.rindex(@image_path_key_name)

    page.each do |record|
      # A missing column yields nil, which the client receives as-is.
      image_path = path_index && record[path_index]
      response = @client.request(image_path)
      page_builder.add(record.to_a + [response])
      sleep @delay if @delay && @delay > 0
    end
  end

  # Signals the page builder that no more records will be added.
  def finish
    page_builder.finish
  end
end
57
+ end
58
+ end
@@ -0,0 +1,70 @@
1
+ require "json"
2
+ require "net/http"
3
+ require "uri"
4
+ require "pp"
5
+ require "openssl"
6
+
7
+ module Embulk
8
+ module Filter
9
+ class AzureComputerVisionApi < FilterPlugin
10
+ class ComputerVisionClient
11
+ ENDPOINT_PREFIX = "https://api.projectoxford.ai/vision/v1.0"
12
+ RETRY_WAIT_REGEXP = /Rate limit is exceeded. Try again in ([\d]+) seconds./
13
+
14
+ def initialize(params: , subscription_key:, api_type: , retry_wait:, read_timeout:)
15
+ Embulk.logger.info("api type => #{api_type}")
16
+
17
+ uri = URI.parse("#{ENDPOINT_PREFIX}/#{api_type}")
18
+ uri.query = URI.encode_www_form(params)
19
+ @request = Net::HTTP::Post.new(uri.request_uri)
20
+ @request['Ocp-Apim-Subscription-Key'] = subscription_key
21
+ @http = Net::HTTP.new(uri.host, uri.port)
22
+ @http.read_timeout = read_timeout
23
+ @http.use_ssl = true
24
+ @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
25
+ @retry_wait = retry_wait
26
+ end
27
+
28
+ def request(image_path)
29
+ content_type, body = get_content_type_and_body(image_path)
30
+ @request['Content-Type'] = content_type
31
+ @request.body = body
32
+
33
+ Embulk.logger.info("processing => #{image_path}")
34
+
35
+ begin
36
+ loop do
37
+ response = @http.start do |http|
38
+ response_body = http.request(@request).body
39
+
40
+ Embulk.logger.debug("response body => #{response_body}")
41
+ JSON.parse(response_body)
42
+ end
43
+ if response.key?('statusCode')
44
+ if response['statusCode'] == 429
45
+ sec = response['message'].match(RETRY_WAIT_REGEXP)
46
+ end
47
+ Embulk.logger.warn("response error => #{response}")
48
+ sleep (sec ? sec[1].to_i + 1 : @retry_wait)
49
+ else
50
+ return response
51
+ end
52
+ end
53
+ rescue => e
54
+ Embulk.logger.error "\n#{e.message}\n#{e.backtrace.join("\n")}"
55
+ end
56
+ end
57
+
58
+ private
59
+ def get_content_type_and_body(image_path)
60
+ if image_path =~ /https?\:\/\//
61
+ return 'application/json', { url: image_path || '' }.to_json
62
+ else
63
+ content = File.read(image_path) rescue ''
64
+ return 'application/octet-stream', content
65
+ end
66
+ end
67
+ end
68
+ end
69
+ end
70
+ end
metadata ADDED
@@ -0,0 +1,109 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: embulk-filter-azure_computer_vision_api
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - toyama0919
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2017-01-27 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ requirement: !ruby/object:Gem::Requirement
15
+ requirements:
16
+ - - ">="
17
+ - !ruby/object:Gem::Version
18
+ version: '0'
19
+ name: jruby-openssl
20
+ prerelease: false
21
+ type: :runtime
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - ">="
25
+ - !ruby/object:Gem::Version
26
+ version: '0'
27
+ - !ruby/object:Gem::Dependency
28
+ requirement: !ruby/object:Gem::Requirement
29
+ requirements:
30
+ - - ">="
31
+ - !ruby/object:Gem::Version
32
+ version: 0.8.15
33
+ name: embulk
34
+ prerelease: false
35
+ type: :development
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - ">="
39
+ - !ruby/object:Gem::Version
40
+ version: 0.8.15
41
+ - !ruby/object:Gem::Dependency
42
+ requirement: !ruby/object:Gem::Requirement
43
+ requirements:
44
+ - - ">="
45
+ - !ruby/object:Gem::Version
46
+ version: 1.10.6
47
+ name: bundler
48
+ prerelease: false
49
+ type: :development
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - ">="
53
+ - !ruby/object:Gem::Version
54
+ version: 1.10.6
55
+ - !ruby/object:Gem::Dependency
56
+ requirement: !ruby/object:Gem::Requirement
57
+ requirements:
58
+ - - ">="
59
+ - !ruby/object:Gem::Version
60
+ version: '10.0'
61
+ name: rake
62
+ prerelease: false
63
+ type: :development
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - ">="
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ description: Azure Computer Vision Api
70
+ email:
71
+ - toyama0919@gmail.com
72
+ executables: []
73
+ extensions: []
74
+ extra_rdoc_files: []
75
+ files:
76
+ - ".gitignore"
77
+ - ".ruby-version"
78
+ - Gemfile
79
+ - LICENSE.txt
80
+ - README.md
81
+ - Rakefile
82
+ - embulk-filter-azure_computer_vision_api.gemspec
83
+ - lib/embulk/filter/azure_computer_vision_api.rb
84
+ - lib/embulk/filter/azure_computer_vision_api/computer_vision_client.rb
85
+ homepage: https://github.com/toyama0919/embulk-filter-azure_computer_vision_api
86
+ licenses:
87
+ - MIT
88
+ metadata: {}
89
+ post_install_message:
90
+ rdoc_options: []
91
+ require_paths:
92
+ - lib
93
+ required_ruby_version: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - ">="
96
+ - !ruby/object:Gem::Version
97
+ version: '0'
98
+ required_rubygems_version: !ruby/object:Gem::Requirement
99
+ requirements:
100
+ - - ">="
101
+ - !ruby/object:Gem::Version
102
+ version: '0'
103
+ requirements: []
104
+ rubyforge_project:
105
+ rubygems_version: 2.6.6
106
+ signing_key:
107
+ specification_version: 4
108
+ summary: Azure Computer Vision Api filter plugin for Embulk
109
+ test_files: []