gini-api 0.9.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,284 @@
1
+ require 'uri'
2
+ require 'json'
3
+ require 'logger'
4
+ require 'faraday'
5
+ require 'benchmark'
6
+
7
+ module Gini
8
+ module Api
9
+
10
+ # Main class to operate on the Gini API
11
+ #
12
+ class Client
13
+
14
+ attr_reader :token, :log
15
+
16
# Instantiate a new Gini::Api::Client object with OAuth capabilities
#
# Every key of the merged option hash is stored in an instance variable of
# the same name and exposed through a reader defined on the class.
#
# @param [Hash] options Hash of available config settings
# @option options [String] :client_id OAuth client_id (mandatory)
# @option options [String] :client_secret OAuth client_secret (mandatory)
# @option options [String] :oauth_site OAuth site to connect to (https://user.gini.net)
# @option options [String] :oauth_redirect Redirect URI
# @option options [Integer] :upload_timeout Upload timeout in seconds
# @option options [Integer] :processing_timeout API operational timeout in seconds
# @option options [String] :api_uri API URI (https://api.gini.net)
# @option options [String] :api_version API version to use (v1)
# @option options [String] :api_type Default response type (json)
# @option options [Logger] :log logger object to use (initialized with STDOUT otherwise)
#
# @raise [Gini::Api::Error] If :client_id or :client_secret is missing or nil
#
# @example
#   api = Gini::Api::Client.new(
#     client_id: 'my_client_id',
#     client_secret: 'my_client_secret',
#   )
#
def initialize(options = {})
  opts = {
    oauth_site: 'https://user.gini.net/',
    oauth_redirect: 'http://localhost',
    api_uri: 'https://api.gini.net',
    api_version: 'v1',
    api_type: 'json',
    upload_timeout: 90,
    processing_timeout: 180
  }.merge(options)

  # Build the default logger only when the caller supplied none (or nil)
  opts[:log] ||= Logger.new(STDOUT)

  # Ensure mandatory keys are set; an explicit nil counts as missing
  [:client_id, :client_secret].each do |k|
    raise Gini::Api::Error.new("Mandatory option key is missing: #{k}") if opts[k].nil?
  end

  # Populate instance variables from merged opts
  opts.each do |k, v|
    instance_variable_set("@#{k}", v)
    # Define the reader only once instead of on every instantiation
    self.class.send(:attr_reader, k) unless respond_to?(k)
  end

  # Ensure STDOUT is flushed
  STDOUT.sync = true

  # Strip trailing slashes without mutating the caller's (possibly frozen) string
  @api_uri = @api_uri.sub(%r{/+$}, '')

  # Create upload connection
  @upload_connection = Faraday.new(url: @api_uri) do |builder|
    builder.use(Faraday::Request::Multipart)
    builder.use(Faraday::Request::UrlEncoded)
    builder.request(:retry, 3)
    builder.adapter(Faraday.default_adapter)
  end

  # Register parser (json+xml) based on API version
  register_parser

  @log.info('Gini API client initialized')
  @log.info("Target: #{@api_uri}")
end
78
+
79
# Register the OAuth2 response parsers for the versioned Gini media types.
#
# :gini_json is bound to the JSON accept type and :gini_xml to the XML
# accept type produced by version_header. Each parser falls back to the
# raw body when parsing raises a StandardError.
#
def register_parser
  json_types = [version_header(:json)[:accept]]
  OAuth2::Response.register_parser(:gini_json, json_types) do |body|
    begin
      MultiJson.load(body, symbolize_keys: true)
    rescue StandardError
      body
    end
  end

  xml_types = [version_header(:xml)[:accept]]
  OAuth2::Response.register_parser(:gini_xml, xml_types) do |body|
    begin
      MultiXml.parse(body)
    rescue StandardError
      body
    end
  end
end
89
+
90
# Acquire an OAuth2 token and populate @oauth (a Gini::Api::OAuth instance)
# and @token (the value returned by its #token method).
# Supports 2 strategies: username/password and authorization code
#
# @param [Hash] opts Your authorization credentials
# @option opts [String] :auth_code OAuth authorization code. Will be exchanged for a token
# @option opts [String] :username API username
# @option opts [String] :password API password
#
# @return The token stored in @token
#
# @example
#   api.login(auth_code: '1234567890')
# @example
#   api.login(username: 'me@example.com', password: 'secret')
#
def login(opts)
  session = Gini::Api::OAuth.new(self, opts)
  @oauth  = session
  @token  = session.token
end
107
+
108
# Destroy OAuth2 token
#
# Does nothing (and returns nil) when no login has happened yet, instead of
# failing with a NoMethodError on nil.
#
# @return Whatever @oauth.destroy returns, or nil when not logged in
#
def logout
  return unless @oauth

  @oauth.destroy
end
113
+
114
# Build the versioned accept header from @api_version and the given type
#
# @param [Symbol, String] type Expected response type (:xml, :json).
#   Defaults to @api_type
#
# @return [Hash] Hash with a single :accept entry, e.g.
#   { accept: 'application/vnd.gini.v1+json' }
#
def version_header(type = @api_type)
  media_type = "application/vnd.gini.#{@api_version}+#{type}"
  { accept: media_type }
end
123
+
124
# Request wrapper that sets URI and accept header
#
# The resource may be a path or an absolute URL; scheme and host are
# discarded and the path (plus query string, if any) is appended to @api_uri.
#
# @param [Symbol] verb HTTP request verb (:get, :post, :put, :delete)
# @param [String] resource API resource like /documents
# @param [Hash] options Optional type and custom headers (not modified)
# @option options [String] :type Type to pass to version_header (:xml, :json)
# @option options [Hash] :headers Custom headers. Must include accept
#
# @return The response returned by the token's request method
#
# @raise [Gini::Api::RequestError] When the OAuth2 layer raises OAuth2::Error
# @raise [Gini::Api::ProcessingError] When @processing_timeout is exceeded
#
def request(verb, resource, options = {})
  # Work on a copy so the caller's hash keeps its :type entry
  options = options.dup
  opts = {
    headers: version_header(options.delete(:type) || @api_type)
  }.merge(options)

  # Keep the query string; using only #path would silently drop
  # parameters such as limit/next/q
  location = URI.parse(resource)
  target = @api_uri + location.path
  target += "?#{location.query}" if location.query

  # Timeout.timeout replaces the deprecated Kernel#timeout
  Timeout.timeout(@processing_timeout) do
    @token.send(verb.to_sym, target, opts)
  end
rescue OAuth2::Error => e
  raise Gini::Api::RequestError.new(
    "API request failed: #{verb} #{resource} (code=#{e.response.status})",
    e.response
  )
rescue Timeout::Error => e
  raise Gini::Api::ProcessingError.new(
    "API request timed out: #{verb} #{resource} (#{e.message})"
  )
end
150
+
151
# Upload a document
#
# Posts the file as multipart form data to 'documents/' and, on HTTP 201,
# polls the created document until processing finishes or
# @processing_timeout is exceeded. Upload, processing and total durations
# (seconds) are stored on the returned document.
#
# @param [String] file path of the document to upload
# @yield [doc] Forwarded to Document#poll to monitor progress
#
# @return [Gini::Api::Document] Return Gini::Api::Document object for uploaded document
#
# @raise [Gini::Api::UploadError] When the upload response status is not 201
# @raise [Gini::Api::ProcessingError] When polling exceeds @processing_timeout;
#   the exception carries the document id in #docid
#
# @example Upload and wait for completion
#   doc = api.upload('/tmp/myfile.pdf')
# @example Upload and monitor progress
#   doc = api.upload('/tmp/myfile.pdf') { |d| puts "Progress: #{d.progress}" }
#
def upload(file, &block)
  @log.info("Uploading #{file}")

  duration = {}

  # Document upload
  duration[:upload] = Benchmark.realtime do
    @response = @upload_connection.post do |req|
      req.options[:timeout] = @upload_timeout
      req.url 'documents/'
      req.headers['Content-Type'] = 'multipart/form-data'
      req.headers['Authorization'] = "Bearer #{@token.token}"
      req.headers.merge!(version_header)
      req.body = { file: Faraday::UploadIO.new(file, 'application/octet-stream') }
    end
  end

  # Anything but 201 means the document was not created
  unless @response.status == 201
    raise Gini::Api::UploadError.new(
      "Document upload failed with HTTP code #{@response.status}",
      @response
    )
  end

  # Start polling when document has been uploaded successfully
  location = @response.headers['location']
  doc = Gini::Api::Document.new(self, location)
  begin
    # Timeout.timeout replaces the deprecated Kernel#timeout
    Timeout.timeout(@processing_timeout) do
      duration[:processing] = Benchmark.realtime do
        doc.poll(&block)
      end
    end
  rescue Timeout::Error => e
    ex = Gini::Api::ProcessingError.new(e.message)
    ex.docid = doc.id
    raise ex
  end

  # Combine duration values and update doc object
  duration[:total] = duration[:upload] + duration[:processing]
  doc.duration = duration

  doc
end
207
+
208
# Delete a document by issuing a DELETE request on /documents/<id>
#
# @param [String] id document ID
#
# @return Result of logging the deletion
#
# @raise [Gini::Api::DocumentError] When the response status is not 204
#
def delete(id)
  response = request(:delete, "/documents/#{id}")
  if response.status != 204
    message = "Deletion of docId #{id} failed (code=#{response.status})"
    raise Gini::Api::DocumentError.new(message, response)
  end

  @log.info("Deleted document #{id}")
end
222
+
223
# Build a document object for the resource /documents/<id>
#
# @param [String] id document ID
#
# @return [Gini::Api::Document] Return Gini::Api::Document object
#
def get(id)
  location = "/documents/#{id}"
  Gini::Api::Document.new(self, location)
end
232
+
233
# List all documents
#
# Both values are coerced with Integer() and sent as the limit and next
# query parameters of /documents.
#
# @param [Hash] options List options (offset and limit)
# @option options [Integer] :limit Maximum number of documents to return (defaults to 20)
# @option options [Integer] :offset Start offset. Defaults to 0
#
# @return [Gini::Api::DocumentSet] Returns a DocumentSet with total, offset and a list of Document objects
#
# @raise [Gini::Api::DocumentError] When the response status is not 200
#
def list(options = {})
  settings = { limit: 20, offset: 0 }.merge(options)
  page_size = Integer(settings[:limit])
  start     = Integer(settings[:offset])

  response = request(:get, "/documents?limit=#{page_size}&next=#{start}")
  if response.status != 200
    message = "Failed to get list of documents (code=#{response.status})"
    raise Gini::Api::DocumentError.new(message, response)
  end

  Gini::Api::DocumentSet.new(self, response.parsed)
end
255
+
256
# Fulltext search for documents
#
# All parameters are form-encoded, so characters such as '&', '=' or '#'
# inside a search term cannot alter the query string.
#
# @param [String, Array] query The search term(s), separated by space. Multiple terms as array
# @param [Hash] options Search options
# @option options [String] :type Only include documents with the given doctype
# @option options [Integer] :limit Number of results per page. Must be between 1 and 250. Defaults to 20
# @option options [Integer] :offset Start offset. Defaults to 0
#
# @return [Gini::Api::DocumentSet] Returns a DocumentSet with total, offset and a list of Document objects
#
# @raise [Gini::Api::SearchError] When the response status is not 200
#
def search(query, options = {})
  opts = { type: '', limit: 20, offset: 0 }.merge(options)

  # URI.escape is obsolete and left reserved characters unescaped;
  # Array() makes the documented Array form of query work
  params = URI.encode_www_form(
    q: Array(query).join(' '),
    type: opts[:type].to_s,
    limit: Integer(opts[:limit]),
    next: Integer(opts[:offset])
  )

  response = request(:get, "/search?#{params}")
  unless response.status == 200
    raise Gini::Api::SearchError.new(
      "Search query failed with code #{response.status}",
      response
    )
  end
  Gini::Api::DocumentSet.new(self, response.parsed)
end
282
+ end
283
+ end
284
+ end
@@ -0,0 +1,59 @@
1
+ module Gini
2
+ module Api
3
+
4
+ # Contains document related extractions
5
+ #
6
+ class Document::Extractions
7
+
8
+ attr_reader :raw
9
+
10
# Create the extractions object and immediately fetch its data via #update
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Extractions URL passed to api.request
def initialize(api, location)
  @api, @location = api, location

  update
end
20
+
21
# Fetch the extractions and populate instance variables from the response.
#
# Stores the parsed response in @raw, creates one instance variable plus
# class-level reader per key of the :extractions entry, and exposes the
# :candidates entry as @candidates.
#
# @raise [Gini::Api::DocumentError] When the response status is not 200
#
def update
  response = @api.request(:get, @location)

  if response.status != 200
    message = "Failed to fetch extractions from #{@location}"
    raise Gini::Api::DocumentError.new(message, response)
  end

  # Entire response
  @raw = response.parsed

  response.parsed[:extractions].each do |name, extraction|
    instance_variable_set("@#{name}", extraction)
    self.class.send(:attr_reader, name)
  end

  @candidates = response.parsed[:candidates]
  self.class.send(:attr_reader, :candidates)
end
44
+
45
# Get field value for given extraction key
#
# The key is looked up in the :extractions entry of the fetched response
# only, so internal state (@api, @location, @raw, ...) can never be
# addressed and malformed keys cannot raise a NameError.
#
# @param [String, Symbol] item The extractions item to get the value of
# @return [String, Integer] Returns the value from extractions hash
#
# @raise [Gini::Api::DocumentError] When the key is not part of the extractions
#
def [](item)
  # Compare as strings so both 'amountToPay' and :amountToPay match
  _name, extraction = @raw[:extractions].find { |name, _| name.to_s == item.to_s }

  unless extraction
    raise Gini::Api::DocumentError.new("Invalid extraction key #{item}: Not found")
  end

  extraction[:value]
end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,50 @@
1
+ module Gini
2
+ module Api
3
+
4
+ # Contains document layout in XML & JSON
5
+ #
6
+ class Document::Layout
7
+
8
# Remember the API client and layout URL; nothing is fetched until
# #to_xml or #to_json is called.
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Layout URL passed to api.request
def initialize(api, location)
  @api, @location = api, location
end
16
+
17
# Return layout as XML string, fetching it on first use and caching it
# in @xml afterwards
#
# @return [String] Returns the layout as XML string
def to_xml
  @xml = get_xml unless @xml
  @xml
end
23
+
24
# Return layout as JSON string, fetching it on first use and caching it
# in @json afterwards
#
# Arguments are accepted and ignored so that JSON generators, which call
# to_json with a generator state, do not fail with an ArgumentError.
#
# @return [String] Returns the layout as JSON string
def to_json(*_args)
  @json ||= get_json
end
30
+
31
+ private
32
+
33
# Request the layout with response type 'xml'
#
# @return [String, nil] Response body when the status is 200, nil otherwise
def get_xml
  response = @api.request(:get, @location, type: 'xml')
  return unless response.status == 200

  response.body
end
40
+
41
# Request the layout using the client's default response type
#
# @return [String, nil] Response body when the status is 200, nil otherwise
def get_json
  response = @api.request(:get, @location)
  return unless response.status == 200

  response.body
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,136 @@
1
+ require 'eventmachine'
2
+
3
+ module Gini
4
+ module Api
5
+
6
+ # Contains document related data from uploaded or fetched document
7
+ #
8
+ class Document
9
+
10
+ attr_accessor :duration
11
+
12
# Create a document object and populate it via #update, either from the
# given hash or by fetching the document URL
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Document URL
# @param [Hash] from_data Hash with doc data (from search for example)
#
def initialize(api, location, from_data = nil)
  @api, @location = api, location

  update(from_data)
end
24
+
25
# Populate instance variables from document data.
#
# When no data is given the document resource is fetched from @location.
# Every key becomes an instance variable; a class-level reader is defined
# for each key except :pages.
#
# @param [Hash] from_data Ruby hash with doc data
#
# @raise [Gini::Api::DocumentError] When fetching fails with a non-200 status
#
def update(from_data = nil)
  data = from_data

  if data.nil?
    response = @api.request(:get, @location)
    if response.status != 200
      message = "Failed to fetch document data (code=#{response.status})"
      raise Gini::Api::DocumentError.new(message, response)
    end
    data = response.parsed
  end

  data.each do |key, value|
    instance_variable_set("@#{key}", value)

    # No reader for pages: it is provided by the method pages()
    self.class.send(:attr_reader, key) unless key == :pages
  end
end
54
+
55
# Poll document progress inside an EventMachine loop.
#
# On every timer tick the document is refreshed; the loop is stopped once
# @progress matches COMPLETED or ERROR. The given block, if any, is called
# with the document after every refresh.
# Known states are PENDING, COMPLETED and ERROR
#
# @param [Float] interval API polling interval
# @yield [doc] Called with self after each refresh
#
def poll(interval = 0.5)
  EM.run do
    EM.add_periodic_timer(interval) do
      update
      finished = @progress =~ /(COMPLETED|ERROR)/
      EM.stop if finished
      yield self if block_given?
    end
  end
end
69
+
70
# Get processed document
#
# Requests @_links[:processed] with accept 'application/octet-stream'.
#
# @return [data] The binary representation of the processed document (pdf, jpg, png, ...)
#
# @raise [Gini::Api::DocumentError] When the response status is not 200
#
def processed
  response = @api.request(
    :get,
    @_links[:processed],
    headers: { accept: 'application/octet-stream' }
  )
  return response.body if response.status == 200

  message = "Failed to fetch processed document (code=#{response.status})"
  raise Gini::Api::DocumentError.new(message, response)
end
88
+
89
# Return the extractions object for @_links[:extractions], creating it on
# first use and caching it in @extractions
#
# @return [Gini::Api::Document::Extractions] Return Gini::Api::Document::Extractions object for uploaded document
#
def extractions
  return @extractions if @extractions

  @extractions = Gini::Api::Document::Extractions.new(@api, @_links[:extractions])
end
96
+
97
# Return the layout object for @_links[:layout], creating it on first use
# and caching it in @layout
#
# @return [Gini::Api::Document::Layout] Return Gini::Api::Document::Layout object for uploaded document
#
def layout
  return @layout if @layout

  @layout = Gini::Api::Document::Layout.new(@api, @_links[:layout])
end
104
+
105
# Replacement reader for @pages: returns only the :images entry of every
# page, in order, so that page 1 is found at index 0.
#
# @return [Array] The :images value of each element of @pages
#
def pages
  @pages.each_with_object([]) do |page, images|
    images << page[:images]
  end
end
111
+
112
# Submit feedback on extraction label
#
# The label is validated against the :extractions entry of the fetched
# extractions (extractions.raw) rather than dispatched as a method call, so
# an unknown label always results in a DocumentError and never invokes an
# unrelated method.
#
# @param [String] label Extraction label to submit feedback on
# @param [String] value The new value for the given label
#
# @raise [Gini::Api::DocumentError] When the label is unknown or the
#   response status is not 204
#
def submit_feedback(label, value)
  # Compare as strings so both 'amountToPay' and :amountToPay match
  _name, extraction = extractions.raw[:extractions].find do |name, _|
    name.to_s == label.to_s
  end
  unless extraction
    raise Gini::Api::DocumentError.new("Unknown label #{label}: Not found")
  end

  response = @api.request(
    :put,
    "#{@_links[:extractions]}/#{label}",
    headers: { 'content-type' => @api.version_header[:accept] },
    body: { value: value }.to_json
  )
  unless response.status == 204
    raise Gini::Api::DocumentError.new(
      "Failed to submit feedback for label #{label} (code=#{response.status})",
      response
    )
  end
end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,35 @@
1
module Gini
  module Api

    # Set of documents resulting from search or list query
    #
    class DocumentSet

      # Enumerable mixin
      include Enumerable

      attr_reader :total, :offset, :documents

      # Instantiate a new Gini::Api::DocumentSet object from response data
      #
      # @param [Gini::Api::Client] api Gini::Api::Client object
      # @param [Hash] data Container for documents
      # @option data [Integer] :totalCount Total number of documents
      # @option data [Array] :documents List of documents including all data
      #
      def initialize(api, data)
        @total = data[:totalCount]
        # NOTE(review): @offset was exposed but never assigned. It is read
        # from :offset when present - confirm the key name against the API.
        @offset = data[:offset]
        # Array() turns a missing :documents entry into an empty set
        @documents = Array(data[:documents]).map do |doc|
          Gini::Api::Document.new(api, doc[:_links][:document], doc)
        end
      end

      # Allow iteration on documents by yielding documents
      # Required by Enumerable mixin
      #
      # @yield [document] Each Gini::Api::Document of the set
      # @return [Enumerator] When called without a block
      #
      def each(&block)
        return enum_for(:each) unless block_given?

        @documents.each(&block)
      end
    end
  end
end
@@ -0,0 +1,100 @@
1
module Gini
  module Api

    # Base api exception class
    #
    # @!attribute [r] api_response
    #   @return [Faraday::Response] Faraday response object
    # @!attribute [r] api_method
    #   @return [String] HTTP method (:get, :post, :put, :delete)
    # @!attribute [r] api_url
    #   @return [String] Request URL
    # @!attribute [r] api_status
    #   @return [Integer] HTTP status code
    # @!attribute [r] api_message
    #   @return [String] Message from API error object ('undef' if unavailable)
    #   @see http://developer.gini.net/gini-api/html/overview.html#client-errors
    # @!attribute [r] api_request_id
    #   @return [String] Request id from API error object ('undef' if unavailable)
    #   @see http://developer.gini.net/gini-api/html/overview.html#client-errors
    # @!attribute [r] docid
    #   @return [String] Optional document-id that caused the exception
    #
    class Error < StandardError
      attr_reader :api_response, :api_method, :api_url
      attr_reader :api_status, :api_message, :api_request_id
      attr_accessor :docid

      # Parse response object and set instance vars accordingly
      #
      # @param [String] msg Exception message
      # @param [OAuth2::Response] api_response Faraday/Oauth2 response object from API
      #
      def initialize(msg, api_response = nil)
        super(msg)

        # Automatically use included response object if possible
        @api_response = api_response.respond_to?(:response) ? api_response.response : api_response

        # Parse response and set instance vars
        parse_response unless @api_response.nil?
      end

      # Build api error message from api response
      #
      # @return [String, nil] Formatted message or nil when no response is attached
      #
      def api_error
        return nil if @api_response.nil?

        "#{@api_method.to_s.upcase} #{@api_url} : #{@api_status} - " \
          "#{@api_message} (request Id: #{@api_request_id})"
      end

      # Parse Faraday response and fill instance variables
      #
      # api_message and api_request_id keep their 'undef' default when the
      # body is empty, is not valid JSON, is not a JSON object or lacks the
      # respective key.
      #
      def parse_response
        @api_method     = @api_response.env[:method]
        @api_url        = @api_response.env[:url].to_s
        @api_status     = @api_response.status
        @api_message    = 'undef'
        @api_request_id = 'undef'

        # A nil body must not raise while an exception is being built
        body = @api_response.body.to_s
        return if body.empty?

        parsed = JSON.parse(body, symbolize_names: true)
        # Arrays or scalars carry no message/requestId
        return unless parsed.is_a?(Hash)

        @api_message    = parsed[:message] || @api_message
        @api_request_id = parsed[:requestId] || @api_request_id
      rescue JSON::ParserError
        # We fail silently as defaults have been set
      end
    end

    # OAuth related errors
    #
    OAuthError = Class.new(Error)

    # Document related errors
    #
    DocumentError = Class.new(Error)

    # Upload related errors
    #
    UploadError = Class.new(Error)

    # Processing related errors
    #
    ProcessingError = Class.new(Error)

    # Search related errors
    #
    SearchError = Class.new(Error)

    # Generic request errors
    #
    RequestError = Class.new(Error)
  end
end