gini-api 0.9.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,284 @@
1
require 'uri'
require 'json'
require 'logger'
require 'timeout'
require 'faraday'
require 'benchmark'
6
+
7
+ module Gini
8
+ module Api
9
+
10
+ # Main class to operate on the Gini API
11
+ #
12
+ class Client
13
+
14
+ attr_reader :token, :log
15
+
16
# Instantiate a new Gini::Api::Client object with OAuth capabilities.
#
# The supplied options are merged over the defaults below. Every resulting key
# is stored as an instance variable and exposed through a reader of the same name.
#
# @param [Hash] options Hash of available config settings
# @option options [String] :client_id OAuth client_id (mandatory)
# @option options [String] :client_secret OAuth client_secret (mandatory)
# @option options [String] :oauth_site OAuth site to connect to (https://user.gini.net/)
# @option options [String] :oauth_redirect Redirect URI (http://localhost)
# @option options [Integer] :upload_timeout Upload timeout in seconds (90)
# @option options [Integer] :processing_timeout API operational timeout in seconds (180)
# @option options [String] :api_uri API URI (https://api.gini.net)
# @option options [String] :api_version API version to use (v1)
# @option options [String] :api_type Default response type used for the accept header (json)
# @option options [Logger] :log logger object to use (initialized with STDOUT otherwise)
#
# @raise [Gini::Api::Error] if :client_id or :client_secret is missing
#
# @example
#   api = Gini::Api::Client.new(
#     client_id: 'my_client_id',
#     client_secret: 'my_client_secret',
#   )
#
def initialize(options = {})
  opts = {
    oauth_site: 'https://user.gini.net/',
    oauth_redirect: 'http://localhost',
    api_uri: 'https://api.gini.net',
    api_version: 'v1',
    api_type: 'json',
    upload_timeout: 90,
    processing_timeout: 180,
    log: Logger.new(STDOUT),
  }.merge(options)

  # Ensure mandatory keys are set
  [:client_id, :client_secret].each do |k|
    raise Gini::Api::Error.new("Mandatory option key is missing: #{k}") unless opts.key?(k)
  end

  # Populate instance variables (and matching readers) from merged opts
  opts.each do |k, v|
    instance_variable_set("@#{k}", v)
    self.class.send(:attr_reader, k)
  end

  # Ensure STDOUT is flushed
  STDOUT.sync = true

  # Strip trailing slashes. A non-mutating sub is used so the caller's string
  # (which may be frozen) is never modified in place.
  @api_uri = @api_uri.to_s.sub(%r{/+$}, '')

  # Create upload connection
  @upload_connection = Faraday.new(url: @api_uri) do |builder|
    builder.use(Faraday::Request::Multipart)
    builder.use(Faraday::Request::UrlEncoded)
    builder.request(:retry, 3)
    builder.adapter(Faraday.default_adapter)
  end

  # Register parser (json+xml) based on API version
  register_parser

  @log.info('Gini API client initialized')
  @log.info("Target: #{@api_uri}")
end
78
+
79
# Register the Gini specific JSON and XML response parsers with OAuth2.
#
# Each parser is bound to the versioned accept type produced by
# version_header. When parsing raises, the unparsed body is returned instead.
#
def register_parser
  json_types = [version_header(:json)[:accept]]
  xml_types = [version_header(:xml)[:accept]]

  OAuth2::Response.register_parser(:gini_json, json_types) do |body|
    begin
      MultiJson.load(body, symbolize_keys: true)
    rescue StandardError
      body
    end
  end

  OAuth2::Response.register_parser(:gini_xml, xml_types) do |body|
    begin
      MultiXml.parse(body)
    rescue StandardError
      body
    end
  end
end
89
+
90
# Acquire an OAuth2 token. Populates @oauth (a Gini::Api::OAuth instance) and
# @token (whatever that instance returns from #token).
# The credentials hash is handed to Gini::Api::OAuth unchanged.
#
# @param [Hash] opts Your authorization credentials
# @option opts [String] :auth_code OAuth authorization code. Will be exchanged for a token
# @option opts [String] :username API username
# @option opts [String] :password API password
#
# @return The acquired token
#
# @example
#   api.login(auth_code: '1234567890')
# @example
#   api.login(username: 'me@example.com', password: 'secret')
#
def login(opts)
  session = Gini::Api::OAuth.new(self, opts)
  @oauth = session
  @token = session.token
end
107
+
108
# Destroy the OAuth2 token by delegating to the object created in #login.
#
# Calling logout before login is a no-op instead of failing on nil.
#
# @return Result of the OAuth object's destroy call, nil when not logged in
#
def logout
  return nil if @oauth.nil?

  @oauth.destroy
end
113
+
114
# Build the versioned accept header from @api_version and the given type.
#
# @param [Symbol, String] type Expected response type (:xml, :json). Defaults to @api_type
#
# @return [Hash] Hash with a single :accept key
#
def version_header(type = @api_type)
  media_type = "application/vnd.gini.#{@api_version}+#{type}"
  { accept: media_type }
end
123
+
124
+ # Request wrapper that sets URI and accept header
125
+ #
126
+ # @param [Symbol] verb HTTP request verb (:get, :post, :put, :delete)
127
+ # @param [String] resource API resource like /documents
128
+ # @param [Hash] options Optional type and custom headers
129
+ # @option options [String] :type Type to pass to version_header (:xml, :json)
130
+ # @option options [Hash] :headers Custom headers. Must include accept
131
+ #
132
+ def request(verb, resource, options = {})
133
+ opts = {
134
+ headers: version_header(options.delete(:type) || @api_type)
135
+ }.merge(options)
136
+
137
+ timeout(@processing_timeout) do
138
+ @token.send(verb.to_sym, @api_uri + URI.parse(resource).path, opts)
139
+ end
140
+ rescue OAuth2::Error => e
141
+ raise Gini::Api::RequestError.new(
142
+ "API request failed: #{verb} #{resource} (code=#{e.response.status})",
143
+ e.response
144
+ )
145
+ rescue Timeout::Error => e
146
+ raise Gini::Api::ProcessingError.new(
147
+ "API request timed out: #{verb} #{resource} (#{e.message})"
148
+ )
149
+ end
150
+
151
# Upload a document and wait until it has been processed.
#
# The file is posted as multipart form data over the upload connection. On a
# 201 response the document found at the returned location header is polled
# until polling returns or @processing_timeout elapses.
#
# @param [String] file path of the document to upload
# @yield [Gini::Api::Document] Optional block that is forwarded to Document#poll
#
# @return [Gini::Api::Document] Return Gini::Api::Document object for uploaded document.
#   Its duration is set to a hash with :upload, :processing and :total seconds
#
# @raise [Gini::Api::UploadError] when the upload is not answered with HTTP 201
# @raise [Gini::Api::ProcessingError] when polling exceeds @processing_timeout.
#   The exception carries the document id in docid
#
# @example Upload and wait for completion
#   doc = api.upload('/tmp/myfile.pdf')
# @example Upload and monitor progress
#   doc = api.upload('/tmp/myfile.pdf') { |d| puts "Progress: #{d.progress}" }
#
def upload(file, &block)
  @log.info("Uploading #{file}")

  duration = {}

  # Document upload
  duration[:upload] = Benchmark.realtime do
    @response = @upload_connection.post do |req|
      req.options[:timeout] = @upload_timeout
      req.url 'documents/'
      req.headers['Content-Type'] = 'multipart/form-data'
      req.headers['Authorization'] = "Bearer #{@token.token}"
      req.headers.merge!(version_header)
      req.body = { file: Faraday::UploadIO.new(file, 'application/octet-stream') }
    end
  end

  # Anything but 201 means the upload was not accepted
  unless @response.status == 201
    raise Gini::Api::UploadError.new(
      "Document upload failed with HTTP code #{@response.status}",
      @response
    )
  end

  # Start polling once the document has been uploaded successfully
  doc = Gini::Api::Document.new(self, @response.headers['location'])
  begin
    Timeout.timeout(@processing_timeout) do
      duration[:processing] = Benchmark.realtime { doc.poll(&block) }
    end
  rescue Timeout::Error => e
    ex = Gini::Api::ProcessingError.new(e.message)
    ex.docid = doc.id
    raise ex
  end

  # Combine duration values and update doc object
  duration[:total] = duration[:upload] + duration[:processing]
  doc.duration = duration

  doc
end
207
+
208
# Delete a document via a DELETE request on /documents/<id> and log the deletion.
#
# @param [String] id document ID
#
# @raise [Gini::Api::DocumentError] when the response status is not 204
#
def delete(id)
  response = request(:delete, "/documents/#{id}")
  status = response.status

  if status != 204
    raise Gini::Api::DocumentError.new(
      "Deletion of docId #{id} failed (code=#{status})",
      response
    )
  end

  @log.info("Deleted document #{id}")
end
222
+
223
# Get a document by its id.
#
# Builds a Gini::Api::Document for the location /documents/<id>.
#
# @param [String] id document ID
#
# @return [Gini::Api::Document] Return Gini::Api::Document object
#
def get(id)
  location = "/documents/#{id}"
  Gini::Api::Document.new(self, location)
end
232
+
233
# List documents.
#
# Limit and offset are coerced with Integer() and sent as the limit and next
# query parameters of /documents.
#
# @param [Hash] options List options (offset and limit)
# @option options [Integer] :limit Maximum number of documents to return (defaults to 20)
# @option options [Integer] :offset Start offset. Defaults to 0
#
# @return [Gini::Api::DocumentSet] DocumentSet built from the parsed response
#
# @raise [Gini::Api::DocumentError] when the response status is not 200
#
def list(options = {})
  paging = { limit: 20, offset: 0 }.merge(options)
  limit, offset = [:limit, :offset].map { |key| Integer(paging[key]) }

  response = request(:get, "/documents?limit=#{limit}&next=#{offset}")
  if response.status != 200
    raise Gini::Api::DocumentError.new(
      "Failed to get list of documents (code=#{response.status})",
      response
    )
  end

  Gini::Api::DocumentSet.new(self, response.parsed)
end
255
+
256
# Fulltext search for documents.
#
# Query terms and doctype are percent-encoded before they are placed in the
# query string, so reserved characters such as & or = cannot alter the request.
#
# @param [String, Array] query The search term(s), separated by space. Multiple terms as array
# @param [Hash] options Search options
# @option options [String] :type Only include documents with the given doctype
# @option options [Integer] :limit Number of results per page. Must be between 1 and 250. Defaults to 20
# @option options [Integer] :offset Start offset. Defaults to 0
#
# @return [Gini::Api::DocumentSet] DocumentSet built from the parsed response
#
# @raise [Gini::Api::SearchError] when the response status is not 200
#
def search(query, options = {})
  opts = { type: '', limit: 20, offset: 0 }.merge(options)

  # encode_www_form_component writes spaces as '+'; they are converted to %20
  # to keep the output of the former URI.escape call for plain terms
  encode = ->(text) { URI.encode_www_form_component(text.to_s).gsub('+', '%20') }

  # An array of terms is joined with spaces, matching the documented contract
  terms = encode.call(Array(query).join(' '))
  type = encode.call(opts[:type])
  limit = Integer(opts[:limit])
  offset = Integer(opts[:offset])

  response = request(:get, "/search?q=#{terms}&type=#{type}&limit=#{limit}&next=#{offset}")
  unless response.status == 200
    raise Gini::Api::SearchError.new(
      "Search query failed with code #{response.status}",
      response
    )
  end
  Gini::Api::DocumentSet.new(self, response.parsed)
end
282
+ end
283
+ end
284
+ end
@@ -0,0 +1,59 @@
1
+ module Gini
2
+ module Api
3
+
4
+ # Contains document related extractions
5
+ #
6
+ class Document::Extractions
7
+
8
+ attr_reader :raw
9
+
10
# Instantiate a new Gini::Api::Document::Extractions object and immediately
# fetch the extractions from the given location.
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Extractions URL
def initialize(api, location)
  @api, @location = api, location

  update
end
20
+
21
# Fetch the extractions and populate instance variables from them.
#
# The parsed response is kept in @raw. Every entry below :extractions, as well
# as the :candidates entry, becomes an instance variable with a reader that is
# defined on the class.
#
# @raise [Gini::Api::DocumentError] when the response status is not 200
#
def update
  response = @api.request(:get, @location)

  if response.status != 200
    raise Gini::Api::DocumentError.new(
      "Failed to fetch extractions from #{@location}",
      response
    )
  end

  # Entire response
  @raw = response.parsed

  response.parsed[:extractions].each_pair do |label, extraction|
    instance_variable_set("@#{label}", extraction)
    self.class.send(:attr_reader, label)
  end

  @candidates = response.parsed[:candidates]
  self.class.send(:attr_reader, :candidates)
end
44
+
45
# Get the field value for the given extraction key.
#
# The key is looked up in the :extractions part of the fetched response rather
# than being turned into an instance variable name, so arbitrary keys are safe
# and internal state (@api, @location, @raw) cannot be addressed.
#
# @param [String, Symbol] item The extractions item to get the value of
# @return [String, Integer] Returns the value from extractions hash
#
# @raise [Gini::Api::DocumentError] when no extraction with that key exists
#
def [](item)
  table = (@raw && @raw[:extractions]) || {}
  key = item.to_s
  extraction = table[key.to_sym] || table[key]

  unless extraction
    raise Gini::Api::DocumentError.new("Invalid extraction key #{item}: Not found")
  end

  extraction[:value]
end
57
+ end
58
+ end
59
+ end
@@ -0,0 +1,50 @@
1
+ module Gini
2
+ module Api
3
+
4
+ # Contains document layout in XML & JSON
5
+ #
6
+ class Document::Layout
7
+
8
# Instantiate a new Gini::Api::Document::Layout object from layout url.
# Nothing is fetched until to_xml or to_json is called.
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Layout URL
def initialize(api, location)
  @api, @location = api, location
end
16
+
17
# Return layout as XML string. The fetched value is cached in @xml; a nil
# result is not cached, so the next call fetches again.
#
# @return [String] Returns the layout as XML string
def to_xml
  @xml = get_xml unless @xml
  @xml
end
23
+
24
# Return layout as JSON string. The fetched value is cached in @json.
#
# Extra arguments are accepted and ignored: the json library invokes
# to_json(state) on objects it serializes, which a zero-argument method
# would reject with ArgumentError.
#
# @return [String] Returns the layout as JSON string
def to_json(*_args)
  @json ||= get_json
end
30
+
31
+ private
32
+
33
# Fetch the layout with the xml response type.
#
# @return [String, nil] Response body, nil unless the status is 200
def get_xml
  response = @api.request(:get, @location, type: 'xml')
  return nil unless response.status == 200

  response.body
end
40
+
41
# Fetch the layout with the client's default response type.
#
# @return [String, nil] Response body, nil unless the status is 200
def get_json
  response = @api.request(:get, @location)
  return nil unless response.status == 200

  response.body
end
48
+ end
49
+ end
50
+ end
@@ -0,0 +1,136 @@
1
+ require 'eventmachine'
2
+
3
+ module Gini
4
+ module Api
5
+
6
+ # Contains document related data from uploaded or fetched document
7
+ #
8
+ class Document
9
+
10
+ attr_accessor :duration
11
+
12
# Instantiate a new Gini::Api::Document object from URL.
#
# The document is populated right away through update: from from_data when it
# is given, otherwise by fetching the location.
#
# @param [Gini::Api::Client] api Gini::Api::Client object
# @param [String] location Document URL
# @param [Hash] from_data Hash with doc data (from search for example)
#
def initialize(api, location, from_data = nil)
  @api, @location = api, location

  update(from_data)
end
24
+
25
# Populate instance variables from document data.
#
# Without from_data the document resource is fetched from @location. Every
# key of the data becomes an instance variable. A reader is generated for the
# key unless the class already defines a method of that name, so real methods
# (for example pages, which post-processes @pages) are never replaced by
# whatever keys the data happens to contain.
#
# @param [Hash] from_data Ruby hash with doc data
#
# @return [Hash] The data the document was populated from
#
# @raise [Gini::Api::DocumentError] when fetching does not return status 200
#
def update(from_data = nil)
  data = from_data

  if data.nil?
    response = @api.request(:get, @location)
    unless response.status == 200
      raise Gini::Api::DocumentError.new(
        "Failed to fetch document data (code=#{response.status})",
        response
      )
    end
    data = response.parsed
  end

  # Methods defined directly on the class, including readers from earlier updates
  existing = self.class.instance_methods(false)

  data.each do |k, v|
    instance_variable_set("@#{k}", v)

    # Compare as symbol so string keys are protected as well
    name = k.to_sym
    next if existing.include?(name)

    self.class.send(:attr_reader, name)
    existing << name
  end
end
54
+
55
# Poll document progress inside an EventMachine loop.
#
# On every timer tick the document is updated; the loop is stopped once
# @progress matches COMPLETED or ERROR. The optional block is called with the
# document after every update, including the final one.
#
# @param [Float] interval API polling interval in seconds
# @yield [Gini::Api::Document] The document after each update
#
def poll(interval = 0.5)
  EM.run do
    EM.add_periodic_timer(interval) do
      update

      finished = @progress =~ /(COMPLETED|ERROR)/
      EM.stop if finished

      yield self if block_given?
    end
  end
end
69
+
70
# Get the processed document from the :processed link.
#
# The request is sent with an application/octet-stream accept header.
#
# @return [data] The binary representation of the processed document (pdf, jpg, png, ...)
#
# @raise [Gini::Api::DocumentError] when the response status is not 200
#
def processed
  binary_headers = { accept: 'application/octet-stream' }
  response = @api.request(:get, @_links[:processed], headers: binary_headers)

  if response.status != 200
    raise Gini::Api::DocumentError.new(
      "Failed to fetch processed document (code=#{response.status})",
      response
    )
  end

  response.body
end
88
+
89
# Return the extractions object for the :extractions link.
# It is created on first use and cached in @extractions afterwards.
#
# @return [Gini::Api::Document::Extractions] Return Gini::Api::Document::Extractions object for uploaded document
#
def extractions
  return @extractions if @extractions

  @extractions = Gini::Api::Document::Extractions.new(@api, @_links[:extractions])
end
96
+
97
# Return the layout object for the :layout link.
# It is created on first use and cached in @layout afterwards.
#
# @return [Gini::Api::Document::Layout] Return Gini::Api::Document::Layout object for uploaded document
#
def layout
  return @layout if @layout

  @layout = Gini::Api::Document::Layout.new(@api, @_links[:layout])
end
104
+
105
# Return the :images entry of every page in @pages, in the order of @pages
# (the first page ends up at index 0). Other page keys such as :pageNumber
# are dropped.
#
# @return [Array] One entry per page, empty when no page data has been loaded
#
def pages
  # @pages is only set when the document data contained a pages key
  (@pages || []).map { |page| page[:images] }
end
111
+
112
# Submit feedback on extraction label.
#
# The label is validated against the keys of the fetched extractions. It is
# never dispatched as a method name, so a label such as "update" cannot
# trigger arbitrary method calls.
#
# @param [String] label Extraction label to submit feedback on
# @param [String] value The new value for the given label
#
# @raise [Gini::Api::DocumentError] when the label is unknown or the response
#   status is not 204
#
def submit_feedback(label, value)
  known = extractions.raw[:extractions] || {}
  unless known.key?(label.to_sym) || known.key?(label.to_s)
    raise Gini::Api::DocumentError.new("Unknown label #{label}: Not found")
  end

  response = @api.request(
    :put,
    "#{@_links[:extractions]}/#{label}",
    headers: { 'content-type' => @api.version_header[:accept] },
    body: { value: value }.to_json
  )
  unless response.status == 204
    raise Gini::Api::DocumentError.new(
      "Failed to submit feedback for label #{label} (code=#{response.status})",
      response
    )
  end
end
134
+ end
135
+ end
136
+ end
@@ -0,0 +1,35 @@
1
+ module Gini
2
+ module Api
3
+
4
# Set of documents resulting from search or list query.
#
# Includes Enumerable, iterating over the contained documents.
#
class DocumentSet
  # NOTE(review): :offset is exposed as a reader but nothing visible here ever
  # assigns @offset, so it returns nil. Confirm the response key it should be
  # taken from before populating it.
  attr_reader :total, :offset, :documents

  # Enumerable mixin
  include Enumerable

  # Instantiate a new Gini::Api::DocumentSet object from a parsed response
  #
  # @param [Gini::Api::Client] api Gini::Api::Client object
  # @param [Hash] data Container for documents
  # @option data [Integer] :totalCount Total number of documents
  # @option data [Array] :documents List of documents including all data
  #
  def initialize(api, data)
    @total = data[:totalCount]
    # Each entry already carries the document data, so no request is needed
    @documents = data[:documents].map do |doc|
      Gini::Api::Document.new(api, doc[:_links][:document], doc)
    end
  end

  # Allow iteration on documents by yielding documents.
  # Required by Enumerable mixin
  #
  # @yield [Gini::Api::Document] Each document of the set
  # @return [Enumerator] when called without a block
  #
  def each(&block)
    return to_enum(:each) unless block_given?

    @documents.each(&block)
  end
end
34
+ end
35
+ end
@@ -0,0 +1,100 @@
1
+ module Gini
2
+ module Api
3
+
4
# Base api exception class
#
# @!attribute [r] api_response
#   @return [Faraday::Response] Faraday response object
# @!attribute [r] api_method
#   @return [String] HTTP method (:get, :post, :put, :delete)
# @!attribute [r] api_url
#   @return [String] Request URL
# @!attribute [r] api_status
#   @return [Integer] HTTP status code
# @!attribute [r] api_message
#   @return [String] Message from API error object ('undef' when unavailable)
#   @see http://developer.gini.net/gini-api/html/overview.html#client-errors
# @!attribute [r] api_request_id
#   @return [String] Request id from API error object ('undef' when unavailable)
#   @see http://developer.gini.net/gini-api/html/overview.html#client-errors
# @!attribute [rw] docid
#   @return [String] Optional document-id that caused the exception
#
class Error < StandardError
  attr_reader :api_response, :api_method, :api_url
  attr_reader :api_status, :api_message, :api_request_id
  attr_accessor :docid

  # Parse response object and set instance vars accordingly
  #
  # @param [String] msg Exception message
  # @param [OAuth2::Response] api_response Faraday/Oauth2 response object from API
  #
  def initialize(msg, api_response = nil)
    super(msg)

    # Automatically use included response object if possible
    @api_response = api_response.respond_to?(:response) ? api_response.response : api_response

    # Parse response and set instance vars
    parse_response unless @api_response.nil?
  end

  # Build api error message from api response
  #
  # @return [String, nil] Summary of method, url, status, message and request
  #   id; nil when the exception carries no response
  #
  def api_error
    return nil if @api_response.nil?

    "#{@api_method.to_s.upcase} #{@api_url} : #{@api_status} - " \
      "#{@api_message} (request Id: #{@api_request_id})"
  end

  # Parse Faraday response and fill instance variables.
  #
  # Message and request id default to 'undef'. They are only replaced when the
  # body is a JSON object that actually contains the respective key, so a
  # missing, empty, malformed or non-object body never raises from here.
  #
  def parse_response
    @api_method = @api_response.env[:method]
    @api_url = @api_response.env[:url].to_s
    @api_status = @api_response.status
    @api_message = 'undef'
    @api_request_id = 'undef'

    body = @api_response.body.to_s
    return if body.empty?

    begin
      parsed = JSON.parse(body, symbolize_names: true)
    rescue JSON::ParserError
      # We fail silently as defaults have been set
      return
    end
    return unless parsed.is_a?(Hash)

    @api_message = parsed.fetch(:message, @api_message)
    @api_request_id = parsed.fetch(:requestId, @api_request_id)
  end
end

# OAuth related errors
#
OAuthError = Class.new(Error)

# Document related errors
#
DocumentError = Class.new(Error)

# Upload related errors
#
UploadError = Class.new(Error)

# Processing related errors
#
ProcessingError = Class.new(Error)

# Search related errors
#
SearchError = Class.new(Error)

# Generic request errors
#
RequestError = Class.new(Error)
99
+ end
100
+ end