mindee 3.16.0 → 3.18.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (55) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.md +4 -4
  4. data/bin/mindee.rb +20 -8
  5. data/docs/code_samples/{international_id_v1_async.txt → driver_license_v1_async.txt} +1 -1
  6. data/docs/code_samples/french_healthcard_v1_async.txt +19 -0
  7. data/docs/code_samples/{carte_vitale_v1.txt → payslip_fra_v3_async.txt} +2 -2
  8. data/docs/code_samples/workflow_execution.txt +29 -0
  9. data/docs/custom_v1.md +1 -1
  10. data/docs/driver_license_v1.md +156 -0
  11. data/docs/{carte_vitale_v1.md → french_healthcard_v1.md} +14 -24
  12. data/docs/getting_started.md +5 -5
  13. data/docs/payslip_fra_v3.md +319 -0
  14. data/lib/mindee/client.rb +40 -0
  15. data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +34 -19
  16. data/lib/mindee/http/workflow_endpoint.rb +90 -0
  17. data/lib/mindee/http.rb +1 -0
  18. data/lib/mindee/input/sources/base64_input_source.rb +31 -0
  19. data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
  20. data/lib/mindee/input/sources/file_input_source.rb +20 -0
  21. data/lib/mindee/input/sources/local_input_source.rb +183 -0
  22. data/lib/mindee/input/sources/path_input_source.rb +20 -0
  23. data/lib/mindee/input/sources/url_input_source.rb +127 -0
  24. data/lib/mindee/input/sources.rb +6 -248
  25. data/lib/mindee/parsing/common/api_response.rb +22 -1
  26. data/lib/mindee/parsing/common/execution.rb +73 -0
  27. data/lib/mindee/parsing/common/execution_file.rb +24 -0
  28. data/lib/mindee/parsing/common/execution_priority.rb +30 -0
  29. data/lib/mindee/parsing/common.rb +3 -0
  30. data/lib/mindee/product/{international_id/international_id_v1.rb → driver_license/driver_license_v1.rb} +9 -9
  31. data/lib/mindee/product/driver_license/driver_license_v1_document.rb +91 -0
  32. data/lib/mindee/product/{international_id/international_id_v1_page.rb → driver_license/driver_license_v1_page.rb} +7 -7
  33. data/lib/mindee/product/fr/{carte_vitale/carte_vitale_v1.rb → health_card/health_card_v1.rb} +9 -9
  34. data/lib/mindee/product/fr/{carte_vitale/carte_vitale_v1_document.rb → health_card/health_card_v1_document.rb} +6 -6
  35. data/lib/mindee/product/fr/{carte_vitale/carte_vitale_v1_page.rb → health_card/health_card_v1_page.rb} +7 -7
  36. data/lib/mindee/product/fr/payslip/payslip_v3.rb +41 -0
  37. data/lib/mindee/product/fr/payslip/payslip_v3_bank_account_detail.rb +54 -0
  38. data/lib/mindee/product/fr/payslip/payslip_v3_document.rb +166 -0
  39. data/lib/mindee/product/fr/payslip/payslip_v3_employee.rb +78 -0
  40. data/lib/mindee/product/fr/payslip/payslip_v3_employer.rb +78 -0
  41. data/lib/mindee/product/fr/payslip/payslip_v3_employment.rb +78 -0
  42. data/lib/mindee/product/fr/payslip/payslip_v3_page.rb +34 -0
  43. data/lib/mindee/product/fr/payslip/payslip_v3_paid_time_off.rb +89 -0
  44. data/lib/mindee/product/fr/payslip/payslip_v3_pay_detail.rb +100 -0
  45. data/lib/mindee/product/fr/payslip/payslip_v3_pay_period.rb +66 -0
  46. data/lib/mindee/product/fr/payslip/payslip_v3_salary_detail.rb +89 -0
  47. data/lib/mindee/product/resume/resume_v1_document.rb +1 -1
  48. data/lib/mindee/product/resume/resume_v1_page.rb +1 -1
  49. data/lib/mindee/product.rb +3 -2
  50. data/lib/mindee/version.rb +1 -1
  51. metadata +36 -14
  52. data/docs/eu_driver_license_v1.md +0 -227
  53. data/docs/proof_of_address_v1.md +0 -211
  54. data/docs/us_driver_license_v1.md +0 -272
  55. data/lib/mindee/product/international_id/international_id_v1_document.rb +0 -109
@@ -0,0 +1,183 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+ require 'marcel'
5
+
6
+ require_relative '../../pdf'
7
+ require_relative '../../image'
8
+
9
+ module Mindee
10
+ module Input
11
+ # Document source handling.
12
+ module Source
13
# Mime types accepted by the server; anything else is rejected when a document is loaded.
ALLOWED_MIME_TYPES = %w[
  application/pdf
  image/heic
  image/png
  image/jpeg
  image/tiff
  image/webp
].freeze
22
+
23
# Base error for mime type problems; carries no behaviour of its own beyond StandardError.
class MimeTypeError < StandardError; end
26
+
27
# Raised when the mime type of a file is not one of ALLOWED_MIME_TYPES.
class InvalidMimeTypeError < MimeTypeError
  # The mime type that was given to the constructor.
  # @return [String]
  attr_reader :invalid_mimetype

  # Builds the error message from the rejected type and the list of allowed ones.
  # @param mime_type [String]
  def initialize(mime_type)
    @invalid_mimetype = mime_type
    accepted = ALLOWED_MIME_TYPES.join(', ')
    super("'#{mime_type}' mime type not allowed, must be one of #{accepted}")
  end
end
38
+
39
# Raised when a corrupted PDF file could not be repaired.
class UnfixablePDFError < MimeTypeError
  # Takes no argument: the message is fixed.
  def initialize
    super("Corrupted PDF couldn't be repaired.")
  end
end
45
+
46
# Base class for loading documents. The mime type of the stream is validated on construction.
class LocalInputSource
  # @return [String]
  attr_reader :filename
  # @return [String]
  attr_reader :file_mimetype
  # @return [StringIO]
  attr_reader :io_stream

  # Stores the stream and checks that its mime type is one of ALLOWED_MIME_TYPES.
  #
  # With `fix_pdf`, the mime type is detected without giving the file name to Marcel, and a rejected
  # file whose name ends with `.pdf` goes through #rescue_broken_pdf before being checked once more.
  # @param io_stream [StringIO]
  # @param filename [String]
  # @param fix_pdf [Boolean]
  # @raise [InvalidMimeTypeError] if the mime type is (still) not allowed.
  # @raise [UnfixablePDFError] if `fix_pdf` is set and the PDF could not be repaired.
  def initialize(io_stream, filename, fix_pdf: false)
    @io_stream = io_stream
    @filename = filename
    @file_mimetype = if fix_pdf
                       Marcel::MimeType.for @io_stream
                     else
                       Marcel::MimeType.for @io_stream, name: @filename
                     end
    return if ALLOWED_MIME_TYPES.include? @file_mimetype

    if filename.end_with?('.pdf') && fix_pdf
      rescue_broken_pdf(@io_stream)
      @file_mimetype = Marcel::MimeType.for @io_stream

      return if ALLOWED_MIME_TYPES.include? @file_mimetype
    end

    raise InvalidMimeTypeError, @file_mimetype.to_s
  end

  # Attempts to fix pdf files if mimetype is rejected.
  # "Broken PDFs" are often a result of third-party injecting invalid headers.
  # Everything before the first `%PDF-` marker is dropped, and the current stream is closed and
  # replaced by a rewound, binary in-memory copy of what remains.
  # @param stream [StringIO]
  # @raise [UnfixablePDFError] if the marker is missing, if nothing follows it, or if it ends beyond
  #   the first 500 bytes.
  def rescue_broken_pdf(stream)
    # Always search from the very beginning, wherever earlier reads left the cursor.
    stream.rewind
    stream.gets('%PDF-')
    raise UnfixablePDFError if stream.eof? || stream.pos > 500

    # Step back over the 5-byte marker so that it stays part of the data.
    stream.pos = stream.pos - 5
    data = stream.read
    @io_stream.close

    # Building the StringIO from the data (rather than appending to an empty one) leaves the cursor at 0.
    @io_stream = StringIO.new(data)
    @io_stream.set_encoding Encoding::BINARY
  end

  # Shorthand for pdf mimetype validation.
  # @return [Boolean]
  def pdf?
    @file_mimetype.to_s == 'application/pdf'
  end

  # Parses a PDF file according to provided options and replaces the stream with the result.
  # @param options [Hash, nil] Page cutting/merge options:
  #
  #  * `:page_indexes` Zero-based list of page indexes.
  #  * `:operation` Operation to apply on the document, given the `page_indexes` specified:
  #      * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
  #      * `:REMOVE` - remove the specified pages, and keep all others.
  #  * `:on_min_pages` Apply the operation only if document has at least this many pages.
  def process_pdf(options)
    @io_stream.seek(0)
    # Fully qualified, like in #count_pdf_pages: the bare constant is not in this class' lexical scope.
    @io_stream = Mindee::PDF::PdfProcessor.parse(@io_stream, options)
  end

  # Reads the whole document from the start of the stream.
  # @param close [Boolean] Whether to close the stream once it has been read.
  # @return [Array] `['document', data, { filename: escaped_filename }]`
  def read_document(close: true)
    @io_stream.seek(0)
    # Avoids needlessly re-packing some files
    data = @io_stream.read
    @io_stream.close if close
    ['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
  end

  # Counts the pages of the document.
  # @return [Integer] 1 for anything that is not a PDF, otherwise the size of the page list of the opened PDF.
  def count_pdf_pages
    return 1 unless pdf?

    @io_stream.seek(0)
    pdf_processor = Mindee::PDF::PdfProcessor.open_pdf(@io_stream)
    pdf_processor.pages.size
  end

  # Compresses the file, according to the provided info, and replaces the stream with the rewound result.
  # @param [Integer] quality Quality of the output file.
  # @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
  # @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
  # @param [Boolean] force_source_text Whether to force the operation on PDFs with source text.
  #   This will attempt to re-render PDF text over the rasterized original. If disabled, the operation is ignored.
  #   WARNING: this operation is strongly discouraged.
  # @param [Boolean] disable_source_text If the PDF has source text, whether to re-apply it to the original or
  #   not. Needs force_source_text to work.
  def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
    buffer = if pdf?
               Mindee::PDF::PDFCompressor.compress_pdf(
                 @io_stream,
                 quality: quality,
                 force_source_text_compression: force_source_text,
                 disable_source_text: disable_source_text
               )
             else
               Mindee::Image::ImageCompressor.compress_image(
                 @io_stream,
                 quality: quality,
                 max_width: max_width,
                 max_height: max_height
               )
             end
    @io_stream = buffer
    @io_stream.rewind
  end

  # Checks whether the file has source text; the check itself is delegated to PDFTools.
  # @return [Boolean] True if the file is a PDF and has source text.
  def source_text?
    Mindee::PDF::PDFTools.source_text?(@io_stream)
  end
end
166
+
167
# Escapes every multi-byte character of a string as `\u` followed by its code point in lower-case
# hexadecimal, left-padded with zeros to at least four digits. Single-byte characters are copied as is.
# @param string [String]
# @return [String] A new, escaped String.
def self.convert_to_unicode_escape(string)
  string.each_char.with_object(''.dup) do |char, escaped|
    escaped << (char.bytesize > 1 ? "\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}" : char)
  end
end
181
+ end
182
+ end
183
+ end
@@ -0,0 +1,20 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+
5
+ module Mindee
6
+ module Input
7
+ # Document source handling.
8
+ module Source
9
# Load a document from a path.
class PathInputSource < LocalInputSource
  # Opens the file in binary mode and hands it to the parent constructor, together with its base name.
  # If the parent constructor raises, the handle is closed before the error is re-raised, so that a
  # rejected file does not stay open.
  # @param filepath [String]
  # @param fix_pdf [Boolean]
  def initialize(filepath, fix_pdf: false)
    io_stream = File.open(filepath, 'rb')
    begin
      super(io_stream, File.basename(filepath), fix_pdf: fix_pdf)
    rescue StandardError
      # The parent may already have closed the handle while trying to repair a PDF.
      io_stream.close unless io_stream.closed?
      raise
    end
  end
end
18
+ end
19
+ end
20
+ end
@@ -0,0 +1,127 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'net/http'
4
+ require 'uri'
5
+ require 'fileutils'
6
+
7
+ module Mindee
8
+ module Input
9
+ module Source
10
# Load a remote document from a file url.
class UrlInputSource
  # @return [String]
  attr_reader :url

  # @param url [String] Address of the remote file, must start with `https://`.
  # @raise [RuntimeError] if the url does not start with `https://`.
  def initialize(url)
    raise 'URL must be HTTPS' unless url.start_with? 'https://'

    @url = url
  end

  # Downloads the file from the URL and saves it to the specified path.
  #
  # @param path [String] Path to save the file to.
  # @param filename [String, nil] Optional name to give to the file.
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [String] The full path of the saved file.
  def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
    response_body = fetch_file_content(username: username, password: password, token: token,
                                       max_redirects: max_redirects)

    filename = fill_filename(filename)

    full_path = File.join(path.chomp('/'), filename)
    # Binary write: the body is written byte for byte, without any text-mode newline conversion.
    File.binwrite(full_path, response_body)

    full_path
  end

  # Downloads the file from the url, and returns a BytesInputSource wrapper object for it.
  #
  # @param filename [String, nil] Optional name to give to the file.
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [BytesInputSource] The downloaded file, wrapped as a local input source.
  def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
    filename = fill_filename(filename)
    response_body = fetch_file_content(username: username, password: password, token: token,
                                       max_redirects: max_redirects)

    BytesInputSource.new(response_body, filename)
  end

  # Fetches the file content from the URL.
  #
  # When both a token and a username/password pair are given, basic authentication is set last
  # and therefore replaces the bearer token.
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [String] The downloaded file content.
  # @raise [RuntimeError] if the final response status is outside of the 200..299 range.
  def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
    uri = URI.parse(@url)
    request = Net::HTTP::Get.new(uri)

    request['Authorization'] = "Bearer #{token}" if token
    request.basic_auth(username, password) if username && password

    response = make_request(uri, request, max_redirects)
    if response.code.to_i > 299
      raise "Failed to download file: HTTP status code #{response.code}"
    elsif response.code.to_i < 200
      raise "Failed to download file: Invalid response code #{response.code}."
    end

    response.body
  end

  private

  # Last segment of the path of a URI (possibly an empty String).
  def extract_filename_from_url(uri)
    File.basename(uri.path)
  end

  # Picks the name to give to the downloaded file: the provided one, else the one found in the url.
  # A name that is empty or has no extension is replaced by a generated one.
  def fill_filename(filename)
    filename ||= extract_filename_from_url(URI.parse(@url))
    if filename.empty? || File.extname(filename).empty?
      # NOTE(review): in this branch the name has no extension, so get_file_extension returns nil
      # and the generated name always ends with '.tmp'.
      filename = generate_file_name(extension: get_file_extension(filename))
    end
    filename
  end

  # Performs the request over SSL, following redirections while `max_redirects` is positive.
  # Redirected requests are fresh GET requests: headers of the initial request are not copied over.
  def make_request(uri, request, max_redirects)
    Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      response = http.request(request)
      if response.is_a?(Net::HTTPRedirection) && max_redirects.positive?
        location = response['location']
        raise 'No location in redirection header.' if location.nil?

        new_uri = resolve_redirect(uri, location)
        request = Net::HTTP::Get.new(new_uri)
        make_request(new_uri, request, max_redirects - 1)
      else
        response
      end
    end
  end

  # Resolves a `Location` header against the URI that was just requested, so that relative
  # redirections (e.g. '/other/file.pdf') keep pointing at a host.
  def resolve_redirect(uri, location)
    uri.merge(location)
  end

  # Lower-cased extension of a file name (dot included), nil when there is none.
  def get_file_extension(filename)
    ext = File.extname(filename)
    ext.empty? ? nil : ext.downcase
  end

  # Generates a name from the current time and 8 random base-36 characters.
  def generate_file_name(extension: nil)
    extension ||= '.tmp'
    random_string = Array.new(8) { rand(36).to_s(36) }.join
    "mindee_temp_#{Time.now.strftime('%Y-%m-%d_%H-%M-%S')}_#{random_string}#{extension}"
  end
end
125
+ end
126
+ end
127
+ end
@@ -1,250 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require 'stringio'
4
- require 'marcel'
5
-
6
- require_relative '../pdf'
7
- require_relative '../image'
8
-
9
- module Mindee
10
- module Input
11
- # Document source handling.
12
- module Source
13
- # Mime types accepted by the server.
14
- ALLOWED_MIME_TYPES = [
15
- 'application/pdf',
16
- 'image/heic',
17
- 'image/png',
18
- 'image/jpeg',
19
- 'image/tiff',
20
- 'image/webp',
21
- ].freeze
22
-
23
- # Standard error for invalid mime types
24
- class MimeTypeError < StandardError
25
- end
26
-
27
- # Error sent if the file's mimetype isn't allowed
28
- class InvalidMimeTypeError < MimeTypeError
29
- # @return [String]
30
- attr_reader :invalid_mimetype
31
-
32
- # @param mime_type [String]
33
- def initialize(mime_type)
34
- @invalid_mimetype = mime_type
35
- super("'#{@invalid_mimetype}' mime type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}")
36
- end
37
- end
38
-
39
- # Error sent if a pdf file couldn't be fixed
40
- class UnfixablePDFError < MimeTypeError
41
- def initialize
42
- super("Corrupted PDF couldn't be repaired.")
43
- end
44
- end
45
-
46
- # Base class for loading documents.
47
- class LocalInputSource
48
- # @return [String]
49
- attr_reader :filename
50
- # @return [String]
51
- attr_reader :file_mimetype
52
- # @return [StringIO]
53
- attr_reader :io_stream
54
-
55
- # @param io_stream [StringIO]
56
- # @param filename [String]
57
- # @param fix_pdf [Boolean]
58
- def initialize(io_stream, filename, fix_pdf: false)
59
- @io_stream = io_stream
60
- @filename = filename
61
- @file_mimetype = if fix_pdf
62
- Marcel::MimeType.for @io_stream
63
- else
64
- Marcel::MimeType.for @io_stream, name: @filename
65
- end
66
- return if ALLOWED_MIME_TYPES.include? @file_mimetype
67
-
68
- if filename.end_with?('.pdf') && fix_pdf
69
- rescue_broken_pdf(@io_stream)
70
- @file_mimetype = Marcel::MimeType.for @io_stream
71
-
72
- return if ALLOWED_MIME_TYPES.include? @file_mimetype
73
- end
74
-
75
- raise InvalidMimeTypeError, @file_mimetype.to_s
76
- end
77
-
78
- # Attempts to fix pdf files if mimetype is rejected.
79
- # "Broken PDFs" are often a result of third-party injecting invalid headers.
80
- # This attempts to remove them and send the file
81
- # @param stream [StringIO]
82
- def rescue_broken_pdf(stream)
83
- stream.gets('%PDF-')
84
- raise UnfixablePDFError if stream.eof? || stream.pos > 500
85
-
86
- stream.pos = stream.pos - 5
87
- data = stream.read
88
- @io_stream.close
89
-
90
- @io_stream = StringIO.new
91
- @io_stream << data
92
- end
93
-
94
- # Shorthand for pdf mimetype validation.
95
- def pdf?
96
- @file_mimetype.to_s == 'application/pdf'
97
- end
98
-
99
- # Parses a PDF file according to provided options.
100
- # @param options [Hash, nil] Page cutting/merge options:
101
- #
102
- # * `:page_indexes` Zero-based list of page indexes.
103
- # * `:operation` Operation to apply on the document, given the `page_indexes specified:
104
- # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
105
- # * `:REMOVE` - remove the specified pages, and keep all others.
106
- # * `:on_min_pages` Apply the operation only if document has at least this many pages.
107
- def process_pdf(options)
108
- @io_stream.seek(0)
109
- @io_stream = PdfProcessor.parse(@io_stream, options)
110
- end
111
-
112
- # Reads a document.
113
- # @param close [Boolean]
114
- # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
115
- def read_document(close: true)
116
- @io_stream.seek(0)
117
- # Avoids needlessly re-packing some files
118
- data = @io_stream.read
119
- @io_stream.close if close
120
- ['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
121
- end
122
-
123
- def count_pdf_pages
124
- return 1 unless pdf?
125
-
126
- @io_stream.seek(0)
127
- pdf_processor = Mindee::PDF::PdfProcessor.open_pdf(@io_stream)
128
- pdf_processor.pages.size
129
- end
130
-
131
- # Compresses the file, according to the provided info.
132
- # @param [Integer] quality Quality of the output file.
133
- # @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
134
- # @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
135
- # @param [Boolean] force_source_text Whether to force the operation on PDFs with source text.
136
- # This will attempt to re-render PDF text over the rasterized original. If disabled, ignored the operation.
137
- # WARNING: this operation is strongly discouraged.
138
- # @param [Boolean] disable_source_text If the PDF has source text, whether to re-apply it to the original or
139
- # not. Needs force_source_text to work.
140
- def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
141
- buffer = if pdf?
142
- Mindee::PDF::PDFCompressor.compress_pdf(
143
- @io_stream,
144
- quality: quality,
145
- force_source_text_compression: force_source_text,
146
- disable_source_text: disable_source_text
147
- )
148
- else
149
- Mindee::Image::ImageCompressor.compress_image(
150
- @io_stream,
151
- quality: quality,
152
- max_width: max_width,
153
- max_height: max_height
154
- )
155
- end
156
- @io_stream = buffer
157
- @io_stream.rewind
158
- end
159
-
160
- # Checks whether the file has source text if it is a pdf. False otherwise
161
- # @return [Boolean] True if the file is a PDF and has source text.
162
- def source_text?
163
- Mindee::PDF::PDFTools.source_text?(@io_stream)
164
- end
165
- end
166
-
167
- # Load a document from a path.
168
- class PathInputSource < LocalInputSource
169
- # @param filepath [String]
170
- # @param fix_pdf [Boolean]
171
- def initialize(filepath, fix_pdf: false)
172
- io_stream = File.open(filepath, 'rb')
173
- super(io_stream, File.basename(filepath), fix_pdf: fix_pdf)
174
- end
175
- end
176
-
177
- # Load a document from a base64 string.
178
- class Base64InputSource < LocalInputSource
179
- # @param base64_string [String]
180
- # @param filename [String]
181
- # @param fix_pdf [Boolean]
182
- def initialize(base64_string, filename, fix_pdf: false)
183
- io_stream = StringIO.new(base64_string.unpack1('m*'))
184
- io_stream.set_encoding Encoding::BINARY
185
- super(io_stream, filename, fix_pdf: fix_pdf)
186
- end
187
-
188
- # Overload of the same function to prevent a base64 from being re-encoded.
189
- # @param close [Boolean]
190
- # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
191
- def read_document(close: true)
192
- @io_stream.seek(0)
193
- data = @io_stream.read
194
- @io_stream.close if close
195
- ['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
196
- end
197
- end
198
-
199
- # Load a document from raw bytes.
200
- class BytesInputSource < LocalInputSource
201
- # @param raw_bytes [String]
202
- # @param filename [String]
203
- # @param fix_pdf [Boolean]
204
- def initialize(raw_bytes, filename, fix_pdf: false)
205
- io_stream = StringIO.new(raw_bytes)
206
- io_stream.set_encoding Encoding::BINARY
207
- super(io_stream, filename, fix_pdf: fix_pdf)
208
- end
209
- end
210
-
211
- # Load a document from a file handle.
212
- class FileInputSource < LocalInputSource
213
- # @param input_file [File]
214
- # @param filename [String]
215
- # @param fix_pdf [Boolean]
216
- def initialize(input_file, filename, fix_pdf: false)
217
- io_stream = input_file
218
- super(io_stream, filename, fix_pdf: fix_pdf)
219
- end
220
- end
221
-
222
- # Load a remote document from a file url.
223
- class UrlInputSource
224
- # @return [String]
225
- attr_reader :url
226
-
227
- def initialize(url)
228
- raise 'URL must be HTTPS' unless url.start_with? 'https://'
229
-
230
- @url = url
231
- end
232
- end
233
-
234
- # Replaces non-ASCII characters by their unicode escape sequence.
235
- # Keeps other characters as is.
236
- # @return A clean String.
237
- def self.convert_to_unicode_escape(string)
238
- unicode_escape_string = ''.dup
239
- string.each_char do |char|
240
- unicode_escape_string << if char.bytesize > 1
241
- "\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
242
- else
243
- char
244
- end
245
- end
246
- unicode_escape_string
247
- end
248
- end
249
- end
250
- end
3
+ require_relative 'sources/local_input_source'
4
+ require_relative 'sources/bytes_input_source'
5
+ require_relative 'sources/base64_input_source'
6
+ require_relative 'sources/file_input_source'
7
+ require_relative 'sources/path_input_source'
8
+ require_relative 'sources/url_input_source'
@@ -32,7 +32,7 @@ module Mindee
32
32
  attr_reader :id
33
33
  # @return [Mindee::Parsing::Standard::DateField]
34
34
  attr_reader :issued_at
35
- # @return [Mindee::Parsing::Standard::DateField, nil]
35
+ # @return [Time, nil]
36
36
  attr_reader :available_at
37
37
  # @return [JobStatus, Symbol]
38
38
  attr_reader :status
@@ -121,6 +121,27 @@ module Mindee
121
121
  @job = Mindee::Parsing::Common::Job.new(http_response['job']) if http_response.key?('job')
122
122
  end
123
123
  end
124
+
125
# Server response received after a document has been sent to a workflow.
class WorkflowResponse
  # Execution built from the 'execution' entry of the response.
  # @return [Mindee::Parsing::Common::Execution]
  attr_reader :execution
  # Request details built from the 'api_request' entry of the response.
  # @return [Mindee::Parsing::Common::ApiRequest]
  attr_reader :api_request
  # Raw response, converted to a String.
  # @return [String]
  attr_reader :raw_http

  # @param product_class [Mindee::Inference, nil] Class handed to the execution; GeneratedV1 is used when nil.
  # @param http_response [Hash]
  # @param raw_http [#to_s]
  def initialize(product_class, http_response, raw_http)
    @raw_http = raw_http.to_s
    @api_request = Mindee::Parsing::Common::ApiRequest.new(http_response['api_request'])
    inference_class = product_class || Product::Generated::GeneratedV1
    @execution = Mindee::Parsing::Common::Execution.new(inference_class, http_response['execution'])
  end
end
124
145
  end
125
146
  end
126
147
  end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module Parsing
5
+ module Common
6
# Time.iso8601 is provided by the 'time' standard library extension.
require 'time'

# Representation of a workflow execution, built from the 'execution' part of a server response.
class Execution
  # Identifier for the batch to which the execution belongs.
  # @return [String]
  attr_reader :batch_name
  # The time at which the execution started.
  # @return [Time, nil]
  attr_reader :created_at
  # File representation within a workflow execution.
  # @return [ExecutionFile]
  attr_reader :file
  # Identifier for the execution.
  # @return [String]
  attr_reader :id
  # Deserialized inference object.
  # @return [Mindee::Inference]
  attr_reader :inference
  # Priority of the execution.
  # @return [ExecutionPriority]
  attr_reader :priority
  # The time at which the file was tagged as reviewed.
  # @return [Time, nil]
  attr_reader :reviewed_at
  # Parsed from the 'available_at' entry of the response.
  # NOTE(review): presumably the time at which the execution became available; confirm against the API.
  # @return [Time, nil]
  attr_reader :available_at
  # Reviewed fields and values.
  # @return [Mindee::Product::Generated::GeneratedV1Document]
  attr_reader :reviewed_prediction
  # Execution Status.
  # @return [String]
  attr_reader :status
  # Execution type.
  # @return [String]
  attr_reader :type
  # The time at which the file was uploaded to a workflow.
  # @return [Time, nil]
  attr_reader :uploaded_at
  # Identifier for the workflow.
  # @return [String]
  attr_reader :workflow_id

  # Entries that are absent (or nil) in the response leave the matching attribute set to nil.
  # @param product_class [Mindee::Inference] Class instantiated with the 'inference' entry, when present.
  # @param http_response [Hash]
  def initialize(product_class, http_response)
    @batch_name = http_response['batch_name']
    @created_at = parse_time(http_response['created_at'])
    @file = ExecutionFile.new(http_response['file']) if http_response['file']
    @id = http_response['id']
    @inference = product_class.new(http_response['inference']) if http_response['inference']
    @priority = Mindee::Parsing::Common::ExecutionPriority.to_priority(http_response['priority'])
    @reviewed_at = parse_time(http_response['reviewed_at'])
    @available_at = parse_time(http_response['available_at'])
    reviewed = http_response['reviewed_prediction']
    # Fully qualified: the bare constant cannot be found from the Mindee::Parsing::Common namespace.
    @reviewed_prediction = Mindee::Product::Generated::GeneratedV1Document.new(reviewed) if reviewed
    @status = http_response['status']
    @type = http_response['type']
    @uploaded_at = parse_time(http_response['uploaded_at'])
    @workflow_id = http_response['workflow_id']
  end

  private

  # Parses an ISO 8601 timestamp, nil (or false) is passed through as nil.
  # @param value [String, nil]
  # @return [Time, nil]
  def parse_time(value)
    Time.iso8601(value) if value
  end
end
71
+ end
72
+ end
73
+ end