mindee 3.1.1 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +16 -0
  3. data/README.md +57 -7
  4. data/bin/mindee.rb +160 -83
  5. data/docs/bank_account_details_v2.md +137 -0
  6. data/docs/bank_check_v1.md +179 -0
  7. data/docs/barcode_reader_v1.md +104 -0
  8. data/docs/carte_vitale_v1.md +123 -0
  9. data/docs/code_samples/barcode_reader_v1.txt +19 -0
  10. data/docs/code_samples/cropper_v1.txt +16 -0
  11. data/docs/code_samples/idcard_fr_v2.txt +19 -0
  12. data/docs/code_samples/invoice_splitter_v1_async.txt +6 -54
  13. data/docs/code_samples/multi_receipts_detector_v1.txt +19 -0
  14. data/docs/code_samples/us_w9_v1.txt +16 -0
  15. data/docs/cropper_v1.md +97 -0
  16. data/docs/custom_v1.md +101 -0
  17. data/docs/expense_receipts_v5.md +306 -0
  18. data/docs/financial_document_v1.md +384 -0
  19. data/docs/{ruby-getting-started.md → getting_started.md} +22 -6
  20. data/docs/idcard_fr_v2.md +253 -0
  21. data/docs/invoice_splitter_v1.md +85 -0
  22. data/docs/invoices_v4.md +369 -0
  23. data/docs/license_plates_v1.md +91 -0
  24. data/docs/multi_receipts_detector_v1.md +105 -0
  25. data/docs/passport_v1.md +186 -0
  26. data/docs/proof_of_address_v1.md +207 -0
  27. data/docs/us_driver_license_v1.md +268 -0
  28. data/docs/us_w9_v1.md +207 -0
  29. data/lib/mindee/client.rb +95 -16
  30. data/lib/mindee/geometry/quadrilateral.rb +5 -0
  31. data/lib/mindee/http/.rubocop.yml +8 -0
  32. data/lib/mindee/http/endpoint.rb +14 -6
  33. data/lib/mindee/http/error.rb +104 -0
  34. data/lib/mindee/http.rb +1 -0
  35. data/lib/mindee/input/sources.rb +83 -14
  36. data/lib/mindee/parsing/common/api_response.rb +11 -1
  37. data/lib/mindee/parsing/common/inference.rb +2 -2
  38. data/lib/mindee/parsing/common/ocr/ocr.rb +1 -0
  39. data/lib/mindee/parsing/common.rb +0 -1
  40. data/lib/mindee/parsing/standard/company_registration_field.rb +1 -1
  41. data/lib/mindee/parsing/standard/locale_field.rb +1 -1
  42. data/lib/mindee/parsing/standard/payment_details_field.rb +1 -1
  43. data/lib/mindee/parsing/standard/position_field.rb +10 -3
  44. data/lib/mindee/parsing/standard/{text_field.rb → string_field.rb} +1 -1
  45. data/lib/mindee/parsing/standard.rb +1 -1
  46. data/lib/mindee/pdf/pdf_processing.rb +2 -1
  47. data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +37 -0
  48. data/lib/mindee/product/barcode_reader/barcode_reader_v1_document.rb +44 -0
  49. data/lib/mindee/product/barcode_reader/barcode_reader_v1_page.rb +32 -0
  50. data/lib/mindee/product/cropper/cropper_v1.rb +37 -0
  51. data/lib/mindee/product/cropper/cropper_v1_document.rb +13 -0
  52. data/lib/mindee/product/cropper/cropper_v1_page.rb +49 -0
  53. data/lib/mindee/product/custom/custom_v1.rb +1 -0
  54. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +1 -0
  55. data/lib/mindee/product/eu/license_plate/license_plate_v1_document.rb +2 -2
  56. data/lib/mindee/product/financial_document/financial_document_v1.rb +1 -0
  57. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +24 -24
  58. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +1 -0
  59. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1_document.rb +6 -6
  60. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +1 -0
  61. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_document.rb +6 -6
  62. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +1 -0
  63. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +6 -6
  64. data/lib/mindee/product/fr/id_card/id_card_v1.rb +1 -0
  65. data/lib/mindee/product/fr/id_card/id_card_v1_document.rb +16 -16
  66. data/lib/mindee/product/fr/id_card/id_card_v2.rb +39 -0
  67. data/lib/mindee/product/fr/id_card/id_card_v2_document.rb +107 -0
  68. data/lib/mindee/product/fr/id_card/id_card_v2_page.rb +53 -0
  69. data/lib/mindee/product/invoice/invoice_v4.rb +1 -0
  70. data/lib/mindee/product/invoice/invoice_v4_document.rb +24 -24
  71. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1.rb +1 -0
  72. data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +5 -3
  73. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +37 -0
  74. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +35 -0
  75. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +32 -0
  76. data/lib/mindee/product/passport/passport_v1.rb +1 -0
  77. data/lib/mindee/product/passport/passport_v1_document.rb +16 -16
  78. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +1 -0
  79. data/lib/mindee/product/proof_of_address/proof_of_address_v1_document.rb +14 -14
  80. data/lib/mindee/product/receipt/receipt_v4_document.rb +6 -6
  81. data/lib/mindee/product/receipt/receipt_v5.rb +1 -0
  82. data/lib/mindee/product/receipt/receipt_v5_document.rb +12 -12
  83. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +1 -0
  84. data/lib/mindee/product/us/bank_check/bank_check_v1_document.rb +8 -8
  85. data/lib/mindee/product/us/driver_license/driver_license_v1.rb +1 -0
  86. data/lib/mindee/product/us/driver_license/driver_license_v1_document.rb +28 -28
  87. data/lib/mindee/product/us/w9/w9_v1.rb +39 -0
  88. data/lib/mindee/product/us/w9/w9_v1_document.rb +15 -0
  89. data/lib/mindee/product/us/w9/w9_v1_page.rb +102 -0
  90. data/lib/mindee/product.rb +5 -0
  91. data/lib/mindee/version.rb +5 -1
  92. data/lib/mindee.rb +47 -0
  93. metadata +43 -9
  94. data/docs/ruby-api-builder.md +0 -123
  95. data/docs/ruby-invoice-ocr.md +0 -271
  96. data/docs/ruby-passport-ocr.md +0 -165
  97. data/docs/ruby-receipt-ocr.md +0 -196
  98. data/lib/mindee/parsing/common/error.rb +0 -24
data/docs/us_w9_v1.md ADDED
@@ -0,0 +1,207 @@
1
+ ---
2
+ title: US W9 OCR Ruby
3
+ ---
4
+ The Ruby OCR SDK supports the [US W9 API](https://platform.mindee.com/mindee/us_w9).
5
+
6
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_w9/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
7
+ ![US W9 sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_w9/default_sample.jpg?raw=true)
8
+
9
+ # Quick-Start
10
+ ```rb
11
+ require 'mindee'
12
+
13
+ # Init a new client
14
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
15
+
16
+ # Load a file from disk
17
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
18
+
19
+ # Parse the file
20
+ result = mindee_client.parse(
21
+ input_source,
22
+ Mindee::Product::US::W9::W9V1
23
+ )
24
+
25
+ # Print a full summary of the parsed data in RST format
26
+ puts result.document
27
+ ```
28
+
29
+ **Output (RST):**
30
+ ```rst
31
+ ########
32
+ Document
33
+ ########
34
+ :Mindee ID: d7c5b25f-e0d3-4491-af54-6183afa1aaab
35
+ :Filename: default_sample.jpg
36
+
37
+ Inference
38
+ #########
39
+ :Product: mindee/us_w9 v1.0
40
+ :Rotation applied: Yes
41
+
42
+ Prediction
43
+ ==========
44
+
45
+ Page Predictions
46
+ ================
47
+
48
+ Page 0
49
+ ------
50
+ :Name: Stephen W Hawking
51
+ :SSN: 560758145
52
+ :Address: Somewhere In Milky Way
53
+ :City State Zip: Probably Still At Cambridge P O Box CB1
54
+ :Business Name:
55
+ :EIN: 942203664
56
+ :Tax Classification: individual
57
+ :Tax Classification Other Details:
58
+ :W9 Revision Date: august 2013
59
+ :Signature Position: Polygon with 4 points.
60
+ :Signature Date Position:
61
+ :Tax Classification LLC:
62
+ ```
63
+
64
+ # Field Types
65
+ ## Standard Fields
66
+ These fields are generic and used in several products.
67
+
68
+ ### Basic Field
69
+ Each prediction object contains a set of fields that inherit from the generic `Field` class.
70
+ A typical `Field` object will have the following attributes:
71
+
72
+ * **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
73
+ * **confidence** (`Float`, `nil`): the confidence score of the field prediction.
74
+ * **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
75
+ * **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
76
+ * **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
77
+ * **reconstructed** (`Boolean`): indicates whether or not an object was reconstructed (not extracted as the API gave it).
78
+
79
+
80
+ Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
81
+
82
+
83
+ ### Position Field
84
+ The position field `PositionField` does not implement all the basic `Field` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to:
85
+
86
+ * **rectangle** (`Mindee::Geometry::Quadrilateral`): a Polygon with four points that may be oriented (even beyond canvas).
87
+ * **quadrangle** (`Mindee::Geometry::Quadrilateral`): a free polygon made up of four points.
88
+
89
+ ### String Field
90
+ The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
91
+
92
+ ## Page-Level Fields
93
+ Some fields are constrained to the page level, and so will not be retrievable through the document.
94
+
95
+ # Attributes
96
+ The following fields are extracted for US W9 V1:
97
+
98
+ ## Address
99
+ [📄](#page-level-fields "This field is only present on individual pages.")**address** ([StringField](#string-field)): The street address (number, street, and apt. or suite no.) of the applicant.
100
+
101
+ ```rb
102
+ for address_elem in result.document.address do
103
+ puts address_elem.value
104
+ end
105
+ ```
106
+
107
+ ## Business Name
108
+ [📄](#page-level-fields "This field is only present on individual pages.")**business_name** ([StringField](#string-field)): The business name or disregarded entity name, if different from Name.
109
+
110
+ ```rb
111
+ for business_name_elem in result.document.business_name do
112
+ puts business_name_elem.value
113
+ end
114
+ ```
115
+
116
+ ## City State Zip
117
+ [📄](#page-level-fields "This field is only present on individual pages.")**city_state_zip** ([StringField](#string-field)): The city, state, and ZIP code of the applicant.
118
+
119
+ ```rb
120
+ for city_state_zip_elem in result.document.city_state_zip do
121
+ puts city_state_zip_elem.value
122
+ end
123
+ ```
124
+
125
+ ## EIN
126
+ [📄](#page-level-fields "This field is only present on individual pages.")**ein** ([StringField](#string-field)): The employer identification number.
127
+
128
+ ```rb
129
+ for ein_elem in result.document.ein do
130
+ puts ein_elem.value
131
+ end
132
+ ```
133
+
134
+ ## Name
135
+ [📄](#page-level-fields "This field is only present on individual pages.")**name** ([StringField](#string-field)): Name as shown on the applicant's income tax return.
136
+
137
+ ```rb
138
+ for name_elem in result.document.name do
139
+ puts name_elem.value
140
+ end
141
+ ```
142
+
143
+ ## Signature Date Position
144
+ [📄](#page-level-fields "This field is only present on individual pages.")**signature_date_position** ([PositionField](#position-field)): Position of the signature date on the document.
145
+
146
+ ```rb
147
+ for signature_date_position_elem in result.document.signature_date_position do
148
+ puts signature_date_position_elem.polygon
149
+ end
150
+ ```
151
+
152
+ ## Signature Position
153
+ [📄](#page-level-fields "This field is only present on individual pages.")**signature_position** ([PositionField](#position-field)): Position of the signature on the document.
154
+
155
+ ```rb
156
+ for signature_position_elem in result.document.signature_position do
157
+ puts signature_position_elem.polygon
158
+ end
159
+ ```
160
+
161
+ ## SSN
162
+ [📄](#page-level-fields "This field is only present on individual pages.")**ssn** ([StringField](#string-field)): The applicant's social security number.
163
+
164
+ ```rb
165
+ for ssn_elem in result.document.ssn do
166
+ puts ssn_elem.value
167
+ end
168
+ ```
169
+
170
+ ## Tax Classification
171
+ [📄](#page-level-fields "This field is only present on individual pages.")**tax_classification** ([StringField](#string-field)): The federal tax classification, which can vary depending on the revision date.
172
+
173
+ ```rb
174
+ for tax_classification_elem in result.document.tax_classification do
175
+ puts tax_classification_elem.value
176
+ end
177
+ ```
178
+
179
+ ## Tax Classification LLC
180
+ [📄](#page-level-fields "This field is only present on individual pages.")**tax_classification_llc** ([StringField](#string-field)): Depending on revision year, among S, C, P or D for Limited Liability Company Classification.
181
+
182
+ ```rb
183
+ for tax_classification_llc_elem in result.document.tax_classification_llc do
184
+ puts tax_classification_llc_elem.value
185
+ end
186
+ ```
187
+
188
+ ## Tax Classification Other Details
189
+ [📄](#page-level-fields "This field is only present on individual pages.")**tax_classification_other_details** ([StringField](#string-field)): Tax Classification Other Details.
190
+
191
+ ```rb
192
+ for tax_classification_other_details_elem in result.document.tax_classification_other_details do
193
+ puts tax_classification_other_details_elem.value
194
+ end
195
+ ```
196
+
197
+ ## W9 Revision Date
198
+ [📄](#page-level-fields "This field is only present on individual pages.")**w9_revision_date** ([StringField](#string-field)): The Revision month and year of the W9 form.
199
+
200
+ ```rb
201
+ for w9_revision_date_elem in result.document.w9_revision_date do
202
+ puts w9_revision_date_elem.value
203
+ end
204
+ ```
205
+
206
+ # Questions?
207
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/lib/mindee/client.rb CHANGED
@@ -17,7 +17,7 @@ module Mindee
17
17
  # Call prediction API on a document and parse the results.
18
18
  #
19
19
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
20
- #
20
+ # @param product_class [Mindee::Product] class of the product
21
21
  # @param endpoint [HTTP::Endpoint] Endpoint of the API
22
22
  # Doesn't need to be set in the case of OTS APIs.
23
23
  #
@@ -52,14 +52,14 @@ module Mindee
52
52
  input_source.process_pdf(page_options)
53
53
  end
54
54
  endpoint = initialize_endpoint(product_class) if endpoint.nil?
55
- prediction = endpoint.predict(input_source, all_words, close_file, cropper)
56
- Mindee::Parsing::Common::ApiResponse.new(product_class, prediction)
55
+ prediction, raw_http = endpoint.predict(input_source, all_words, close_file, cropper)
56
+ Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
57
57
  end
58
58
 
59
59
  # Enqueue a document for async parsing
60
60
  #
61
61
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
62
- #
62
+ # @param product_class [Mindee::Product] class of the product
63
63
  # @param endpoint [HTTP::Endpoint, nil] Endpoint of the API.
64
64
  # Doesn't need to be set in the case of OTS APIs.
65
65
  #
@@ -94,17 +94,18 @@ module Mindee
94
94
  input_source.process_pdf(page_options)
95
95
  end
96
96
  endpoint = initialize_endpoint(product_class) if endpoint.nil?
97
+ prediction, raw_http = endpoint.predict_async(input_source, all_words, close_file, cropper)
97
98
  Mindee::Parsing::Common::ApiResponse.new(product_class,
98
- endpoint.predict_async(input_source, all_words, close_file, cropper))
99
+ prediction, raw_http)
99
100
  end
100
101
 
101
102
  # Parses a queued document
102
103
  #
104
+ # @param job_id [String] Id of the job (queue) to poll from
105
+ # @param product_class [Mindee::Product] class of the product
103
106
  # @param endpoint [HTTP::Endpoint, nil] Endpoint of the API
104
107
  # Doesn't need to be set in the case of OTS APIs.
105
108
  #
106
- # @param job_id [String] Id of the job (queue) to poll from
107
- #
108
109
  # @return [Mindee::Parsing::Common::ApiResponse]
109
110
  def parse_queued(
110
111
  job_id,
@@ -112,38 +113,106 @@ module Mindee
112
113
  endpoint: nil
113
114
  )
114
115
  endpoint = initialize_endpoint(product_class) if endpoint.nil?
115
- Mindee::Parsing::Common::ApiResponse.new(product_class, endpoint.parse_async(job_id))
116
+ prediction, raw_http = endpoint.parse_async(job_id)
117
+ Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
116
118
  end
117
119
 
120
+ # rubocop:disable Metrics/ParameterLists
121
+
122
+ # Enqueue a document for async parsing and automatically try to retrieve it
123
+ #
124
+ # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
125
+ # @param product_class [Mindee::Product] class of the product
126
+ # @param endpoint [HTTP::Endpoint, nil] Endpoint of the API.
127
+ # Doesn't need to be set in the case of OTS APIs.
128
+ # @param all_words [Boolean] Whether to extract all the words on each page.
129
+ # This performs a full OCR operation on the server and will increase response time.
130
+ # @param close_file [Boolean] Whether to `close()` the file after parsing it.
131
+ # Set to false if you need to access the file after this operation.
132
+ # @param page_options [Hash, nil] Page cutting/merge options:
133
+ # * `:page_indexes` Zero-based list of page indexes.
134
+ # * `:operation` Operation to apply on the document, given the `page_indexes` specified:
135
+ # * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
136
+ # * `:REMOVE` - remove the specified pages, and keep all others.
137
+ # * `:on_min_pages` Apply the operation only if document has at least this many pages.
138
+ # @param cropper [Boolean, nil] Whether to include cropper results for each page.
139
+ # This performs a cropping operation on the server and will increase response time.
140
+ # @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 6.
141
+ # @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 3.
142
+ # @param max_retries [Integer, nil] maximum amount of retries. Defaults to 10.
143
+ # @return [Mindee::Parsing::Common::ApiResponse]
144
+ def enqueue_and_parse(
145
+ input_source,
146
+ product_class,
147
+ endpoint: nil,
148
+ all_words: false,
149
+ close_file: true,
150
+ page_options: nil,
151
+ cropper: false,
152
+ initial_delay_sec: 6,
153
+ delay_sec: 3,
154
+ max_retries: 10
155
+ )
156
+ enqueue_res = enqueue(
157
+ input_source,
158
+ product_class,
159
+ endpoint: endpoint,
160
+ all_words: all_words,
161
+ close_file: close_file,
162
+ page_options: page_options,
163
+ cropper: cropper
164
+ )
165
+ sleep(initial_delay_sec)
166
+ polling_attempts = 1
167
+ job_id = enqueue_res.job.id
168
+ queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
169
+ while (queue_res.job.status != Mindee::Parsing::Common::JobStatus::COMPLETED) && (polling_attempts < max_retries)
170
+ sleep(delay_sec)
171
+ queue_res = parse_queued(job_id, product_class, endpoint: endpoint)
172
+ polling_attempts += 1
173
+ end
174
+ if queue_res.job.status != Mindee::Parsing::Common::JobStatus::COMPLETED
175
+ elapsed = initial_delay_sec + (polling_attempts * delay_sec)
176
+ raise "Asynchronous parsing request timed out after #{elapsed} seconds (#{polling_attempts} tries)"
177
+ end
178
+
179
+ queue_res
180
+ end
181
+ # rubocop:enable Metrics/ParameterLists
182
+
118
183
  # Load a document from an absolute path, as a string.
119
184
  # @param input_path [String] Path of file to open
185
+ # @param fix_pdf [Boolean] Attempts to fix broken pdf if true
120
186
  # @return [Mindee::Input::Source::PathInputSource]
121
- def source_from_path(input_path)
122
- Input::Source::PathInputSource.new(input_path)
187
+ def source_from_path(input_path, fix_pdf: false)
188
+ Input::Source::PathInputSource.new(input_path, fix_pdf: fix_pdf)
123
189
  end
124
190
 
125
191
  # Load a document from raw bytes.
126
192
  # @param input_bytes [String] Encoding::BINARY byte input
127
193
  # @param filename [String] The name of the file (without the path)
194
+ # @param fix_pdf [Boolean] Attempts to fix broken pdf if true
128
195
  # @return [Mindee::Input::Source::BytesInputSource]
129
- def source_from_bytes(input_bytes, filename)
130
- Input::Source::BytesInputSource.new(input_bytes, filename)
196
+ def source_from_bytes(input_bytes, filename, fix_pdf: false)
197
+ Input::Source::BytesInputSource.new(input_bytes, filename, fix_pdf: fix_pdf)
131
198
  end
132
199
 
133
200
  # Load a document from a base64 encoded string.
134
201
  # @param base64_string [String] Input to parse as base64 string
135
202
  # @param filename [String] The name of the file (without the path)
203
+ # @param fix_pdf [Boolean] Attempts to fix broken pdf if true
136
204
  # @return [Mindee::Input::Source::Base64InputSource]
137
- def source_from_b64string(base64_string, filename)
138
- Input::Source::Base64InputSource.new(base64_string, filename)
205
+ def source_from_b64string(base64_string, filename, fix_pdf: false)
206
+ Input::Source::Base64InputSource.new(base64_string, filename, fix_pdf: fix_pdf)
139
207
  end
140
208
 
141
209
  # Load a document from a normal Ruby `File`.
142
210
  # @param input_file [File] Input file handle
143
211
  # @param filename [String] The name of the file (without the path)
212
+ # @param fix_pdf [Boolean] Attempts to fix broken pdf if true
144
213
  # @return [Mindee::Input::Source::FileInputSource]
145
- def source_from_file(input_file, filename)
146
- Input::Source::FileInputSource.new(input_file, filename)
214
+ def source_from_file(input_file, filename, fix_pdf: false)
215
+ Input::Source::FileInputSource.new(input_file, filename, fix_pdf: fix_pdf)
147
216
  end
148
217
 
149
218
  # Load a document from a secure remote source (HTTPS).
@@ -171,6 +240,16 @@ module Mindee
171
240
 
172
241
  private
173
242
 
243
+ # Validates the parameters for async auto-polling
244
+ # @param initial_delay_sec [Integer, Float] initial delay before polling
245
+ # @param delay_sec [Integer, Float] delay between polling attempts
246
+ # @param max_retries [Integer] maximum amount of retries
247
+ def validate_async_params(initial_delay_sec, delay_sec, max_retries)
248
+ raise 'Cannot set auto-poll delay to less than 2 seconds' if delay_sec < 2
249
+ raise 'Cannot set initial parsing delay to less than 4 seconds' if initial_delay_sec < 4
250
+ raise 'Cannot set auto-poll delay to less than 2 seconds' unless max_retries.is_a? Integer
251
+ end
252
+
174
253
  # Creates an endpoint with the given values. Raises an error if the endpoint is invalid.
175
254
  # @param product_class [Mindee::Product] class of the product
176
255
  #
@@ -40,6 +40,11 @@ module Mindee
40
40
  throw '0, 1, 2, 3 only'
41
41
  end
42
42
  end
43
+
44
+ # A quadrilateral has four corners, always.
45
+ def size
46
+ 4
47
+ end
43
48
  end
44
49
  end
45
50
  end
@@ -0,0 +1,8 @@
1
+ inherit_from: ../../../.rubocop.yml
2
+
3
+ Metrics/CyclomaticComplexity:
4
+ Max: 10
5
+
6
+ Metrics/PerceivedComplexity:
7
+ Max: 10
8
+
@@ -2,19 +2,27 @@
2
2
 
3
3
  require 'json'
4
4
  require 'net/http'
5
+ require_relative 'error'
5
6
  require_relative '../version'
6
7
 
7
8
  module Mindee
8
9
  module HTTP
10
+ # API key's default environment key name.
9
11
  API_KEY_ENV_NAME = 'MINDEE_API_KEY'
12
+ # API key's default value.
10
13
  API_KEY_DEFAULT = nil
11
14
 
15
+ # Base URL default environment key name.
12
16
  BASE_URL_ENV_NAME = 'MINDEE_BASE_URL'
17
+ # Base URL's default value.
13
18
  BASE_URL_DEFAULT = 'https://api.mindee.net/v1'
14
19
 
20
+ # HTTP request timeout default environment key name.
15
21
  REQUEST_TIMEOUT_ENV_NAME = 'MINDEE_REQUEST_TIMEOUT'
22
+ # HTTP request timeout default value.
16
23
  TIMEOUT_DEFAULT = 120
17
24
 
25
+ # Default value for the user agent.
18
26
  USER_AGENT = "mindee-api-ruby@v#{Mindee::VERSION} ruby-v#{RUBY_VERSION} #{Mindee::PLATFORM}"
19
27
 
20
28
  # Generic API endpoint for a product.
@@ -43,9 +51,9 @@ module Mindee
43
51
  check_api_key
44
52
  response = predict_req_post(input_source, all_words: all_words, close_file: close_file, cropper: cropper)
45
53
  hashed_response = JSON.parse(response.body, object_class: Hash)
46
- return hashed_response if (200..299).include?(response.code.to_i)
54
+ return [hashed_response, response.body] if (200..299).include?(response.code.to_i)
47
55
 
48
- error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
56
+ error = Error.handle_error!(@url_name, hashed_response, response.code.to_i)
49
57
  raise error
50
58
  end
51
59
 
@@ -58,9 +66,9 @@ module Mindee
58
66
  check_api_key
59
67
  response = document_queue_req_get(input_source, all_words, close_file, cropper)
60
68
  hashed_response = JSON.parse(response.body, object_class: Hash)
61
- return hashed_response if (200..299).include?(response.code.to_i)
69
+ return [hashed_response, response.body] if (200..299).include?(response.code.to_i)
62
70
 
63
- error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
71
+ error = Error.handle_error!(@url_name, hashed_response, response.code.to_i)
64
72
  raise error
65
73
  end
66
74
 
@@ -71,9 +79,9 @@ module Mindee
71
79
  check_api_key
72
80
  response = document_queue_req(job_id)
73
81
  hashed_response = JSON.parse(response.body, object_class: Hash)
74
- return hashed_response if (200..299).include?(response.code.to_i)
82
+ return [hashed_response, response.body] if (200..299).include?(response.code.to_i)
75
83
 
76
- error = Parsing::Common::HttpError.new(hashed_response['api_request']['error'])
84
+ error = Error.handle_error!(@url_name, hashed_response, response.code.to_i)
77
85
  raise error
78
86
  end
79
87
 
@@ -0,0 +1,104 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ module HTTP
5
+ # Mindee HTTP error module.
6
+ module Error
7
+ module_function
8
+
9
+ # Creates an error object based on what's retrieved from a request.
10
+ # @param response [Hash] dictionary response retrieved by the server
11
+ def create_error_obj(response)
12
+ error_obj = response.respond_to?(:each_pair) ? response.dig('api_request', 'error') : nil
13
+ if error_obj.nil?
14
+ error_obj = if response.include?('Maximum pdf pages')
15
+ {
16
+ 'code' => 'TooManyPages',
17
+ 'message' => 'Maximum amound of pdf pages reached.',
18
+ 'details' => response,
19
+ }
20
+ elsif response.include?('Max file size is')
21
+ {
22
+ 'code' => 'FileTooLarge',
23
+ 'message' => 'Maximum file size reached.',
24
+ 'details' => response,
25
+ }
26
+ elsif response.include?('Invalid file type')
27
+ {
28
+ 'code' => 'InvalidFiletype',
29
+ 'message' => 'Invalid file type.',
30
+ 'details' => response,
31
+ }
32
+ elsif response.include?('Gateway timeout')
33
+ {
34
+ 'code' => 'RequestTimeout',
35
+ 'message' => 'Request timed out.',
36
+ 'details' => response,
37
+ }
38
+ elsif response.include?('Too Many Requests')
39
+ {
40
+ 'code' => 'TooManyRequests',
41
+ 'message' => 'Too Many Requests.',
42
+ 'details' => response,
43
+ }
44
+ else
45
+ {
46
+ 'code' => 'UnknownError',
47
+ 'message' => 'Server sent back an unexpected reply.',
48
+ 'details' => response,
49
+ }
50
+ end
51
+
52
+ end
53
+ error_obj
54
+ end
55
+
56
+ # Creates an appropriate HTTP error exception, based on retrieved http error code
57
+ # @param url [String] the url of the product
58
+ # @param response [Hash] dictionary response retrieved by the server
59
+ # @param code [Integer] http error code of the response
60
+ def handle_error!(url, response, code)
61
+ error_obj = create_error_obj(response)
62
+ case code
63
+ when 400..499
64
+ MindeeHttpClientError.new(error_obj, url, code)
65
+ when 500..599
66
+ MindeeHttpServerError.new(error_obj, url, code)
67
+ else
68
+ MindeeHttpError.new(error_obj, url, code)
69
+ end
70
+ end
71
+
72
+ # API HttpError
73
+ class MindeeHttpError < StandardError
74
+ # @return [Integer]
75
+ attr_reader :status_code
76
+ # @return [String]
77
+ attr_reader :api_code
78
+ # @return [String]
79
+ attr_reader :api_details
80
+ # @return [String]
81
+ attr_reader :api_message
82
+
83
+ # @param http_error [Hash]
84
+ # @param url [String]
85
+ # @param code [Integer]
86
+ def initialize(http_error, url, code)
87
+ @status_code = code
88
+ @api_code = http_error['code']
89
+ @api_details = http_error['details']
90
+ @api_message = http_error['message']
91
+ super("#{url} #{@status_code} HTTP error: #{@api_details} - #{@api_message}")
92
+ end
93
+ end
94
+
95
+ # API client HttpError
96
+ class MindeeHttpClientError < MindeeHttpError
97
+ end
98
+
99
+ # API server HttpError
100
+ class MindeeHttpServerError < MindeeHttpError
101
+ end
102
+ end
103
+ end
104
+ end
data/lib/mindee/http.rb CHANGED
@@ -1,3 +1,4 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'http/endpoint'
4
+ require_relative 'http/error'