mindee 3.4.0 → 3.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +20 -0
  3. data/README.md +4 -0
  4. data/bin/mindee.rb +39 -15
  5. data/docs/bank_statement_fr_v1.md +175 -0
  6. data/docs/carte_grise_v1.md +4 -4
  7. data/docs/code_samples/bank_statement_fr_v1_async.txt +19 -0
  8. data/docs/code_samples/default.txt +19 -19
  9. data/docs/code_samples/default_async.txt +25 -0
  10. data/docs/code_samples/eu_driver_license_v1.txt +19 -0
  11. data/docs/code_samples/international_id_v1_async.txt +19 -0
  12. data/docs/code_samples/international_id_v2_async.txt +19 -0
  13. data/docs/eu_driver_license_v1.md +223 -0
  14. data/docs/financial_document_v1.md +48 -40
  15. data/docs/generated_v1.md +90 -0
  16. data/docs/invoices_v4.md +4 -1
  17. data/docs/proof_of_address_v1.md +4 -4
  18. data/docs/us_driver_license_v1.md +2 -2
  19. data/lib/mindee/client.rb +5 -3
  20. data/lib/mindee/http/endpoint.rb +13 -12
  21. data/lib/mindee/input/sources.rb +28 -5
  22. data/lib/mindee/parsing/common/inference.rb +3 -1
  23. data/lib/mindee/parsing/generated/generated_list_field.rb +58 -0
  24. data/lib/mindee/parsing/generated/generated_object_field.rb +109 -0
  25. data/lib/mindee/parsing/generated.rb +4 -0
  26. data/lib/mindee/parsing.rb +1 -0
  27. data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +3 -1
  28. data/lib/mindee/product/cropper/cropper_v1.rb +3 -1
  29. data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +41 -0
  30. data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +88 -0
  31. data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +53 -0
  32. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +3 -1
  33. data/lib/mindee/product/financial_document/financial_document_v1.rb +3 -1
  34. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +3 -1
  35. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +3 -1
  36. data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +41 -0
  37. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_document.rb +130 -0
  38. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +34 -0
  39. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_transaction.rb +64 -0
  40. data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +3 -1
  41. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +3 -1
  42. data/lib/mindee/product/fr/id_card/id_card_v1.rb +3 -1
  43. data/lib/mindee/product/fr/id_card/id_card_v2.rb +3 -1
  44. data/lib/mindee/product/generated/generated_v1.rb +38 -0
  45. data/lib/mindee/product/generated/generated_v1_document.rb +35 -0
  46. data/lib/mindee/product/generated/generated_v1_page.rb +51 -0
  47. data/lib/mindee/product/generated/generated_v1_prediction.rb +114 -0
  48. data/lib/mindee/product/international_id/international_id_v1.rb +39 -0
  49. data/lib/mindee/product/international_id/international_id_v1_document.rb +109 -0
  50. data/lib/mindee/product/international_id/international_id_v1_page.rb +32 -0
  51. data/lib/mindee/product/international_id/international_id_v2.rb +39 -0
  52. data/lib/mindee/product/international_id/international_id_v2_document.rb +119 -0
  53. data/lib/mindee/product/international_id/international_id_v2_page.rb +32 -0
  54. data/lib/mindee/product/invoice/invoice_v4.rb +3 -1
  55. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +3 -1
  56. data/lib/mindee/product/passport/passport_v1.rb +3 -1
  57. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +3 -1
  58. data/lib/mindee/product/receipt/receipt_v5.rb +3 -1
  59. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +3 -1
  60. data/lib/mindee/product/us/driver_license/driver_license_v1.rb +3 -1
  61. data/lib/mindee/product/us/w9/w9_v1.rb +3 -1
  62. data/lib/mindee/product.rb +5 -0
  63. data/lib/mindee/version.rb +1 -1
  64. data/lib/mindee.rb +4 -0
  65. metadata +30 -2
@@ -0,0 +1,223 @@
1
+ ---
2
+ title: EU Driver License OCR Ruby
3
+ ---
4
+ The Ruby OCR SDK supports the [EU Driver License API](https://platform.mindee.com/mindee/eu_driver_license).
5
+
6
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/eu_driver_license/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
7
+ ![EU Driver License sample](https://github.com/mindee/client-lib-test-data/blob/main/products/eu_driver_license/default_sample.jpg?raw=true)
8
+
9
+ # Quick-Start
10
+ ```rb
11
+ require 'mindee'
12
+
13
+ # Init a new client
14
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
15
+
16
+ # Load a file from disk
17
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
18
+
19
+ # Parse the file
20
+ result = mindee_client.parse(
21
+ input_source,
22
+ Mindee::Product::EU::DriverLicense::DriverLicenseV1
23
+ )
24
+
25
+ # Print a full summary of the parsed data in RST format
26
+ puts result.document
27
+
28
+ # Print the document-level parsed data
29
+ # puts result.document.inference.prediction
30
+ ```
31
+
32
+ **Output (RST):**
33
+ ```rst
34
+ ########
35
+ Document
36
+ ########
37
+ :Mindee ID: b19cc32e-b3e6-4ff9-bdc7-619199355d54
38
+ :Filename: default_sample.jpg
39
+
40
+ Inference
41
+ #########
42
+ :Product: mindee/eu_driver_license v1.0
43
+ :Rotation applied: Yes
44
+
45
+ Prediction
46
+ ==========
47
+ :Country Code: FR
48
+ :Document ID: 13AA00002
49
+ :Driver License Category: AM A1 B1 B D BE DE
50
+ :Last Name: MARTIN
51
+ :First Name: PAUL
52
+ :Date Of Birth: 1981-07-14
53
+ :Place Of Birth: Utopiacity
54
+ :Expiry Date: 2018-12-31
55
+ :Issue Date: 2013-01-01
56
+ :Issue Authority: 99999UpiaCity
57
+ :MRZ: D1FRA13AA000026181231MARTIN<<9
58
+ :Address:
59
+
60
+ Page Predictions
61
+ ================
62
+
63
+ Page 0
64
+ ------
65
+ :Photo: Polygon with 4 points.
66
+ :Signature: Polygon with 4 points.
67
+ :Country Code: FR
68
+ :Document ID: 13AA00002
69
+ :Driver License Category: AM A1 B1 B D BE DE
70
+ :Last Name: MARTIN
71
+ :First Name: PAUL
72
+ :Date Of Birth: 1981-07-14
73
+ :Place Of Birth: Utopiacity
74
+ :Expiry Date: 2018-12-31
75
+ :Issue Date: 2013-01-01
76
+ :Issue Authority: 99999UpiaCity
77
+ :MRZ: D1FRA13AA000026181231MARTIN<<9
78
+ :Address:
79
+ ```
80
+
81
+ # Field Types
82
+ ## Standard Fields
83
+ These fields are generic and used in several products.
84
+
85
+ ### Basic Field
86
+ Each prediction object contains a set of fields that inherit from the generic `Field` class.
87
+ A typical `Field` object will have the following attributes:
88
+
89
+ * **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
90
+ * **confidence** (`Float`, `nil`): the confidence score of the field prediction.
91
+ * **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
92
+ * **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
93
+ * **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
94
+ * **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
95
+
96
+
97
+ Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
98
+
99
+ ### Date Field
100
+ Aside from the basic `Field` attributes, the date field `DateField` also implements the following:
101
+
102
+ * **date_object** (`Date`): an accessible representation of the value as a Ruby `Date` object.
103
+
104
+
105
+ ### Position Field
106
+ The position field `PositionField` does not implement all the basic `Field` attributes, only **bounding_box**, **polygon** and **page_id**. On top of these, it has access to:
107
+
108
+ * **rectangle** (`Mindee::Geometry::Quadrilateral`): a Polygon with four points that may be oriented (even beyond canvas).
109
+ * **quadrangle** (`Mindee::Geometry::Quadrilateral`): a free polygon made up of four points.
110
+
111
+ ### String Field
112
+ The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
113
+
114
+ ## Page-Level Fields
115
+ Some fields are constrained to the page level, and so will not be retrievable through the document.
116
+
117
+ # Attributes
118
+ The following fields are extracted for EU Driver License V1:
119
+
120
+ ## Address
121
+ **address** ([StringField](#string-field)): EU driver license holder's address
122
+
123
+ ```rb
124
+ puts result.document.inference.prediction.address.value
125
+ ```
126
+
127
+ ## Driver License Category
128
+ **category** ([StringField](#string-field)): EU driver license holder's categories
129
+
130
+ ```rb
131
+ puts result.document.inference.prediction.category.value
132
+ ```
133
+
134
+ ## Country Code
135
+ **country_code** ([StringField](#string-field)): Country code extracted as a string.
136
+
137
+ ```rb
138
+ puts result.document.inference.prediction.country_code.value
139
+ ```
140
+
141
+ ## Date Of Birth
142
+ **date_of_birth** ([DateField](#date-field)): The date of birth of the document holder
143
+
144
+ ```rb
145
+ puts result.document.inference.prediction.date_of_birth.value
146
+ ```
147
+
148
+ ## Document ID
149
+ **document_id** ([StringField](#string-field)): ID number of the Document.
150
+
151
+ ```rb
152
+ puts result.document.inference.prediction.document_id.value
153
+ ```
154
+
155
+ ## Expiry Date
156
+ **expiry_date** ([DateField](#date-field)): Date the document expires
157
+
158
+ ```rb
159
+ puts result.document.inference.prediction.expiry_date.value
160
+ ```
161
+
162
+ ## First Name
163
+ **first_name** ([StringField](#string-field)): First name(s) of the driver license holder
164
+
165
+ ```rb
166
+ puts result.document.inference.prediction.first_name.value
167
+ ```
168
+
169
+ ## Issue Authority
170
+ **issue_authority** ([StringField](#string-field)): Authority that issued the document
171
+
172
+ ```rb
173
+ puts result.document.inference.prediction.issue_authority.value
174
+ ```
175
+
176
+ ## Issue Date
177
+ **issue_date** ([DateField](#date-field)): Date the document was issued
178
+
179
+ ```rb
180
+ puts result.document.inference.prediction.issue_date.value
181
+ ```
182
+
183
+ ## Last Name
184
+ **last_name** ([StringField](#string-field)): Last name of the driver license holder.
185
+
186
+ ```rb
187
+ puts result.document.inference.prediction.last_name.value
188
+ ```
189
+
190
+ ## MRZ
191
+ **mrz** ([StringField](#string-field)): Machine-readable license number
192
+
193
+ ```rb
194
+ puts result.document.inference.prediction.mrz.value
195
+ ```
196
+
197
+ ## Photo
198
+ [📄](#page-level-fields "This field is only present on individual pages.")**photo** ([PositionField](#position-field)): Has a photo of the EU driver license holder
199
+
200
+ ```rb
201
+ for photo_elem in result.document.photo do
202
+ puts photo_elem.polygon
203
+ end
204
+ ```
205
+
206
+ ## Place Of Birth
207
+ **place_of_birth** ([StringField](#string-field)): Place where the driver license holder was born
208
+
209
+ ```rb
210
+ puts result.document.inference.prediction.place_of_birth.value
211
+ ```
212
+
213
+ ## Signature
214
+ [📄](#page-level-fields "This field is only present on individual pages.")**signature** ([PositionField](#position-field)): Has a signature of the EU driver license holder
215
+
216
+ ```rb
217
+ for signature_elem in result.document.signature do
218
+ puts signature_elem.polygon
219
+ end
220
+ ```
221
+
222
+ # Questions?
223
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
@@ -34,48 +34,52 @@ puts result.document
34
34
  ########
35
35
  Document
36
36
  ########
37
- :Mindee ID: 81c1d637-3a84-41d9-b40a-f72ca2a58826
37
+ :Mindee ID: a6b54e2d-a7fa-4e08-8de6-6cd296f50f3d
38
38
  :Filename: default_sample.jpg
39
39
 
40
40
  Inference
41
41
  #########
42
- :Product: mindee/financial_document v1.1
42
+ :Product: mindee/financial_document v1.2
43
43
  :Rotation applied: Yes
44
44
 
45
45
  Prediction
46
46
  ==========
47
47
  :Locale: en; en; USD;
48
- :Invoice Number:
49
- :Reference Numbers:
50
- :Purchase Date: 2014-07-07
51
- :Due Date: 2014-07-07
52
- :Total Net: 40.48
53
- :Total Amount: 53.82
48
+ :Invoice Number: INT-001
49
+ :Reference Numbers: 2412/2019
50
+ :Purchase Date: 2019-02-11
51
+ :Due Date: 2019-02-26
52
+ :Total Net: 195.00
53
+ :Total Amount: 204.75
54
54
  :Taxes:
55
55
  +---------------+--------+----------+---------------+
56
56
  | Base | Code | Rate (%) | Amount |
57
57
  +===============+========+==========+===============+
58
- | | TAX | | 3.34 |
58
+ | | | 5.00 | 9.75 |
59
59
  +---------------+--------+----------+---------------+
60
60
  :Supplier Payment Details:
61
- :Supplier Name: LOGANS
61
+ :Supplier Name: JOHN SMITH
62
62
  :Supplier Company Registrations:
63
- :Supplier Address: 2513 s stemmons freeway lewisville tx 75067
64
- :Supplier Phone Number: 9724596042
65
- :Customer Name:
63
+ :Supplier Address: 4490 Oak Drive Albany, NY 12210
64
+ :Supplier Phone Number:
65
+ :Customer Name: JESSIE M HORNE
66
66
  :Customer Company Registrations:
67
- :Customer Address:
68
- :Document Type: EXPENSE RECEIPT
69
- :Purchase Subcategory: restaurant
70
- :Purchase Category: food
71
- :Total Tax: 3.34
72
- :Tip and Gratuity: 10.00
73
- :Purchase Time: 20:20
67
+ :Customer Address: 2019 Redbud Drive New York, NY 10011
68
+ :Document Type: INVOICE
69
+ :Purchase Subcategory:
70
+ :Purchase Category: miscellaneous
71
+ :Total Tax: 9.75
72
+ :Tip and Gratuity:
73
+ :Purchase Time:
74
74
  :Line Items:
75
75
  +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
76
76
  | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
77
77
  +======================================+==============+==========+============+==============+==============+============+
78
- | TAX | | | | | 3.34 | |
78
+ | Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
79
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
80
+ | New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
81
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
82
+ | Labon 3hrs | | 3.00 | | | 45.00 | 15.00 |
79
83
  +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
80
84
 
81
85
  Page Predictions
@@ -84,37 +88,41 @@ Page Predictions
84
88
  Page 0
85
89
  ------
86
90
  :Locale: en; en; USD;
87
- :Invoice Number:
88
- :Reference Numbers:
89
- :Purchase Date: 2014-07-07
90
- :Due Date: 2014-07-07
91
- :Total Net: 40.48
92
- :Total Amount: 53.82
91
+ :Invoice Number: INT-001
92
+ :Reference Numbers: 2412/2019
93
+ :Purchase Date: 2019-02-11
94
+ :Due Date: 2019-02-26
95
+ :Total Net: 195.00
96
+ :Total Amount: 204.75
93
97
  :Taxes:
94
98
  +---------------+--------+----------+---------------+
95
99
  | Base | Code | Rate (%) | Amount |
96
100
  +===============+========+==========+===============+
97
- | | TAX | | 3.34 |
101
+ | | | 5.00 | 9.75 |
98
102
  +---------------+--------+----------+---------------+
99
103
  :Supplier Payment Details:
100
- :Supplier Name: LOGANS
104
+ :Supplier Name: JOHN SMITH
101
105
  :Supplier Company Registrations:
102
- :Supplier Address: 2513 s stemmons freeway lewisville tx 75067
103
- :Supplier Phone Number: 9724596042
104
- :Customer Name:
106
+ :Supplier Address: 4490 Oak Drive Albany, NY 12210
107
+ :Supplier Phone Number:
108
+ :Customer Name: JESSIE M HORNE
105
109
  :Customer Company Registrations:
106
- :Customer Address:
107
- :Document Type: EXPENSE RECEIPT
108
- :Purchase Subcategory: restaurant
109
- :Purchase Category: food
110
- :Total Tax: 3.34
111
- :Tip and Gratuity: 10.00
112
- :Purchase Time: 20:20
110
+ :Customer Address: 2019 Redbud Drive New York, NY 10011
111
+ :Document Type: INVOICE
112
+ :Purchase Subcategory:
113
+ :Purchase Category: miscellaneous
114
+ :Total Tax: 9.75
115
+ :Tip and Gratuity:
116
+ :Purchase Time:
113
117
  :Line Items:
114
118
  +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
115
119
  | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
116
120
  +======================================+==============+==========+============+==============+==============+============+
117
- | TAX | | | | | 3.34 | |
121
+ | Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
122
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
123
+ | New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
124
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
125
+ | Labon 3hrs | | 3.00 | | | 45.00 | 15.00 |
118
126
  +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
119
127
  ```
120
128
 
@@ -0,0 +1,90 @@
1
+ ---
2
+ title: Generated API Ruby
3
+ ---
4
+ The Ruby OCR SDK supports generated APIs.
5
+ Generated APIs can theoretically support all APIs in a catch-all generic format.
6
+
7
+ # Quick-Start
8
+
9
+ ```ruby
10
+ require 'mindee'
11
+
12
+ # Init a new client
13
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
14
+
15
+ # Load a file from disk
16
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
17
+
18
+ # Initialize a custom endpoint for this product
19
+ custom_endpoint = mindee_client.create_endpoint(
20
+ account_name: 'my-account',
21
+ endpoint_name: 'my-endpoint',
22
+ version: 'my-version'
23
+ )
24
+
25
+ # Parse the file
26
+ result = mindee_client.enqueue_and_parse(
27
+ input_source,
28
+ Mindee::Product::Generated::GeneratedV1,
29
+ endpoint: custom_endpoint
30
+ )
31
+
32
+ # Print a full summary of the parsed data in RST format
33
+ puts result.document
34
+ ```
35
+
36
+ # Generated Endpoints
37
+
38
+ You may have noticed in the previous step that in order to access a custom build, you will need to provide an account and an endpoint name at the very least.
39
+
40
+ Although it is optional, the version number should match the latest version of your build in most use-cases.
41
+ If it is not set, it will default to "1".
42
+
43
+ # Field Types
44
+
45
+ ## Generated Fields
46
+
47
+ ### Generated List Field
48
+
49
+ A `GeneratedListField` is a special type of custom list that implements the following:
50
+
51
+ - **values** (`Array<StringField, `[GeneratedObjectField](#generated-object-field)`>`): the list of values held by the field.
52
+ - **page_id** (`Integer`): currently only available for some documents.
53
+
54
+ Since the inner contents can vary, the value isn't accessed through a property, but rather through the following functions:
55
+
56
+ - **contents_list()** (`-> Array<String, Float>`): returns a list of values for each element.
57
+ - **contents_string(separator=" ")** (`-> String`): returns all values concatenated into a single string, with an optional **separator** `String` between them.
58
+ > **Note:** the `to_s` method returns a string representation of all values of this object, with an empty space between each of them.
59
+
60
+ ### Generated Object Field
61
+
62
+ Unrecognized structures and sometimes values of `ListField`s are stored in a `GeneratedObjectField` structure, which is implemented dynamically depending on the object's structure.
63
+
64
+ - **page_id** (`[Integer, nil]`): the ID of the page, is `nil` when at document-level.
65
+ - **raw_value** (`[String, nil]`): an optional field for when some post-processing has been done on fields (e.g. amounts). `nil` in most instances.
66
+ - **confidence** (`[Float, nil]`): the confidence score of the field prediction. Warning: support isn't guaranteed on all APIs.
67
+
68
+
69
+ > **Other fields**: No matter what, other fields will be stored in a dictionary-like structure with a `key: value` pair where `key` is a string and `value` is a nullable string. They can be accessed like any other regular value, but won't be suggested by your IDE.
70
+
71
+
72
+ ### StringField
73
+ The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
74
+
75
+
76
+ # Attributes
77
+
78
+ Generated builds always have access to at least two attributes:
79
+
80
+ ## Fields
81
+
82
+ **fields** (`Hash<String, Array<`[GeneratedListField](#generated-list-field), [GeneratedObjectField](#generated-object-field), [StringField](#stringfield)`>>`):
83
+
84
+ ```ruby
85
+ puts result.document.inference.prediction.fields["my-field"].to_s
86
+ ```
87
+
88
+ # Questions?
89
+
90
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/docs/invoices_v4.md CHANGED
@@ -310,6 +310,9 @@ puts result.document.inference.prediction.supplier_name.value
310
310
  ```rb
311
311
  for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details do
312
312
  puts supplier_payment_details_elem.value
313
+ puts supplier_payment_details_elem.rate
314
+ puts supplier_payment_details_elem.code
315
+ puts supplier_payment_details_elem.basis
313
316
  end
314
317
  ```
315
318
 
@@ -318,7 +321,7 @@ end
318
321
 
319
322
  ```rb
320
323
  for taxes_elem in result.document.inference.prediction.taxes do
321
- puts taxes_elem.to_s
324
+ puts taxes_elem.value
322
325
  end
323
326
  ```
324
327
 
@@ -34,12 +34,12 @@ puts result.document
34
34
  ########
35
35
  Document
36
36
  ########
37
- :Mindee ID: 3a7e1da6-d4d0-4704-af91-051fe5484c2e
37
+ :Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38
38
38
  :Filename: default_sample.jpg
39
39
 
40
40
  Inference
41
41
  #########
42
- :Product: mindee/proof_of_address v1.0
42
+ :Product: mindee/proof_of_address v1.1
43
43
  :Rotation applied: Yes
44
44
 
45
45
  Prediction
@@ -47,7 +47,7 @@ Prediction
47
47
  :Locale: en; en; USD;
48
48
  :Issuer Name: PPL ELECTRIC UTILITIES
49
49
  :Issuer Company Registrations:
50
- :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN,PA 18101-1175
50
+ :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
51
51
  :Recipient Name:
52
52
  :Recipient Company Registrations:
53
53
  :Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
@@ -73,7 +73,7 @@ Page 0
73
73
  :Locale: en; en; USD;
74
74
  :Issuer Name: PPL ELECTRIC UTILITIES
75
75
  :Issuer Company Registrations:
76
- :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN,PA 18101-1175
76
+ :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
77
77
  :Recipient Name:
78
78
  :Recipient Company Registrations:
79
79
  :Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
@@ -54,7 +54,7 @@ Prediction
54
54
  :Date Of Birth: 1957-02-01
55
55
  :Restrictions: NONE
56
56
  :Endorsements: NONE
57
- :Class:
57
+ :Driver License Class: D
58
58
  :Sex: M
59
59
  :Height: 5-08
60
60
  :Weight: 185
@@ -79,7 +79,7 @@ Page 0
79
79
  :Date Of Birth: 1957-02-01
80
80
  :Restrictions: NONE
81
81
  :Endorsements: NONE
82
- :Class:
82
+ :Driver License Class: D
83
83
  :Sex: M
84
84
  :Height: 5-08
85
85
  :Weight: 185
data/lib/mindee/client.rb CHANGED
@@ -38,6 +38,7 @@ module Mindee
38
38
  # @param cropper [Boolean] Whether to include cropper results for each page.
39
39
  # This performs a cropping operation on the server and will increase response time.
40
40
  #
41
+ #
41
42
  # @return [Mindee::Parsing::Common::ApiResponse]
42
43
  def parse(
43
44
  input_source,
@@ -80,6 +81,7 @@ module Mindee
80
81
  # @param cropper [Boolean] Whether to include cropper results for each page.
81
82
  # This performs a cropping operation on the server and will increase response time.
82
83
  #
84
+ #
83
85
  # @return [Mindee::Parsing::Common::ApiResponse]
84
86
  def enqueue(
85
87
  input_source,
@@ -118,7 +120,6 @@ module Mindee
118
120
  end
119
121
 
120
122
  # rubocop:disable Metrics/ParameterLists
121
-
122
123
  # Enqueue a document for async parsing and automatically try to retrieve it
123
124
  #
124
125
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
@@ -139,7 +140,7 @@ module Mindee
139
140
  # This performs a cropping operation on the server and will increase response time.
140
141
  # @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 4.
141
142
  # @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 2.
142
- # @param max_retries [Integer, nil] maximum amount of retries. Defaults to 30.
143
+ # @param max_retries [Integer, nil] maximum amount of retries. Defaults to 60.
143
144
  # @return [Mindee::Parsing::Common::ApiResponse]
144
145
  def enqueue_and_parse(
145
146
  input_source,
@@ -151,7 +152,7 @@ module Mindee
151
152
  cropper: false,
152
153
  initial_delay_sec: 4,
153
154
  delay_sec: 2,
154
- max_retries: 30
155
+ max_retries: 60
155
156
  )
156
157
  enqueue_res = enqueue(
157
158
  input_source,
@@ -178,6 +179,7 @@ module Mindee
178
179
 
179
180
  queue_res
180
181
  end
182
+
181
183
  # rubocop:enable Metrics/ParameterLists
182
184
 
183
185
  # Load a document from an absolute path, as a string.
@@ -46,9 +46,9 @@ module Mindee
46
46
 
47
47
  # Call the prediction API.
48
48
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
49
- # @param all_words [Boolean]
50
- # @param close_file [Boolean]
51
- # @param cropper [Boolean]
49
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
50
+ # @param close_file [Boolean] Whether the file will be closed after reading
51
+ # @param cropper [Boolean] Whether a cropping operation will be applied
52
52
  # @return [Hash]
53
53
  def predict(input_source, all_words, close_file, cropper)
54
54
  check_api_key
@@ -62,8 +62,9 @@ module Mindee
62
62
 
63
63
  # Call the prediction API.
64
64
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
65
- # @param close_file [Boolean]
66
- # @param cropper [Boolean]
65
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
66
+ # @param close_file [Boolean] Whether the file will be closed after reading
67
+ # @param cropper [Boolean] Whether a cropping operation will be applied
67
68
  # @return [Hash]
68
69
  def predict_async(input_source, all_words, close_file, cropper)
69
70
  check_api_key
@@ -91,10 +92,10 @@ module Mindee
91
92
  private
92
93
 
93
94
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
94
- # @param all_words [Boolean]
95
- # @param close_file [Boolean]
96
- # @param cropper [Boolean]
97
- # @return [Net::HTTPResponse]
95
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
96
+ # @param close_file [Boolean] Whether the file will be closed after reading
97
+ # @param cropper [Boolean] Whether a cropping operation will be applied
98
+ # @return [Net::HTTP, nil]
98
99
  def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
99
100
  uri = URI("#{@url_root}/predict")
100
101
 
@@ -122,9 +123,9 @@ module Mindee
122
123
  end
123
124
 
124
125
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
125
- # @param all_words [Boolean]
126
- # @param close_file [Boolean]
127
- # @param cropper [Boolean]
126
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
127
+ # @param close_file [Boolean] Whether the file will be closed after reading
128
+ # @param cropper [Boolean] Whether a cropping operation will be applied
128
129
  # @return [Net::HTTPResponse]
129
130
  def document_queue_req_get(input_source, all_words, close_file, cropper)
130
131
  uri = URI("#{@url_root}/predict_async")
@@ -7,6 +7,7 @@ require_relative '../pdf'
7
7
 
8
8
  module Mindee
9
9
  module Input
10
+ # Document source handling.
10
11
  module Source
11
12
  # Mime types accepted by the server.
12
13
  ALLOWED_MIME_TYPES = [
@@ -107,8 +108,7 @@ module Mindee
107
108
  @io_stream = PdfProcessor.parse(@io_stream, options)
108
109
  end
109
110
 
110
- # Reads a document. Packs it into bytes if needed.
111
- # Note: only needs filename in case of some pdf files.
111
+ # Reads a document.
112
112
  # @param close [Boolean]
113
113
  # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
114
114
  def read_document(close: true)
@@ -116,9 +116,7 @@ module Mindee
116
116
  # Avoids needlessly re-packing some files
117
117
  data = @io_stream.read
118
118
  @io_stream.close if close
119
- return ['document', data, { filename: @filename }] if pdf?
120
-
121
- ['document', [data].pack('m')]
119
+ ['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
122
120
  end
123
121
  end
124
122
 
@@ -142,6 +140,16 @@ module Mindee
142
140
  io_stream.set_encoding Encoding::BINARY
143
141
  super(io_stream, filename, fix_pdf: fix_pdf)
144
142
  end
143
+
144
+ # Overload of the same function to prevent a base64 from being re-encoded.
145
+ # @param close [Boolean]
146
+ # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
147
+ def read_document(close: true)
148
+ @io_stream.seek(0)
149
+ data = @io_stream.read
150
+ @io_stream.close if close
151
+ ['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
152
+ end
145
153
  end
146
154
 
147
155
  # Load a document from raw bytes.
@@ -178,6 +186,21 @@ module Mindee
178
186
  @url = url
179
187
  end
180
188
  end
189
+
190
+ # Replaces non-ASCII characters by their unicode escape sequence.
191
+ # Keeps other characters as is.
192
+ # @return A clean String.
193
+ def self.convert_to_unicode_escape(string)
194
+ unicode_escape_string = ''.dup
195
+ string.each_char do |char|
196
+ unicode_escape_string << if char.bytesize > 1
197
+ "\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
198
+ else
199
+ char
200
+ end
201
+ end
202
+ unicode_escape_string
203
+ end
181
204
  end
182
205
  end
183
206
  end