mindee 3.4.0 → 3.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (93) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +25 -0
  3. data/README.md +6 -1
  4. data/bin/mindee.rb +45 -15
  5. data/docs/bank_account_details_v2.md +1 -1
  6. data/docs/bank_check_v1.md +1 -1
  7. data/docs/bank_statement_fr_v1.md +175 -0
  8. data/docs/barcode_reader_v1.md +1 -1
  9. data/docs/carte_grise_v1.md +5 -5
  10. data/docs/carte_vitale_v1.md +1 -1
  11. data/docs/code_samples/bank_statement_fr_v1_async.txt +19 -0
  12. data/docs/code_samples/default.txt +19 -19
  13. data/docs/code_samples/default_async.txt +25 -0
  14. data/docs/code_samples/eu_driver_license_v1.txt +19 -0
  15. data/docs/code_samples/international_id_v1_async.txt +19 -0
  16. data/docs/code_samples/international_id_v2_async.txt +19 -0
  17. data/docs/code_samples/resume_v1_async.txt +19 -0
  18. data/docs/cropper_v1.md +1 -1
  19. data/docs/custom_v1.md +1 -1
  20. data/docs/eu_driver_license_v1.md +223 -0
  21. data/docs/expense_receipts_v5.md +1 -1
  22. data/docs/financial_document_v1.md +49 -41
  23. data/docs/generated_v1.md +90 -0
  24. data/docs/getting_started.md +1 -1
  25. data/docs/idcard_fr_v2.md +1 -1
  26. data/docs/international_id_v2.md +195 -0
  27. data/docs/invoice_splitter_v1.md +1 -1
  28. data/docs/invoices_v4.md +5 -2
  29. data/docs/license_plates_v1.md +1 -1
  30. data/docs/multi_receipts_detector_v1.md +1 -1
  31. data/docs/passport_v1.md +1 -1
  32. data/docs/proof_of_address_v1.md +5 -5
  33. data/docs/resume_v1.md +334 -0
  34. data/docs/us_driver_license_v1.md +3 -3
  35. data/docs/us_w9_v1.md +1 -1
  36. data/lib/mindee/client.rb +5 -3
  37. data/lib/mindee/http/endpoint.rb +13 -12
  38. data/lib/mindee/input/sources.rb +28 -5
  39. data/lib/mindee/parsing/common/inference.rb +3 -1
  40. data/lib/mindee/parsing/generated/generated_list_field.rb +58 -0
  41. data/lib/mindee/parsing/generated/generated_object_field.rb +109 -0
  42. data/lib/mindee/parsing/generated.rb +4 -0
  43. data/lib/mindee/parsing/standard/base_field.rb +1 -1
  44. data/lib/mindee/parsing.rb +1 -0
  45. data/lib/mindee/product/.rubocop.yml +7 -2
  46. data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +3 -1
  47. data/lib/mindee/product/cropper/cropper_v1.rb +3 -1
  48. data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +41 -0
  49. data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +88 -0
  50. data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +53 -0
  51. data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +3 -1
  52. data/lib/mindee/product/financial_document/financial_document_v1.rb +3 -1
  53. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +3 -1
  54. data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +3 -1
  55. data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +41 -0
  56. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_document.rb +130 -0
  57. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +34 -0
  58. data/lib/mindee/product/fr/bank_statement/bank_statement_v1_transaction.rb +64 -0
  59. data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +3 -1
  60. data/lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb +0 -2
  61. data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +3 -1
  62. data/lib/mindee/product/fr/id_card/id_card_v1.rb +3 -1
  63. data/lib/mindee/product/fr/id_card/id_card_v2.rb +3 -1
  64. data/lib/mindee/product/generated/generated_v1.rb +38 -0
  65. data/lib/mindee/product/generated/generated_v1_document.rb +35 -0
  66. data/lib/mindee/product/generated/generated_v1_page.rb +51 -0
  67. data/lib/mindee/product/generated/generated_v1_prediction.rb +114 -0
  68. data/lib/mindee/product/international_id/international_id_v1.rb +39 -0
  69. data/lib/mindee/product/international_id/international_id_v1_document.rb +109 -0
  70. data/lib/mindee/product/international_id/international_id_v1_page.rb +32 -0
  71. data/lib/mindee/product/international_id/international_id_v2.rb +39 -0
  72. data/lib/mindee/product/international_id/international_id_v2_document.rb +119 -0
  73. data/lib/mindee/product/international_id/international_id_v2_page.rb +32 -0
  74. data/lib/mindee/product/invoice/invoice_v4.rb +3 -1
  75. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +3 -1
  76. data/lib/mindee/product/passport/passport_v1.rb +3 -1
  77. data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +3 -1
  78. data/lib/mindee/product/receipt/receipt_v5.rb +3 -1
  79. data/lib/mindee/product/resume/resume_v1.rb +39 -0
  80. data/lib/mindee/product/resume/resume_v1_certificate.rb +69 -0
  81. data/lib/mindee/product/resume/resume_v1_document.rb +322 -0
  82. data/lib/mindee/product/resume/resume_v1_education.rb +90 -0
  83. data/lib/mindee/product/resume/resume_v1_language.rb +55 -0
  84. data/lib/mindee/product/resume/resume_v1_page.rb +32 -0
  85. data/lib/mindee/product/resume/resume_v1_professional_experience.rb +97 -0
  86. data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +55 -0
  87. data/lib/mindee/product/us/bank_check/bank_check_v1.rb +3 -1
  88. data/lib/mindee/product/us/driver_license/driver_license_v1.rb +3 -1
  89. data/lib/mindee/product/us/w9/w9_v1.rb +3 -1
  90. data/lib/mindee/product.rb +6 -0
  91. data/lib/mindee/version.rb +1 -1
  92. data/lib/mindee.rb +4 -0
  93. metadata +41 -2
data/docs/invoices_v4.md CHANGED
@@ -310,6 +310,9 @@ puts result.document.inference.prediction.supplier_name.value
310
310
  ```rb
311
311
  for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details do
312
312
  puts supplier_payment_details_elem.value
313
+ puts supplier_payment_details_elem.rate
314
+ puts supplier_payment_details_elem.code
315
+ puts supplier_payment_details_elem.basis
313
316
  end
314
317
  ```
315
318
 
@@ -318,7 +321,7 @@ end
318
321
 
319
322
  ```rb
320
323
  for taxes_elem in result.document.inference.prediction.taxes do
321
- puts taxes_elem.to_s
324
+ puts taxes_elem.value
322
325
  end
323
326
  ```
324
327
 
@@ -344,4 +347,4 @@ puts result.document.inference.prediction.total_tax.value
344
347
  ```
345
348
 
346
349
  # Questions?
347
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
350
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -88,4 +88,4 @@ end
88
88
  ```
89
89
 
90
90
  # Questions?
91
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
91
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -105,4 +105,4 @@ end
105
105
  ```
106
106
 
107
107
  # Questions?
108
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
108
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
data/docs/passport_v1.md CHANGED
@@ -183,4 +183,4 @@ puts result.document.inference.prediction.surname.value
183
183
  ```
184
184
 
185
185
  # Questions?
186
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
186
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -34,12 +34,12 @@ puts result.document
34
34
  ########
35
35
  Document
36
36
  ########
37
- :Mindee ID: 3a7e1da6-d4d0-4704-af91-051fe5484c2e
37
+ :Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38
38
38
  :Filename: default_sample.jpg
39
39
 
40
40
  Inference
41
41
  #########
42
- :Product: mindee/proof_of_address v1.0
42
+ :Product: mindee/proof_of_address v1.1
43
43
  :Rotation applied: Yes
44
44
 
45
45
  Prediction
@@ -47,7 +47,7 @@ Prediction
47
47
  :Locale: en; en; USD;
48
48
  :Issuer Name: PPL ELECTRIC UTILITIES
49
49
  :Issuer Company Registrations:
50
- :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN,PA 18101-1175
50
+ :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
51
51
  :Recipient Name:
52
52
  :Recipient Company Registrations:
53
53
  :Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
@@ -73,7 +73,7 @@ Page 0
73
73
  :Locale: en; en; USD;
74
74
  :Issuer Name: PPL ELECTRIC UTILITIES
75
75
  :Issuer Company Registrations:
76
- :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN,PA 18101-1175
76
+ :Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
77
77
  :Recipient Name:
78
78
  :Recipient Company Registrations:
79
79
  :Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
@@ -204,4 +204,4 @@ puts result.document.inference.prediction.recipient_name.value
204
204
  ```
205
205
 
206
206
  # Questions?
207
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
207
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
data/docs/resume_v1.md ADDED
@@ -0,0 +1,334 @@
1
+ ---
2
+ title: Resume OCR Ruby
3
+ ---
4
+ The Ruby OCR SDK supports the [Resume API](https://platform.mindee.com/mindee/resume).
5
+
6
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
7
+ ![Resume sample](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg?raw=true)
8
+
9
+ # Quick-Start
10
+ ```rb
11
+ require 'mindee'
12
+
13
+ # Init a new client
14
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
15
+
16
+ # Load a file from disk
17
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
18
+
19
+ # Parse the file
20
+ result = mindee_client.enqueue_and_parse(
21
+ input_source,
22
+ Mindee::Product::Resume::ResumeV1
23
+ )
24
+
25
+ # Print a full summary of the parsed data in RST format
26
+ puts result.document
27
+
28
+ # Print the document-level parsed data
29
+ # puts result.document.inference.prediction
30
+ ```
31
+
32
+ **Output (RST):**
33
+ ```rst
34
+ ########
35
+ Document
36
+ ########
37
+ :Mindee ID: bc80bae0-af75-4464-95a9-2419403c75bf
38
+ :Filename: default_sample.jpg
39
+
40
+ Inference
41
+ #########
42
+ :Product: mindee/resume v1.0
43
+ :Rotation applied: No
44
+
45
+ Prediction
46
+ ==========
47
+ :Document Language: ENG
48
+ :Document Type: RESUME
49
+ :Given Names: Christopher
50
+ :Surnames: Morgan
51
+ :Nationality:
52
+ :Email Address: christoper.m@gmail.com
53
+ :Phone Number: +44 (0) 20 7666 8555
54
+ :Address: 177 Great Portland Street, London W5W 6PQ
55
+ :Social Networks:
56
+ +----------------------+----------------------------------------------------+
57
+ | Name | URL |
58
+ +======================+====================================================+
59
+ | LinkedIn | linkedin.com/christopher.morgan |
60
+ +----------------------+----------------------------------------------------+
61
+ :Profession: Senior Web Developer
62
+ :Job Applied:
63
+ :Languages:
64
+ +----------+----------------------+
65
+ | Language | Level |
66
+ +==========+======================+
67
+ | SPA | Fluent |
68
+ +----------+----------------------+
69
+ | ZHO | Beginner |
70
+ +----------+----------------------+
71
+ | DEU | Intermediate |
72
+ +----------+----------------------+
73
+ :Hard Skills: HTML5
74
+ PHP OOP
75
+ JavaScript
76
+ CSS
77
+ MySQL
78
+ :Soft Skills: Project management
79
+ Strong decision maker
80
+ Innovative
81
+ Complex problem solver
82
+ Creative design
83
+ Service-focused
84
+ :Education:
85
+ +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
86
+ | Domain | Degree | End Month | End Year | School | Start Month | Start Year |
87
+ +=================+===========================+===========+==========+===========================+=============+============+
88
+ | Computer Inf... | Bachelor | | | Columbia University, NY | | 2014 |
89
+ +-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
90
+ :Professional Experiences:
91
+ +-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
92
+ | Contract Type | Department | Employer | End Month | End Year | Role | Start Month | Start Year |
93
+ +=================+============+===========================+===========+==========+======================+=============+============+
94
+ | Full-Time | | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 |
95
+ +-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
96
+ :Certificates:
97
+ +------------+--------------------------------+---------------------------+------+
98
+ | Grade | Name | Provider | Year |
99
+ +============+================================+===========================+======+
100
+ | | PHP Framework (certificate)... | | 2014 |
101
+ +------------+--------------------------------+---------------------------+------+
102
+ | | Programming Languages: Java... | | |
103
+ +------------+--------------------------------+---------------------------+------+
104
+ ```
105
+
106
+ # Field Types
107
+ ## Standard Fields
108
+ These fields are generic and used in several products.
109
+
110
+ ### Basic Field
111
+ Each prediction object contains a set of fields that inherit from the generic `Field` class.
112
+ A typical `Field` object will have the following attributes:
113
+
114
+ * **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
115
+ * **confidence** (`Float`, `nil`): the confidence score of the field prediction.
116
+ * **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains the relative coordinates of exactly 4 vertices (points) of a right rectangle containing the field in the document.
117
+ * **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative coordinates of the vertices (`Point`) of a polygon containing the field in the image.
118
+ * **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
119
+ * **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
120
+
121
+
122
+ Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
123
+
124
+
125
+ ### Classification Field
126
+ The classification field `ClassificationField` does not implement all the basic `Field` attributes. It only implements **value**, **confidence** and **page_id**.
127
+
128
+ > Note: a classification field's `value` is always a `String`.
129
+
130
+ ### String Field
131
+ The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
132
+
133
+ ## Specific Fields
134
+ Fields which are specific to this product; they are not used in any other product.
135
+
136
+ ### Certificates Field
137
+ The list of certificates obtained by the candidate.
138
+
139
+ A `ResumeV1Certificate` implements the following attributes:
140
+
141
+ * `grade` (String): The grade obtained for the certificate.
142
+ * `name` (String): The name of the certification.
143
+ * `provider` (String): The organization or institution that issued the certificate.
144
+ * `year` (String): The year when a certificate was issued or received.
145
+ Fields which are specific to this product; they are not used in any other product.
146
+
147
+ ### Education Field
148
+ The list of the candidate's educational background.
149
+
150
+ A `ResumeV1Education` implements the following attributes:
151
+
152
+ * `degree_domain` (String): The area of study or specialization.
153
+ * `degree_type` (String): The type of degree obtained, such as Bachelor's, Master's, or Doctorate.
154
+ * `end_month` (String): The month when the education program or course was completed.
155
+ * `end_year` (String): The year when the education program or course was completed.
156
+ * `school` (String): The name of the school.
157
+ * `start_month` (String): The month when the education program or course began.
158
+ * `start_year` (String): The year when the education program or course began.
159
+ Fields which are specific to this product; they are not used in any other product.
160
+
161
+ ### Languages Field
162
+ The list of languages that the candidate is proficient in.
163
+
164
+ A `ResumeV1Language` implements the following attributes:
165
+
166
+ * `language` (String): The language's ISO 639 code.
167
+ * `level` (String): The candidate's level for the language.
168
+ Fields which are specific to this product; they are not used in any other product.
169
+
170
+ ### Professional Experiences Field
171
+ The list of the candidate's professional experiences.
172
+
173
+ A `ResumeV1ProfessionalExperience` implements the following attributes:
174
+
175
+ * `contract_type` (String): The type of contract for the professional experience.
176
+ * `department` (String): The specific department or division within the company.
177
+ * `employer` (String): The name of the company or organization.
178
+ * `end_month` (String): The month when the professional experience ended.
179
+ * `end_year` (String): The year when the professional experience ended.
180
+ * `role` (String): The position or job title held by the candidate.
181
+ * `start_month` (String): The month when the professional experience began.
182
+ * `start_year` (String): The year when the professional experience began.
183
+ Fields which are specific to this product; they are not used in any other product.
184
+
185
+ ### Social Networks Field
186
+ The list of social network profiles of the candidate.
187
+
188
+ A `ResumeV1SocialNetworksUrl` implements the following attributes:
189
+
190
+ * `name` (String): The name of the social network.
191
+ * `url` (String): The URL of the social network.
192
+
193
+ # Attributes
194
+ The following fields are extracted for Resume V1:
195
+
196
+ ## Address
197
+ **address** ([StringField](#string-field)): The location information of the candidate, including city, state, and country.
198
+
199
+ ```rb
200
+ puts result.document.inference.prediction.address.value
201
+ ```
202
+
203
+ ## Certificates
204
+ **certificates** (Array<[ResumeV1Certificate](#certificates-field)>): The list of certificates obtained by the candidate.
205
+
206
+ ```rb
207
+ for certificates_elem in result.document.inference.prediction.certificates do
208
+ puts certificates_elem.value
209
+ end
210
+ ```
211
+
212
+ ## Document Language
213
+ **document_language** ([StringField](#string-field)): The ISO 639 code of the language in which the document is written.
214
+
215
+ ```rb
216
+ puts result.document.inference.prediction.document_language.value
217
+ ```
218
+
219
+ ## Document Type
220
+ **document_type** ([ClassificationField](#classification-field)): The type of the document sent.
221
+
222
+ ```rb
223
+ puts result.document.inference.prediction.document_type.value
224
+ ```
225
+
226
+ ## Education
227
+ **education** (Array<[ResumeV1Education](#education-field)>): The list of the candidate's educational background.
228
+
229
+ ```rb
230
+ for education_elem in result.document.inference.prediction.education do
231
+ puts education_elem.value
232
+ end
233
+ ```
234
+
235
+ ## Email Address
236
+ **email_address** ([StringField](#string-field)): The email address of the candidate.
237
+
238
+ ```rb
239
+ puts result.document.inference.prediction.email_address.value
240
+ ```
241
+
242
+ ## Given Names
243
+ **given_names** (Array<[StringField](#string-field)>): The candidate's first or given names.
244
+
245
+ ```rb
246
+ for given_names_elem in result.document.inference.prediction.given_names do
247
+ puts given_names_elem.value
248
+ end
249
+ ```
250
+
251
+ ## Hard Skills
252
+ **hard_skills** (Array<[StringField](#string-field)>): The list of the candidate's technical abilities and knowledge.
253
+
254
+ ```rb
255
+ for hard_skills_elem in result.document.inference.prediction.hard_skills do
256
+ puts hard_skills_elem.value
257
+ end
258
+ ```
259
+
260
+ ## Job Applied
261
+ **job_applied** ([StringField](#string-field)): The position that the candidate is applying for.
262
+
263
+ ```rb
264
+ puts result.document.inference.prediction.job_applied.value
265
+ ```
266
+
267
+ ## Languages
268
+ **languages** (Array<[ResumeV1Language](#languages-field)>): The list of languages that the candidate is proficient in.
269
+
270
+ ```rb
271
+ for languages_elem in result.document.inference.prediction.languages do
272
+ puts languages_elem.value
273
+ end
274
+ ```
275
+
276
+ ## Nationality
277
+ **nationality** ([StringField](#string-field)): The ISO 3166 code for the country of citizenship of the candidate.
278
+
279
+ ```rb
280
+ puts result.document.inference.prediction.nationality.value
281
+ ```
282
+
283
+ ## Phone Number
284
+ **phone_number** ([StringField](#string-field)): The phone number of the candidate.
285
+
286
+ ```rb
287
+ puts result.document.inference.prediction.phone_number.value
288
+ ```
289
+
290
+ ## Profession
291
+ **profession** ([StringField](#string-field)): The candidate's current profession.
292
+
293
+ ```rb
294
+ puts result.document.inference.prediction.profession.value
295
+ ```
296
+
297
+ ## Professional Experiences
298
+ **professional_experiences** (Array<[ResumeV1ProfessionalExperience](#professional-experiences-field)>): The list of the candidate's professional experiences.
299
+
300
+ ```rb
301
+ for professional_experiences_elem in result.document.inference.prediction.professional_experiences do
302
+ puts professional_experiences_elem.value
303
+ end
304
+ ```
305
+
306
+ ## Social Networks
307
+ **social_networks_urls** (Array<[ResumeV1SocialNetworksUrl](#social-networks-field)>): The list of social network profiles of the candidate.
308
+
309
+ ```rb
310
+ for social_networks_urls_elem in result.document.inference.prediction.social_networks_urls do
311
+ puts social_networks_urls_elem.value
312
+ end
313
+ ```
314
+
315
+ ## Soft Skills
316
+ **soft_skills** (Array<[StringField](#string-field)>): The list of the candidate's interpersonal and communication abilities.
317
+
318
+ ```rb
319
+ for soft_skills_elem in result.document.inference.prediction.soft_skills do
320
+ puts soft_skills_elem.value
321
+ end
322
+ ```
323
+
324
+ ## Surnames
325
+ **surnames** (Array<[StringField](#string-field)>): The candidate's last names.
326
+
327
+ ```rb
328
+ for surnames_elem in result.document.inference.prediction.surnames do
329
+ puts surnames_elem.value
330
+ end
331
+ ```
332
+
333
+ # Questions?
334
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -54,7 +54,7 @@ Prediction
54
54
  :Date Of Birth: 1957-02-01
55
55
  :Restrictions: NONE
56
56
  :Endorsements: NONE
57
- :Class:
57
+ :Driver License Class: D
58
58
  :Sex: M
59
59
  :Height: 5-08
60
60
  :Weight: 185
@@ -79,7 +79,7 @@ Page 0
79
79
  :Date Of Birth: 1957-02-01
80
80
  :Restrictions: NONE
81
81
  :Endorsements: NONE
82
- :Class:
82
+ :Driver License Class: D
83
83
  :Sex: M
84
84
  :Height: 5-08
85
85
  :Weight: 185
@@ -265,4 +265,4 @@ puts result.document.inference.prediction.weight.value
265
265
  ```
266
266
 
267
267
  # Questions?
268
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
268
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
data/docs/us_w9_v1.md CHANGED
@@ -204,4 +204,4 @@ end
204
204
  ```
205
205
 
206
206
  # Questions?
207
- [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
207
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
data/lib/mindee/client.rb CHANGED
@@ -38,6 +38,7 @@ module Mindee
38
38
  # @param cropper [Boolean] Whether to include cropper results for each page.
39
39
  # This performs a cropping operation on the server and will increase response time.
40
40
  #
41
+ #
41
42
  # @return [Mindee::Parsing::Common::ApiResponse]
42
43
  def parse(
43
44
  input_source,
@@ -80,6 +81,7 @@ module Mindee
80
81
  # @param cropper [Boolean] Whether to include cropper results for each page.
81
82
  # This performs a cropping operation on the server and will increase response time.
82
83
  #
84
+ #
83
85
  # @return [Mindee::Parsing::Common::ApiResponse]
84
86
  def enqueue(
85
87
  input_source,
@@ -118,7 +120,6 @@ module Mindee
118
120
  end
119
121
 
120
122
  # rubocop:disable Metrics/ParameterLists
121
-
122
123
  # Enqueue a document for async parsing and automatically try to retrieve it
123
124
  #
124
125
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
@@ -139,7 +140,7 @@ module Mindee
139
140
  # This performs a cropping operation on the server and will increase response time.
140
141
  # @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 4.
141
142
  # @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 2.
142
- # @param max_retries [Integer, nil] maximum amount of retries. Defaults to 30.
143
+ # @param max_retries [Integer, nil] maximum amount of retries. Defaults to 60.
143
144
  # @return [Mindee::Parsing::Common::ApiResponse]
144
145
  def enqueue_and_parse(
145
146
  input_source,
@@ -151,7 +152,7 @@ module Mindee
151
152
  cropper: false,
152
153
  initial_delay_sec: 4,
153
154
  delay_sec: 2,
154
- max_retries: 30
155
+ max_retries: 60
155
156
  )
156
157
  enqueue_res = enqueue(
157
158
  input_source,
@@ -178,6 +179,7 @@ module Mindee
178
179
 
179
180
  queue_res
180
181
  end
182
+
181
183
  # rubocop:enable Metrics/ParameterLists
182
184
 
183
185
  # Load a document from an absolute path, as a string.
@@ -46,9 +46,9 @@ module Mindee
46
46
 
47
47
  # Call the prediction API.
48
48
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
49
- # @param all_words [Boolean]
50
- # @param close_file [Boolean]
51
- # @param cropper [Boolean]
49
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
50
+ # @param close_file [Boolean] Whether the file will be closed after reading
51
+ # @param cropper [Boolean] Whether a cropping operation will be applied
52
52
  # @return [Hash]
53
53
  def predict(input_source, all_words, close_file, cropper)
54
54
  check_api_key
@@ -62,8 +62,9 @@ module Mindee
62
62
 
63
63
  # Call the prediction API.
64
64
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
65
- # @param close_file [Boolean]
66
- # @param cropper [Boolean]
65
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
66
+ # @param close_file [Boolean] Whether the file will be closed after reading
67
+ # @param cropper [Boolean] Whether a cropping operation will be applied
67
68
  # @return [Hash]
68
69
  def predict_async(input_source, all_words, close_file, cropper)
69
70
  check_api_key
@@ -91,10 +92,10 @@ module Mindee
91
92
  private
92
93
 
93
94
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
94
- # @param all_words [Boolean]
95
- # @param close_file [Boolean]
96
- # @param cropper [Boolean]
97
- # @return [Net::HTTPResponse]
95
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
96
+ # @param close_file [Boolean] Whether the file will be closed after reading
97
+ # @param cropper [Boolean] Whether a cropping operation will be applied
98
+ # @return [Net::HTTP, nil]
98
99
  def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
99
100
  uri = URI("#{@url_root}/predict")
100
101
 
@@ -122,9 +123,9 @@ module Mindee
122
123
  end
123
124
 
124
125
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
125
- # @param all_words [Boolean]
126
- # @param close_file [Boolean]
127
- # @param cropper [Boolean]
126
+ # @param all_words [Boolean] Whether the full word extraction needs to be performed
127
+ # @param close_file [Boolean] Whether the file will be closed after reading
128
+ # @param cropper [Boolean] Whether a cropping operation will be applied
128
129
  # @return [Net::HTTPResponse]
129
130
  def document_queue_req_get(input_source, all_words, close_file, cropper)
130
131
  uri = URI("#{@url_root}/predict_async")
@@ -7,6 +7,7 @@ require_relative '../pdf'
7
7
 
8
8
  module Mindee
9
9
  module Input
10
+ # Document source handling.
10
11
  module Source
11
12
  # Mime types accepted by the server.
12
13
  ALLOWED_MIME_TYPES = [
@@ -107,8 +108,7 @@ module Mindee
107
108
  @io_stream = PdfProcessor.parse(@io_stream, options)
108
109
  end
109
110
 
110
- # Reads a document. Packs it into bytes if needed.
111
- # Note: only needs filename in case of some pdf files.
111
+ # Reads a document.
112
112
  # @param close [Boolean]
113
113
  # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
114
114
  def read_document(close: true)
@@ -116,9 +116,7 @@ module Mindee
116
116
  # Avoids needlessly re-packing some files
117
117
  data = @io_stream.read
118
118
  @io_stream.close if close
119
- return ['document', data, { filename: @filename }] if pdf?
120
-
121
- ['document', [data].pack('m')]
119
+ ['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
122
120
  end
123
121
  end
124
122
 
@@ -142,6 +140,16 @@ module Mindee
142
140
  io_stream.set_encoding Encoding::BINARY
143
141
  super(io_stream, filename, fix_pdf: fix_pdf)
144
142
  end
143
+
144
+ # Override of the same method to prevent a base64 from being re-encoded.
145
+ # @param close [Boolean]
146
+ # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
147
+ def read_document(close: true)
148
+ @io_stream.seek(0)
149
+ data = @io_stream.read
150
+ @io_stream.close if close
151
+ ['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
152
+ end
145
153
  end
146
154
 
147
155
  # Load a document from raw bytes.
@@ -178,6 +186,21 @@ module Mindee
178
186
  @url = url
179
187
  end
180
188
  end
189
+
190
+ # Replaces non-ASCII characters with their Unicode escape sequence.
191
+ # Keeps other characters as is.
192
+ # @return [String] A clean String.
193
+ def self.convert_to_unicode_escape(string)
194
+ unicode_escape_string = ''.dup
195
+ string.each_char do |char|
196
+ unicode_escape_string << if char.bytesize > 1
197
+ "\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
198
+ else
199
+ char
200
+ end
201
+ end
202
+ unicode_escape_string
203
+ end
181
204
  end
182
205
  end
183
206
  end
@@ -34,8 +34,10 @@ module Mindee
34
34
  out_str << "\n:Rotation applied: #{is_rotation_applied}"
35
35
  out_str << "\n\nPrediction\n=========="
36
36
  out_str << "\n#{@prediction.to_s.size.positive? ? "#{@prediction}\n" : ''}"
37
- out_str << "\nPage Predictions\n================\n\n"
37
+ out_str << "\nPage Predictions\n================\n\n" unless @pages.empty?
38
38
  out_str << @pages.map(&:to_s).join("\n\n")
39
+ out_str.rstrip!
40
+ out_str
39
41
  end
40
42
  end
41
43
  end