mindee 3.4.0 → 3.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +6 -1
- data/bin/mindee.rb +45 -15
- data/docs/bank_account_details_v2.md +1 -1
- data/docs/bank_check_v1.md +1 -1
- data/docs/bank_statement_fr_v1.md +175 -0
- data/docs/barcode_reader_v1.md +1 -1
- data/docs/carte_grise_v1.md +5 -5
- data/docs/carte_vitale_v1.md +1 -1
- data/docs/code_samples/bank_statement_fr_v1_async.txt +19 -0
- data/docs/code_samples/default.txt +19 -19
- data/docs/code_samples/default_async.txt +25 -0
- data/docs/code_samples/eu_driver_license_v1.txt +19 -0
- data/docs/code_samples/international_id_v1_async.txt +19 -0
- data/docs/code_samples/international_id_v2_async.txt +19 -0
- data/docs/code_samples/resume_v1_async.txt +19 -0
- data/docs/cropper_v1.md +1 -1
- data/docs/custom_v1.md +1 -1
- data/docs/eu_driver_license_v1.md +223 -0
- data/docs/expense_receipts_v5.md +1 -1
- data/docs/financial_document_v1.md +49 -41
- data/docs/generated_v1.md +90 -0
- data/docs/getting_started.md +1 -1
- data/docs/idcard_fr_v2.md +1 -1
- data/docs/international_id_v2.md +195 -0
- data/docs/invoice_splitter_v1.md +1 -1
- data/docs/invoices_v4.md +5 -2
- data/docs/license_plates_v1.md +1 -1
- data/docs/multi_receipts_detector_v1.md +1 -1
- data/docs/passport_v1.md +1 -1
- data/docs/proof_of_address_v1.md +5 -5
- data/docs/resume_v1.md +334 -0
- data/docs/us_driver_license_v1.md +3 -3
- data/docs/us_w9_v1.md +1 -1
- data/lib/mindee/client.rb +5 -3
- data/lib/mindee/http/endpoint.rb +13 -12
- data/lib/mindee/input/sources.rb +28 -5
- data/lib/mindee/parsing/common/inference.rb +3 -1
- data/lib/mindee/parsing/generated/generated_list_field.rb +58 -0
- data/lib/mindee/parsing/generated/generated_object_field.rb +109 -0
- data/lib/mindee/parsing/generated.rb +4 -0
- data/lib/mindee/parsing/standard/base_field.rb +1 -1
- data/lib/mindee/parsing.rb +1 -0
- data/lib/mindee/product/.rubocop.yml +7 -2
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +3 -1
- data/lib/mindee/product/cropper/cropper_v1.rb +3 -1
- data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +41 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +88 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +53 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +3 -1
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +41 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_document.rb +130 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_transaction.rb +64 -0
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +3 -1
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb +0 -2
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +3 -1
- data/lib/mindee/product/generated/generated_v1.rb +38 -0
- data/lib/mindee/product/generated/generated_v1_document.rb +35 -0
- data/lib/mindee/product/generated/generated_v1_page.rb +51 -0
- data/lib/mindee/product/generated/generated_v1_prediction.rb +114 -0
- data/lib/mindee/product/international_id/international_id_v1.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v1_document.rb +109 -0
- data/lib/mindee/product/international_id/international_id_v1_page.rb +32 -0
- data/lib/mindee/product/international_id/international_id_v2.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v2_document.rb +119 -0
- data/lib/mindee/product/international_id/international_id_v2_page.rb +32 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +3 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +3 -1
- data/lib/mindee/product/passport/passport_v1.rb +3 -1
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +3 -1
- data/lib/mindee/product/receipt/receipt_v5.rb +3 -1
- data/lib/mindee/product/resume/resume_v1.rb +39 -0
- data/lib/mindee/product/resume/resume_v1_certificate.rb +69 -0
- data/lib/mindee/product/resume/resume_v1_document.rb +322 -0
- data/lib/mindee/product/resume/resume_v1_education.rb +90 -0
- data/lib/mindee/product/resume/resume_v1_language.rb +55 -0
- data/lib/mindee/product/resume/resume_v1_page.rb +32 -0
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +97 -0
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +55 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +3 -1
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +3 -1
- data/lib/mindee/product/us/w9/w9_v1.rb +3 -1
- data/lib/mindee/product.rb +6 -0
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +4 -0
- metadata +41 -2
data/docs/invoices_v4.md
CHANGED
@@ -310,6 +310,9 @@ puts result.document.inference.prediction.supplier_name.value
|
|
310
310
|
```rb
|
311
311
|
for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details do
|
312
312
|
puts supplier_payment_details_elem.value
|
313
|
+
puts supplier_payment_details_elem.rate
|
314
|
+
puts supplier_payment_details_elem.code
|
315
|
+
puts supplier_payment_details_elem.basis
|
313
316
|
end
|
314
317
|
```
|
315
318
|
|
@@ -318,7 +321,7 @@ end
|
|
318
321
|
|
319
322
|
```rb
|
320
323
|
for taxes_elem in result.document.inference.prediction.taxes do
|
321
|
-
puts taxes_elem.
|
324
|
+
puts taxes_elem.value
|
322
325
|
end
|
323
326
|
```
|
324
327
|
|
@@ -344,4 +347,4 @@ puts result.document.inference.prediction.total_tax.value
|
|
344
347
|
```
|
345
348
|
|
346
349
|
# Questions?
|
347
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
350
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/license_plates_v1.md
CHANGED
data/docs/passport_v1.md
CHANGED
@@ -183,4 +183,4 @@ puts result.document.inference.prediction.surname.value
|
|
183
183
|
```
|
184
184
|
|
185
185
|
# Questions?
|
186
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
186
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/proof_of_address_v1.md
CHANGED
@@ -34,12 +34,12 @@ puts result.document
|
|
34
34
|
########
|
35
35
|
Document
|
36
36
|
########
|
37
|
-
:Mindee ID:
|
37
|
+
:Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38
|
38
38
|
:Filename: default_sample.jpg
|
39
39
|
|
40
40
|
Inference
|
41
41
|
#########
|
42
|
-
:Product: mindee/proof_of_address v1.
|
42
|
+
:Product: mindee/proof_of_address v1.1
|
43
43
|
:Rotation applied: Yes
|
44
44
|
|
45
45
|
Prediction
|
@@ -47,7 +47,7 @@ Prediction
|
|
47
47
|
:Locale: en; en; USD;
|
48
48
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
49
49
|
:Issuer Company Registrations:
|
50
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
50
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
51
51
|
:Recipient Name:
|
52
52
|
:Recipient Company Registrations:
|
53
53
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
@@ -73,7 +73,7 @@ Page 0
|
|
73
73
|
:Locale: en; en; USD;
|
74
74
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
75
75
|
:Issuer Company Registrations:
|
76
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
76
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
77
77
|
:Recipient Name:
|
78
78
|
:Recipient Company Registrations:
|
79
79
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
@@ -204,4 +204,4 @@ puts result.document.inference.prediction.recipient_name.value
|
|
204
204
|
```
|
205
205
|
|
206
206
|
# Questions?
|
207
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
207
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/resume_v1.md
ADDED
@@ -0,0 +1,334 @@
|
|
1
|
+
---
|
2
|
+
title: Resume OCR Ruby
|
3
|
+
---
|
4
|
+
The Ruby OCR SDK supports the [Resume API](https://platform.mindee.com/mindee/resume).
|
5
|
+
|
6
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
7
|
+
![Resume sample](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg?raw=true)
|
8
|
+
|
9
|
+
# Quick-Start
|
10
|
+
```rb
|
11
|
+
require 'mindee'
|
12
|
+
|
13
|
+
# Init a new client
|
14
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
15
|
+
|
16
|
+
# Load a file from disk
|
17
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
18
|
+
|
19
|
+
# Parse the file
|
20
|
+
result = mindee_client.enqueue_and_parse(
|
21
|
+
input_source,
|
22
|
+
Mindee::Product::Resume::ResumeV1
|
23
|
+
)
|
24
|
+
|
25
|
+
# Print a full summary of the parsed data in RST format
|
26
|
+
puts result.document
|
27
|
+
|
28
|
+
# Print the document-level parsed data
|
29
|
+
# puts result.document.inference.prediction
|
30
|
+
```
|
31
|
+
|
32
|
+
**Output (RST):**
|
33
|
+
```rst
|
34
|
+
########
|
35
|
+
Document
|
36
|
+
########
|
37
|
+
:Mindee ID: bc80bae0-af75-4464-95a9-2419403c75bf
|
38
|
+
:Filename: default_sample.jpg
|
39
|
+
|
40
|
+
Inference
|
41
|
+
#########
|
42
|
+
:Product: mindee/resume v1.0
|
43
|
+
:Rotation applied: No
|
44
|
+
|
45
|
+
Prediction
|
46
|
+
==========
|
47
|
+
:Document Language: ENG
|
48
|
+
:Document Type: RESUME
|
49
|
+
:Given Names: Christopher
|
50
|
+
:Surnames: Morgan
|
51
|
+
:Nationality:
|
52
|
+
:Email Address: christoper.m@gmail.com
|
53
|
+
:Phone Number: +44 (0) 20 7666 8555
|
54
|
+
:Address: 177 Great Portland Street, London W5W 6PQ
|
55
|
+
:Social Networks:
|
56
|
+
+----------------------+----------------------------------------------------+
|
57
|
+
| Name | URL |
|
58
|
+
+======================+====================================================+
|
59
|
+
| LinkedIn | linkedin.com/christopher.morgan |
|
60
|
+
+----------------------+----------------------------------------------------+
|
61
|
+
:Profession: Senior Web Developer
|
62
|
+
:Job Applied:
|
63
|
+
:Languages:
|
64
|
+
+----------+----------------------+
|
65
|
+
| Language | Level |
|
66
|
+
+==========+======================+
|
67
|
+
| SPA | Fluent |
|
68
|
+
+----------+----------------------+
|
69
|
+
| ZHO | Beginner |
|
70
|
+
+----------+----------------------+
|
71
|
+
| DEU | Intermediate |
|
72
|
+
+----------+----------------------+
|
73
|
+
:Hard Skills: HTML5
|
74
|
+
PHP OOP
|
75
|
+
JavaScript
|
76
|
+
CSS
|
77
|
+
MySQL
|
78
|
+
:Soft Skills: Project management
|
79
|
+
Strong decision maker
|
80
|
+
Innovative
|
81
|
+
Complex problem solver
|
82
|
+
Creative design
|
83
|
+
Service-focused
|
84
|
+
:Education:
|
85
|
+
+-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
|
86
|
+
| Domain | Degree | End Month | End Year | School | Start Month | Start Year |
|
87
|
+
+=================+===========================+===========+==========+===========================+=============+============+
|
88
|
+
| Computer Inf... | Bachelor | | | Columbia University, NY | | 2014 |
|
89
|
+
+-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
|
90
|
+
:Professional Experiences:
|
91
|
+
+-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
|
92
|
+
| Contract Type | Department | Employer | End Month | End Year | Role | Start Month | Start Year |
|
93
|
+
+=================+============+===========================+===========+==========+======================+=============+============+
|
94
|
+
| Full-Time | | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 |
|
95
|
+
+-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
|
96
|
+
:Certificates:
|
97
|
+
+------------+--------------------------------+---------------------------+------+
|
98
|
+
| Grade | Name | Provider | Year |
|
99
|
+
+============+================================+===========================+======+
|
100
|
+
| | PHP Framework (certificate)... | | 2014 |
|
101
|
+
+------------+--------------------------------+---------------------------+------+
|
102
|
+
| | Programming Languages: Java... | | |
|
103
|
+
+------------+--------------------------------+---------------------------+------+
|
104
|
+
```
|
105
|
+
|
106
|
+
# Field Types
|
107
|
+
## Standard Fields
|
108
|
+
These fields are generic and used in several products.
|
109
|
+
|
110
|
+
### Basic Field
|
111
|
+
Each prediction object contains a set of fields that inherit from the generic `Field` class.
|
112
|
+
A typical `Field` object will have the following attributes:
|
113
|
+
|
114
|
+
* **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
|
115
|
+
* **confidence** (`Float`, `nil`): the confidence score of the field prediction.
|
116
|
+
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains the relative coordinates of exactly 4 vertices (points) of a right rectangle containing the field in the document.
|
117
|
+
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
118
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
|
119
|
+
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
120
|
+
|
121
|
+
|
122
|
+
Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
|
123
|
+
|
124
|
+
|
125
|
+
### Classification Field
|
126
|
+
The classification field `ClassificationField` does not implement all the basic `Field` attributes. It only implements **value**, **confidence** and **page_id**.
|
127
|
+
|
128
|
+
> Note: a classification field's `value` is always a `String`.
|
129
|
+
|
130
|
+
### String Field
|
131
|
+
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
132
|
+
|
133
|
+
## Specific Fields
|
134
|
+
Fields which are specific to this product; they are not used in any other product.
|
135
|
+
|
136
|
+
### Certificates Field
|
137
|
+
The list of certificates obtained by the candidate.
|
138
|
+
|
139
|
+
A `ResumeV1Certificate` implements the following attributes:
|
140
|
+
|
141
|
+
* `grade` (String): The grade obtained for the certificate.
|
142
|
+
* `name` (String): The name of the certification.
|
143
|
+
* `provider` (String): The organization or institution that issued the certificate.
|
144
|
+
* `year` (String): The year when a certificate was issued or received.
|
145
|
+
Fields which are specific to this product; they are not used in any other product.
|
146
|
+
|
147
|
+
### Education Field
|
148
|
+
The list of the candidate's educational background.
|
149
|
+
|
150
|
+
A `ResumeV1Education` implements the following attributes:
|
151
|
+
|
152
|
+
* `degree_domain` (String): The area of study or specialization.
|
153
|
+
* `degree_type` (String): The type of degree obtained, such as Bachelor's, Master's, or Doctorate.
|
154
|
+
* `end_month` (String): The month when the education program or course was completed.
|
155
|
+
* `end_year` (String): The year when the education program or course was completed.
|
156
|
+
* `school` (String): The name of the school.
|
157
|
+
* `start_month` (String): The month when the education program or course began.
|
158
|
+
* `start_year` (String): The year when the education program or course began.
|
159
|
+
Fields which are specific to this product; they are not used in any other product.
|
160
|
+
|
161
|
+
### Languages Field
|
162
|
+
The list of languages that the candidate is proficient in.
|
163
|
+
|
164
|
+
A `ResumeV1Language` implements the following attributes:
|
165
|
+
|
166
|
+
* `language` (String): The language's ISO 639 code.
|
167
|
+
* `level` (String): The candidate's level for the language.
|
168
|
+
Fields which are specific to this product; they are not used in any other product.
|
169
|
+
|
170
|
+
### Professional Experiences Field
|
171
|
+
The list of the candidate's professional experiences.
|
172
|
+
|
173
|
+
A `ResumeV1ProfessionalExperience` implements the following attributes:
|
174
|
+
|
175
|
+
* `contract_type` (String): The type of contract for the professional experience.
|
176
|
+
* `department` (String): The specific department or division within the company.
|
177
|
+
* `employer` (String): The name of the company or organization.
|
178
|
+
* `end_month` (String): The month when the professional experience ended.
|
179
|
+
* `end_year` (String): The year when the professional experience ended.
|
180
|
+
* `role` (String): The position or job title held by the candidate.
|
181
|
+
* `start_month` (String): The month when the professional experience began.
|
182
|
+
* `start_year` (String): The year when the professional experience began.
|
183
|
+
Fields which are specific to this product; they are not used in any other product.
|
184
|
+
|
185
|
+
### Social Networks Field
|
186
|
+
The list of social network profiles of the candidate.
|
187
|
+
|
188
|
+
A `ResumeV1SocialNetworksUrl` implements the following attributes:
|
189
|
+
|
190
|
+
* `name` (String): The name of the social network.
|
191
|
+
* `url` (String): The URL of the social network.
|
192
|
+
|
193
|
+
# Attributes
|
194
|
+
The following fields are extracted for Resume V1:
|
195
|
+
|
196
|
+
## Address
|
197
|
+
**address** ([StringField](#string-field)): The location information of the candidate, including city, state, and country.
|
198
|
+
|
199
|
+
```rb
|
200
|
+
puts result.document.inference.prediction.address.value
|
201
|
+
```
|
202
|
+
|
203
|
+
## Certificates
|
204
|
+
**certificates** (Array<[ResumeV1Certificate](#certificates-field)>): The list of certificates obtained by the candidate.
|
205
|
+
|
206
|
+
```rb
|
207
|
+
for certificates_elem in result.document.inference.prediction.certificates do
|
208
|
+
puts certificates_elem.value
|
209
|
+
end
|
210
|
+
```
|
211
|
+
|
212
|
+
## Document Language
|
213
|
+
**document_language** ([StringField](#string-field)): The ISO 639 code of the language in which the document is written.
|
214
|
+
|
215
|
+
```rb
|
216
|
+
puts result.document.inference.prediction.document_language.value
|
217
|
+
```
|
218
|
+
|
219
|
+
## Document Type
|
220
|
+
**document_type** ([ClassificationField](#classification-field)): The type of the document sent.
|
221
|
+
|
222
|
+
```rb
|
223
|
+
puts result.document.inference.prediction.document_type.value
|
224
|
+
```
|
225
|
+
|
226
|
+
## Education
|
227
|
+
**education** (Array<[ResumeV1Education](#education-field)>): The list of the candidate's educational background.
|
228
|
+
|
229
|
+
```rb
|
230
|
+
for education_elem in result.document.inference.prediction.education do
|
231
|
+
puts education_elem.value
|
232
|
+
end
|
233
|
+
```
|
234
|
+
|
235
|
+
## Email Address
|
236
|
+
**email_address** ([StringField](#string-field)): The email address of the candidate.
|
237
|
+
|
238
|
+
```rb
|
239
|
+
puts result.document.inference.prediction.email_address.value
|
240
|
+
```
|
241
|
+
|
242
|
+
## Given Names
|
243
|
+
**given_names** (Array<[StringField](#string-field)>): The candidate's first or given names.
|
244
|
+
|
245
|
+
```rb
|
246
|
+
for given_names_elem in result.document.inference.prediction.given_names do
|
247
|
+
puts given_names_elem.value
|
248
|
+
end
|
249
|
+
```
|
250
|
+
|
251
|
+
## Hard Skills
|
252
|
+
**hard_skills** (Array<[StringField](#string-field)>): The list of the candidate's technical abilities and knowledge.
|
253
|
+
|
254
|
+
```rb
|
255
|
+
for hard_skills_elem in result.document.inference.prediction.hard_skills do
|
256
|
+
puts hard_skills_elem.value
|
257
|
+
end
|
258
|
+
```
|
259
|
+
|
260
|
+
## Job Applied
|
261
|
+
**job_applied** ([StringField](#string-field)): The position that the candidate is applying for.
|
262
|
+
|
263
|
+
```rb
|
264
|
+
puts result.document.inference.prediction.job_applied.value
|
265
|
+
```
|
266
|
+
|
267
|
+
## Languages
|
268
|
+
**languages** (Array<[ResumeV1Language](#languages-field)>): The list of languages that the candidate is proficient in.
|
269
|
+
|
270
|
+
```rb
|
271
|
+
for languages_elem in result.document.inference.prediction.languages do
|
272
|
+
puts languages_elem.value
|
273
|
+
end
|
274
|
+
```
|
275
|
+
|
276
|
+
## Nationality
|
277
|
+
**nationality** ([StringField](#string-field)): The ISO 3166 code for the country of citizenship of the candidate.
|
278
|
+
|
279
|
+
```rb
|
280
|
+
puts result.document.inference.prediction.nationality.value
|
281
|
+
```
|
282
|
+
|
283
|
+
## Phone Number
|
284
|
+
**phone_number** ([StringField](#string-field)): The phone number of the candidate.
|
285
|
+
|
286
|
+
```rb
|
287
|
+
puts result.document.inference.prediction.phone_number.value
|
288
|
+
```
|
289
|
+
|
290
|
+
## Profession
|
291
|
+
**profession** ([StringField](#string-field)): The candidate's current profession.
|
292
|
+
|
293
|
+
```rb
|
294
|
+
puts result.document.inference.prediction.profession.value
|
295
|
+
```
|
296
|
+
|
297
|
+
## Professional Experiences
|
298
|
+
**professional_experiences** (Array<[ResumeV1ProfessionalExperience](#professional-experiences-field)>): The list of the candidate's professional experiences.
|
299
|
+
|
300
|
+
```rb
|
301
|
+
for professional_experiences_elem in result.document.inference.prediction.professional_experiences do
|
302
|
+
puts professional_experiences_elem.value
|
303
|
+
end
|
304
|
+
```
|
305
|
+
|
306
|
+
## Social Networks
|
307
|
+
**social_networks_urls** (Array<[ResumeV1SocialNetworksUrl](#social-networks-field)>): The list of social network profiles of the candidate.
|
308
|
+
|
309
|
+
```rb
|
310
|
+
for social_networks_urls_elem in result.document.inference.prediction.social_networks_urls do
|
311
|
+
puts social_networks_urls_elem.value
|
312
|
+
end
|
313
|
+
```
|
314
|
+
|
315
|
+
## Soft Skills
|
316
|
+
**soft_skills** (Array<[StringField](#string-field)>): The list of the candidate's interpersonal and communication abilities.
|
317
|
+
|
318
|
+
```rb
|
319
|
+
for soft_skills_elem in result.document.inference.prediction.soft_skills do
|
320
|
+
puts soft_skills_elem.value
|
321
|
+
end
|
322
|
+
```
|
323
|
+
|
324
|
+
## Surnames
|
325
|
+
**surnames** (Array<[StringField](#string-field)>): The candidate's last names.
|
326
|
+
|
327
|
+
```rb
|
328
|
+
for surnames_elem in result.document.inference.prediction.surnames do
|
329
|
+
puts surnames_elem.value
|
330
|
+
end
|
331
|
+
```
|
332
|
+
|
333
|
+
# Questions?
|
334
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
@@ -54,7 +54,7 @@ Prediction
|
|
54
54
|
:Date Of Birth: 1957-02-01
|
55
55
|
:Restrictions: NONE
|
56
56
|
:Endorsements: NONE
|
57
|
-
:Class:
|
57
|
+
:Driver License Class: D
|
58
58
|
:Sex: M
|
59
59
|
:Height: 5-08
|
60
60
|
:Weight: 185
|
@@ -79,7 +79,7 @@ Page 0
|
|
79
79
|
:Date Of Birth: 1957-02-01
|
80
80
|
:Restrictions: NONE
|
81
81
|
:Endorsements: NONE
|
82
|
-
:Class:
|
82
|
+
:Driver License Class: D
|
83
83
|
:Sex: M
|
84
84
|
:Height: 5-08
|
85
85
|
:Weight: 185
|
@@ -265,4 +265,4 @@ puts result.document.inference.prediction.weight.value
|
|
265
265
|
```
|
266
266
|
|
267
267
|
# Questions?
|
268
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
268
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/us_w9_v1.md
CHANGED
data/lib/mindee/client.rb
CHANGED
@@ -38,6 +38,7 @@ module Mindee
|
|
38
38
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
39
39
|
# This performs a cropping operation on the server and will increase response time.
|
40
40
|
#
|
41
|
+
#
|
41
42
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
42
43
|
def parse(
|
43
44
|
input_source,
|
@@ -80,6 +81,7 @@ module Mindee
|
|
80
81
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
81
82
|
# This performs a cropping operation on the server and will increase response time.
|
82
83
|
#
|
84
|
+
#
|
83
85
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
84
86
|
def enqueue(
|
85
87
|
input_source,
|
@@ -118,7 +120,6 @@ module Mindee
|
|
118
120
|
end
|
119
121
|
|
120
122
|
# rubocop:disable Metrics/ParameterLists
|
121
|
-
|
122
123
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
123
124
|
#
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
@@ -139,7 +140,7 @@ module Mindee
|
|
139
140
|
# This performs a cropping operation on the server and will increase response time.
|
140
141
|
# @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 4.
|
141
142
|
# @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 2.
|
142
|
-
# @param max_retries [Integer, nil] maximum amount of retries. Defaults to
|
143
|
+
# @param max_retries [Integer, nil] maximum number of retries. Defaults to 60.
|
143
144
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
144
145
|
def enqueue_and_parse(
|
145
146
|
input_source,
|
@@ -151,7 +152,7 @@ module Mindee
|
|
151
152
|
cropper: false,
|
152
153
|
initial_delay_sec: 4,
|
153
154
|
delay_sec: 2,
|
154
|
-
max_retries:
|
155
|
+
max_retries: 60
|
155
156
|
)
|
156
157
|
enqueue_res = enqueue(
|
157
158
|
input_source,
|
@@ -178,6 +179,7 @@ module Mindee
|
|
178
179
|
|
179
180
|
queue_res
|
180
181
|
end
|
182
|
+
|
181
183
|
# rubocop:enable Metrics/ParameterLists
|
182
184
|
|
183
185
|
# Load a document from an absolute path, as a string.
|
data/lib/mindee/http/endpoint.rb
CHANGED
@@ -46,9 +46,9 @@ module Mindee
|
|
46
46
|
|
47
47
|
# Call the prediction API.
|
48
48
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
49
|
-
# @param all_words [Boolean]
|
50
|
-
# @param close_file [Boolean]
|
51
|
-
# @param cropper [Boolean]
|
49
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
50
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
51
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
52
52
|
# @return [Hash]
|
53
53
|
def predict(input_source, all_words, close_file, cropper)
|
54
54
|
check_api_key
|
@@ -62,8 +62,9 @@ module Mindee
|
|
62
62
|
|
63
63
|
# Call the prediction API.
|
64
64
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
65
|
-
# @param
|
66
|
-
# @param
|
65
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
66
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
67
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
67
68
|
# @return [Hash]
|
68
69
|
def predict_async(input_source, all_words, close_file, cropper)
|
69
70
|
check_api_key
|
@@ -91,10 +92,10 @@ module Mindee
|
|
91
92
|
private
|
92
93
|
|
93
94
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
94
|
-
# @param all_words [Boolean]
|
95
|
-
# @param close_file [Boolean]
|
96
|
-
# @param cropper [Boolean]
|
97
|
-
# @return [Net::
|
95
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
96
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
97
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
98
|
+
# @return [Net::HTTP, nil]
|
98
99
|
def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
|
99
100
|
uri = URI("#{@url_root}/predict")
|
100
101
|
|
@@ -122,9 +123,9 @@ module Mindee
|
|
122
123
|
end
|
123
124
|
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
125
|
-
# @param all_words [Boolean]
|
126
|
-
# @param close_file [Boolean]
|
127
|
-
# @param cropper [Boolean]
|
126
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
127
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
128
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
128
129
|
# @return [Net::HTTPResponse]
|
129
130
|
def document_queue_req_get(input_source, all_words, close_file, cropper)
|
130
131
|
uri = URI("#{@url_root}/predict_async")
|
data/lib/mindee/input/sources.rb
CHANGED
@@ -7,6 +7,7 @@ require_relative '../pdf'
|
|
7
7
|
|
8
8
|
module Mindee
|
9
9
|
module Input
|
10
|
+
# Document source handling.
|
10
11
|
module Source
|
11
12
|
# Mime types accepted by the server.
|
12
13
|
ALLOWED_MIME_TYPES = [
|
@@ -107,8 +108,7 @@ module Mindee
|
|
107
108
|
@io_stream = PdfProcessor.parse(@io_stream, options)
|
108
109
|
end
|
109
110
|
|
110
|
-
# Reads a document.
|
111
|
-
# Note: only needs filename in case of some pdf files.
|
111
|
+
# Reads a document.
|
112
112
|
# @param close [Boolean]
|
113
113
|
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
114
114
|
def read_document(close: true)
|
@@ -116,9 +116,7 @@ module Mindee
|
|
116
116
|
# Avoids needlessly re-packing some files
|
117
117
|
data = @io_stream.read
|
118
118
|
@io_stream.close if close
|
119
|
-
|
120
|
-
|
121
|
-
['document', [data].pack('m')]
|
119
|
+
['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
|
122
120
|
end
|
123
121
|
end
|
124
122
|
|
@@ -142,6 +140,16 @@ module Mindee
|
|
142
140
|
io_stream.set_encoding Encoding::BINARY
|
143
141
|
super(io_stream, filename, fix_pdf: fix_pdf)
|
144
142
|
end
|
143
|
+
|
144
|
+
# Overload of the same function to prevent a base64 from being re-encoded.
|
145
|
+
# @param close [Boolean]
|
146
|
+
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
147
|
+
def read_document(close: true)
|
148
|
+
@io_stream.seek(0)
|
149
|
+
data = @io_stream.read
|
150
|
+
@io_stream.close if close
|
151
|
+
['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
|
152
|
+
end
|
145
153
|
end
|
146
154
|
|
147
155
|
# Load a document from raw bytes.
|
@@ -178,6 +186,21 @@ module Mindee
|
|
178
186
|
@url = url
|
179
187
|
end
|
180
188
|
end
|
189
|
+
|
190
|
+
# Replaces non-ASCII characters by their unicode escape sequence.
|
191
|
+
# Keeps other characters as is.
|
192
|
+
# @return [String] A clean String.
|
193
|
+
def self.convert_to_unicode_escape(string)
|
194
|
+
unicode_escape_string = ''.dup
|
195
|
+
string.each_char do |char|
|
196
|
+
unicode_escape_string << if char.bytesize > 1
|
197
|
+
"\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
|
198
|
+
else
|
199
|
+
char
|
200
|
+
end
|
201
|
+
end
|
202
|
+
unicode_escape_string
|
203
|
+
end
|
181
204
|
end
|
182
205
|
end
|
183
206
|
end
|
@@ -34,8 +34,10 @@ module Mindee
|
|
34
34
|
out_str << "\n:Rotation applied: #{is_rotation_applied}"
|
35
35
|
out_str << "\n\nPrediction\n=========="
|
36
36
|
out_str << "\n#{@prediction.to_s.size.positive? ? "#{@prediction}\n" : ''}"
|
37
|
-
out_str << "\nPage Predictions\n================\n\n"
|
37
|
+
out_str << "\nPage Predictions\n================\n\n" unless @pages.empty?
|
38
38
|
out_str << @pages.map(&:to_s).join("\n\n")
|
39
|
+
out_str.rstrip!
|
40
|
+
out_str
|
39
41
|
end
|
40
42
|
end
|
41
43
|
end
|