mindee 3.4.0 → 3.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +25 -0
- data/README.md +6 -1
- data/bin/mindee.rb +45 -15
- data/docs/bank_account_details_v2.md +1 -1
- data/docs/bank_check_v1.md +1 -1
- data/docs/bank_statement_fr_v1.md +175 -0
- data/docs/barcode_reader_v1.md +1 -1
- data/docs/carte_grise_v1.md +5 -5
- data/docs/carte_vitale_v1.md +1 -1
- data/docs/code_samples/bank_statement_fr_v1_async.txt +19 -0
- data/docs/code_samples/default.txt +19 -19
- data/docs/code_samples/default_async.txt +25 -0
- data/docs/code_samples/eu_driver_license_v1.txt +19 -0
- data/docs/code_samples/international_id_v1_async.txt +19 -0
- data/docs/code_samples/international_id_v2_async.txt +19 -0
- data/docs/code_samples/resume_v1_async.txt +19 -0
- data/docs/cropper_v1.md +1 -1
- data/docs/custom_v1.md +1 -1
- data/docs/eu_driver_license_v1.md +223 -0
- data/docs/expense_receipts_v5.md +1 -1
- data/docs/financial_document_v1.md +49 -41
- data/docs/generated_v1.md +90 -0
- data/docs/getting_started.md +1 -1
- data/docs/idcard_fr_v2.md +1 -1
- data/docs/international_id_v2.md +195 -0
- data/docs/invoice_splitter_v1.md +1 -1
- data/docs/invoices_v4.md +5 -2
- data/docs/license_plates_v1.md +1 -1
- data/docs/multi_receipts_detector_v1.md +1 -1
- data/docs/passport_v1.md +1 -1
- data/docs/proof_of_address_v1.md +5 -5
- data/docs/resume_v1.md +334 -0
- data/docs/us_driver_license_v1.md +3 -3
- data/docs/us_w9_v1.md +1 -1
- data/lib/mindee/client.rb +5 -3
- data/lib/mindee/http/endpoint.rb +13 -12
- data/lib/mindee/input/sources.rb +28 -5
- data/lib/mindee/parsing/common/inference.rb +3 -1
- data/lib/mindee/parsing/generated/generated_list_field.rb +58 -0
- data/lib/mindee/parsing/generated/generated_object_field.rb +109 -0
- data/lib/mindee/parsing/generated.rb +4 -0
- data/lib/mindee/parsing/standard/base_field.rb +1 -1
- data/lib/mindee/parsing.rb +1 -0
- data/lib/mindee/product/.rubocop.yml +7 -2
- data/lib/mindee/product/barcode_reader/barcode_reader_v1.rb +3 -1
- data/lib/mindee/product/cropper/cropper_v1.rb +3 -1
- data/lib/mindee/product/eu/driver_license/driver_license_v1.rb +41 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_document.rb +88 -0
- data/lib/mindee/product/eu/driver_license/driver_license_v1_page.rb +53 -0
- data/lib/mindee/product/eu/license_plate/license_plate_v1.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v1.rb +3 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2.rb +3 -1
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1.rb +41 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_document.rb +130 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_page.rb +34 -0
- data/lib/mindee/product/fr/bank_statement/bank_statement_v1_transaction.rb +64 -0
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1.rb +3 -1
- data/lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb +0 -2
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v1.rb +3 -1
- data/lib/mindee/product/fr/id_card/id_card_v2.rb +3 -1
- data/lib/mindee/product/generated/generated_v1.rb +38 -0
- data/lib/mindee/product/generated/generated_v1_document.rb +35 -0
- data/lib/mindee/product/generated/generated_v1_page.rb +51 -0
- data/lib/mindee/product/generated/generated_v1_prediction.rb +114 -0
- data/lib/mindee/product/international_id/international_id_v1.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v1_document.rb +109 -0
- data/lib/mindee/product/international_id/international_id_v1_page.rb +32 -0
- data/lib/mindee/product/international_id/international_id_v2.rb +39 -0
- data/lib/mindee/product/international_id/international_id_v2_document.rb +119 -0
- data/lib/mindee/product/international_id/international_id_v2_page.rb +32 -0
- data/lib/mindee/product/invoice/invoice_v4.rb +3 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1.rb +3 -1
- data/lib/mindee/product/passport/passport_v1.rb +3 -1
- data/lib/mindee/product/proof_of_address/proof_of_address_v1.rb +3 -1
- data/lib/mindee/product/receipt/receipt_v5.rb +3 -1
- data/lib/mindee/product/resume/resume_v1.rb +39 -0
- data/lib/mindee/product/resume/resume_v1_certificate.rb +69 -0
- data/lib/mindee/product/resume/resume_v1_document.rb +322 -0
- data/lib/mindee/product/resume/resume_v1_education.rb +90 -0
- data/lib/mindee/product/resume/resume_v1_language.rb +55 -0
- data/lib/mindee/product/resume/resume_v1_page.rb +32 -0
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +97 -0
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +55 -0
- data/lib/mindee/product/us/bank_check/bank_check_v1.rb +3 -1
- data/lib/mindee/product/us/driver_license/driver_license_v1.rb +3 -1
- data/lib/mindee/product/us/w9/w9_v1.rb +3 -1
- data/lib/mindee/product.rb +6 -0
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +4 -0
- metadata +41 -2
data/docs/invoices_v4.md
CHANGED
|
@@ -310,6 +310,9 @@ puts result.document.inference.prediction.supplier_name.value
|
|
|
310
310
|
```rb
|
|
311
311
|
for supplier_payment_details_elem in result.document.inference.prediction.supplier_payment_details do
|
|
312
312
|
puts supplier_payment_details_elem.value
|
|
313
|
+
puts supplier_payment_details_elem.rate
|
|
314
|
+
puts supplier_payment_details_elem.code
|
|
315
|
+
puts supplier_payment_details_elem.basis
|
|
313
316
|
end
|
|
314
317
|
```
|
|
315
318
|
|
|
@@ -318,7 +321,7 @@ end
|
|
|
318
321
|
|
|
319
322
|
```rb
|
|
320
323
|
for taxes_elem in result.document.inference.prediction.taxes do
|
|
321
|
-
puts taxes_elem.
|
|
324
|
+
puts taxes_elem.value
|
|
322
325
|
end
|
|
323
326
|
```
|
|
324
327
|
|
|
@@ -344,4 +347,4 @@ puts result.document.inference.prediction.total_tax.value
|
|
|
344
347
|
```
|
|
345
348
|
|
|
346
349
|
# Questions?
|
|
347
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
|
350
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/license_plates_v1.md
CHANGED
data/docs/passport_v1.md
CHANGED
|
@@ -183,4 +183,4 @@ puts result.document.inference.prediction.surname.value
|
|
|
183
183
|
```
|
|
184
184
|
|
|
185
185
|
# Questions?
|
|
186
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
|
186
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/proof_of_address_v1.md
CHANGED
|
@@ -34,12 +34,12 @@ puts result.document
|
|
|
34
34
|
########
|
|
35
35
|
Document
|
|
36
36
|
########
|
|
37
|
-
:Mindee ID:
|
|
37
|
+
:Mindee ID: 5d2361e9-405e-4fc1-8531-f92a3aef0c38
|
|
38
38
|
:Filename: default_sample.jpg
|
|
39
39
|
|
|
40
40
|
Inference
|
|
41
41
|
#########
|
|
42
|
-
:Product: mindee/proof_of_address v1.
|
|
42
|
+
:Product: mindee/proof_of_address v1.1
|
|
43
43
|
:Rotation applied: Yes
|
|
44
44
|
|
|
45
45
|
Prediction
|
|
@@ -47,7 +47,7 @@ Prediction
|
|
|
47
47
|
:Locale: en; en; USD;
|
|
48
48
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
|
49
49
|
:Issuer Company Registrations:
|
|
50
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
|
50
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
|
51
51
|
:Recipient Name:
|
|
52
52
|
:Recipient Company Registrations:
|
|
53
53
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
|
@@ -73,7 +73,7 @@ Page 0
|
|
|
73
73
|
:Locale: en; en; USD;
|
|
74
74
|
:Issuer Name: PPL ELECTRIC UTILITIES
|
|
75
75
|
:Issuer Company Registrations:
|
|
76
|
-
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN
|
|
76
|
+
:Issuer Address: 2 NORTH 9TH STREET CPC-GENN1 ALLENTOWN.PA 18101-1175
|
|
77
77
|
:Recipient Name:
|
|
78
78
|
:Recipient Company Registrations:
|
|
79
79
|
:Recipient Address: 123 MAIN ST ANYTOWN,PA 18062
|
|
@@ -204,4 +204,4 @@ puts result.document.inference.prediction.recipient_name.value
|
|
|
204
204
|
```
|
|
205
205
|
|
|
206
206
|
# Questions?
|
|
207
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
|
207
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/resume_v1.md
ADDED
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
---
|
|
2
|
+
title: Resume OCR Ruby
|
|
3
|
+
---
|
|
4
|
+
The Ruby OCR SDK supports the [Resume API](https://platform.mindee.com/mindee/resume).
|
|
5
|
+
|
|
6
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/resume/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
|
7
|
+

|
|
8
|
+
|
|
9
|
+
# Quick-Start
|
|
10
|
+
```rb
|
|
11
|
+
require 'mindee'
|
|
12
|
+
|
|
13
|
+
# Init a new client
|
|
14
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
15
|
+
|
|
16
|
+
# Load a file from disk
|
|
17
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
|
18
|
+
|
|
19
|
+
# Parse the file
|
|
20
|
+
result = mindee_client.enqueue_and_parse(
|
|
21
|
+
input_source,
|
|
22
|
+
Mindee::Product::Resume::ResumeV1
|
|
23
|
+
)
|
|
24
|
+
|
|
25
|
+
# Print a full summary of the parsed data in RST format
|
|
26
|
+
puts result.document
|
|
27
|
+
|
|
28
|
+
# Print the document-level parsed data
|
|
29
|
+
# puts result.document.inference.prediction
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
**Output (RST):**
|
|
33
|
+
```rst
|
|
34
|
+
########
|
|
35
|
+
Document
|
|
36
|
+
########
|
|
37
|
+
:Mindee ID: bc80bae0-af75-4464-95a9-2419403c75bf
|
|
38
|
+
:Filename: default_sample.jpg
|
|
39
|
+
|
|
40
|
+
Inference
|
|
41
|
+
#########
|
|
42
|
+
:Product: mindee/resume v1.0
|
|
43
|
+
:Rotation applied: No
|
|
44
|
+
|
|
45
|
+
Prediction
|
|
46
|
+
==========
|
|
47
|
+
:Document Language: ENG
|
|
48
|
+
:Document Type: RESUME
|
|
49
|
+
:Given Names: Christopher
|
|
50
|
+
:Surnames: Morgan
|
|
51
|
+
:Nationality:
|
|
52
|
+
:Email Address: christoper.m@gmail.com
|
|
53
|
+
:Phone Number: +44 (0) 20 7666 8555
|
|
54
|
+
:Address: 177 Great Portland Street, London W5W 6PQ
|
|
55
|
+
:Social Networks:
|
|
56
|
+
+----------------------+----------------------------------------------------+
|
|
57
|
+
| Name | URL |
|
|
58
|
+
+======================+====================================================+
|
|
59
|
+
| LinkedIn | linkedin.com/christopher.morgan |
|
|
60
|
+
+----------------------+----------------------------------------------------+
|
|
61
|
+
:Profession: Senior Web Developer
|
|
62
|
+
:Job Applied:
|
|
63
|
+
:Languages:
|
|
64
|
+
+----------+----------------------+
|
|
65
|
+
| Language | Level |
|
|
66
|
+
+==========+======================+
|
|
67
|
+
| SPA | Fluent |
|
|
68
|
+
+----------+----------------------+
|
|
69
|
+
| ZHO | Beginner |
|
|
70
|
+
+----------+----------------------+
|
|
71
|
+
| DEU | Intermediate |
|
|
72
|
+
+----------+----------------------+
|
|
73
|
+
:Hard Skills: HTML5
|
|
74
|
+
PHP OOP
|
|
75
|
+
JavaScript
|
|
76
|
+
CSS
|
|
77
|
+
MySQL
|
|
78
|
+
:Soft Skills: Project management
|
|
79
|
+
Strong decision maker
|
|
80
|
+
Innovative
|
|
81
|
+
Complex problem solver
|
|
82
|
+
Creative design
|
|
83
|
+
Service-focused
|
|
84
|
+
:Education:
|
|
85
|
+
+-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
|
|
86
|
+
| Domain | Degree | End Month | End Year | School | Start Month | Start Year |
|
|
87
|
+
+=================+===========================+===========+==========+===========================+=============+============+
|
|
88
|
+
| Computer Inf... | Bachelor | | | Columbia University, NY | | 2014 |
|
|
89
|
+
+-----------------+---------------------------+-----------+----------+---------------------------+-------------+------------+
|
|
90
|
+
:Professional Experiences:
|
|
91
|
+
+-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
|
|
92
|
+
| Contract Type | Department | Employer | End Month | End Year | Role | Start Month | Start Year |
|
|
93
|
+
+=================+============+===========================+===========+==========+======================+=============+============+
|
|
94
|
+
| Full-Time | | Luna Web Design, New York | 05 | 2019 | Web Developer | 09 | 2015 |
|
|
95
|
+
+-----------------+------------+---------------------------+-----------+----------+----------------------+-------------+------------+
|
|
96
|
+
:Certificates:
|
|
97
|
+
+------------+--------------------------------+---------------------------+------+
|
|
98
|
+
| Grade | Name | Provider | Year |
|
|
99
|
+
+============+================================+===========================+======+
|
|
100
|
+
| | PHP Framework (certificate)... | | 2014 |
|
|
101
|
+
+------------+--------------------------------+---------------------------+------+
|
|
102
|
+
| | Programming Languages: Java... | | |
|
|
103
|
+
+------------+--------------------------------+---------------------------+------+
|
|
104
|
+
```
|
|
105
|
+
|
|
106
|
+
# Field Types
|
|
107
|
+
## Standard Fields
|
|
108
|
+
These fields are generic and used in several products.
|
|
109
|
+
|
|
110
|
+
### Basic Field
|
|
111
|
+
Each prediction object contains a set of fields that inherit from the generic `Field` class.
|
|
112
|
+
A typical `Field` object will have the following attributes:
|
|
113
|
+
|
|
114
|
+
* **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
|
|
115
|
+
* **confidence** (Float, nil): the confidence score of the field prediction.
|
|
116
|
+
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
|
117
|
+
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
|
118
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
|
|
119
|
+
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
|
120
|
+
|
|
121
|
+
|
|
122
|
+
Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
|
|
123
|
+
|
|
124
|
+
|
|
125
|
+
### Classification Field
|
|
126
|
+
The classification field `ClassificationField` does not implement all the basic `Field` attributes. It only implements **value**, **confidence** and **page_id**.
|
|
127
|
+
|
|
128
|
+
> Note: a classification field's `value` is always a `String`.
|
|
129
|
+
|
|
130
|
+
### String Field
|
|
131
|
+
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
|
132
|
+
|
|
133
|
+
## Specific Fields
|
|
134
|
+
Fields which are specific to this product; they are not used in any other product.
|
|
135
|
+
|
|
136
|
+
### Certificates Field
|
|
137
|
+
The list of certificates obtained by the candidate.
|
|
138
|
+
|
|
139
|
+
A `ResumeV1Certificate` implements the following attributes:
|
|
140
|
+
|
|
141
|
+
* `grade` (String): The grade obtained for the certificate.
|
|
142
|
+
* `name` (String): The name of the certification.
|
|
143
|
+
* `provider` (String): The organization or institution that issued the certificate.
|
|
144
|
+
* `year` (String): The year when a certificate was issued or received.
|
|
145
|
+
Fields which are specific to this product; they are not used in any other product.
|
|
146
|
+
|
|
147
|
+
### Education Field
|
|
148
|
+
The list of the candidate's educational background.
|
|
149
|
+
|
|
150
|
+
A `ResumeV1Education` implements the following attributes:
|
|
151
|
+
|
|
152
|
+
* `degree_domain` (String): The area of study or specialization.
|
|
153
|
+
* `degree_type` (String): The type of degree obtained, such as Bachelor's, Master's, or Doctorate.
|
|
154
|
+
* `end_month` (String): The month when the education program or course was completed.
|
|
155
|
+
* `end_year` (String): The year when the education program or course was completed.
|
|
156
|
+
* `school` (String): The name of the school.
|
|
157
|
+
* `start_month` (String): The month when the education program or course began.
|
|
158
|
+
* `start_year` (String): The year when the education program or course began.
|
|
159
|
+
Fields which are specific to this product; they are not used in any other product.
|
|
160
|
+
|
|
161
|
+
### Languages Field
|
|
162
|
+
The list of languages that the candidate is proficient in.
|
|
163
|
+
|
|
164
|
+
A `ResumeV1Language` implements the following attributes:
|
|
165
|
+
|
|
166
|
+
* `language` (String): The language's ISO 639 code.
|
|
167
|
+
* `level` (String): The candidate's level for the language.
|
|
168
|
+
Fields which are specific to this product; they are not used in any other product.
|
|
169
|
+
|
|
170
|
+
### Professional Experiences Field
|
|
171
|
+
The list of the candidate's professional experiences.
|
|
172
|
+
|
|
173
|
+
A `ResumeV1ProfessionalExperience` implements the following attributes:
|
|
174
|
+
|
|
175
|
+
* `contract_type` (String): The type of contract for the professional experience.
|
|
176
|
+
* `department` (String): The specific department or division within the company.
|
|
177
|
+
* `employer` (String): The name of the company or organization.
|
|
178
|
+
* `end_month` (String): The month when the professional experience ended.
|
|
179
|
+
* `end_year` (String): The year when the professional experience ended.
|
|
180
|
+
* `role` (String): The position or job title held by the candidate.
|
|
181
|
+
* `start_month` (String): The month when the professional experience began.
|
|
182
|
+
* `start_year` (String): The year when the professional experience began.
|
|
183
|
+
Fields which are specific to this product; they are not used in any other product.
|
|
184
|
+
|
|
185
|
+
### Social Networks Field
|
|
186
|
+
The list of social network profiles of the candidate.
|
|
187
|
+
|
|
188
|
+
A `ResumeV1SocialNetworksUrl` implements the following attributes:
|
|
189
|
+
|
|
190
|
+
* `name` (String): The name of the social network.
|
|
191
|
+
* `url` (String): The URL of the social network.
|
|
192
|
+
|
|
193
|
+
# Attributes
|
|
194
|
+
The following fields are extracted for Resume V1:
|
|
195
|
+
|
|
196
|
+
## Address
|
|
197
|
+
**address** ([StringField](#string-field)): The location information of the candidate, including city, state, and country.
|
|
198
|
+
|
|
199
|
+
```rb
|
|
200
|
+
puts result.document.inference.prediction.address.value
|
|
201
|
+
```
|
|
202
|
+
|
|
203
|
+
## Certificates
|
|
204
|
+
**certificates** (Array<[ResumeV1Certificate](#certificates-field)>): The list of certificates obtained by the candidate.
|
|
205
|
+
|
|
206
|
+
```rb
|
|
207
|
+
for certificates_elem in result.document.inference.prediction.certificates do
|
|
208
|
+
puts certificates_elem.value
|
|
209
|
+
end
|
|
210
|
+
```
|
|
211
|
+
|
|
212
|
+
## Document Language
|
|
213
|
+
**document_language** ([StringField](#string-field)): The ISO 639 code of the language in which the document is written.
|
|
214
|
+
|
|
215
|
+
```rb
|
|
216
|
+
puts result.document.inference.prediction.document_language.value
|
|
217
|
+
```
|
|
218
|
+
|
|
219
|
+
## Document Type
|
|
220
|
+
**document_type** ([ClassificationField](#classification-field)): The type of the document sent.
|
|
221
|
+
|
|
222
|
+
```rb
|
|
223
|
+
puts result.document.inference.prediction.document_type.value
|
|
224
|
+
```
|
|
225
|
+
|
|
226
|
+
## Education
|
|
227
|
+
**education** (Array<[ResumeV1Education](#education-field)>): The list of the candidate's educational background.
|
|
228
|
+
|
|
229
|
+
```rb
|
|
230
|
+
for education_elem in result.document.inference.prediction.education do
|
|
231
|
+
puts education_elem.value
|
|
232
|
+
end
|
|
233
|
+
```
|
|
234
|
+
|
|
235
|
+
## Email Address
|
|
236
|
+
**email_address** ([StringField](#string-field)): The email address of the candidate.
|
|
237
|
+
|
|
238
|
+
```rb
|
|
239
|
+
puts result.document.inference.prediction.email_address.value
|
|
240
|
+
```
|
|
241
|
+
|
|
242
|
+
## Given Names
|
|
243
|
+
**given_names** (Array<[StringField](#string-field)>): The candidate's first or given names.
|
|
244
|
+
|
|
245
|
+
```rb
|
|
246
|
+
for given_names_elem in result.document.inference.prediction.given_names do
|
|
247
|
+
puts given_names_elem.value
|
|
248
|
+
end
|
|
249
|
+
```
|
|
250
|
+
|
|
251
|
+
## Hard Skills
|
|
252
|
+
**hard_skills** (Array<[StringField](#string-field)>): The list of the candidate's technical abilities and knowledge.
|
|
253
|
+
|
|
254
|
+
```rb
|
|
255
|
+
for hard_skills_elem in result.document.inference.prediction.hard_skills do
|
|
256
|
+
puts hard_skills_elem.value
|
|
257
|
+
end
|
|
258
|
+
```
|
|
259
|
+
|
|
260
|
+
## Job Applied
|
|
261
|
+
**job_applied** ([StringField](#string-field)): The position that the candidate is applying for.
|
|
262
|
+
|
|
263
|
+
```rb
|
|
264
|
+
puts result.document.inference.prediction.job_applied.value
|
|
265
|
+
```
|
|
266
|
+
|
|
267
|
+
## Languages
|
|
268
|
+
**languages** (Array<[ResumeV1Language](#languages-field)>): The list of languages that the candidate is proficient in.
|
|
269
|
+
|
|
270
|
+
```rb
|
|
271
|
+
for languages_elem in result.document.inference.prediction.languages do
|
|
272
|
+
puts languages_elem.value
|
|
273
|
+
end
|
|
274
|
+
```
|
|
275
|
+
|
|
276
|
+
## Nationality
|
|
277
|
+
**nationality** ([StringField](#string-field)): The ISO 3166 code for the country of citizenship of the candidate.
|
|
278
|
+
|
|
279
|
+
```rb
|
|
280
|
+
puts result.document.inference.prediction.nationality.value
|
|
281
|
+
```
|
|
282
|
+
|
|
283
|
+
## Phone Number
|
|
284
|
+
**phone_number** ([StringField](#string-field)): The phone number of the candidate.
|
|
285
|
+
|
|
286
|
+
```rb
|
|
287
|
+
puts result.document.inference.prediction.phone_number.value
|
|
288
|
+
```
|
|
289
|
+
|
|
290
|
+
## Profession
|
|
291
|
+
**profession** ([StringField](#string-field)): The candidate's current profession.
|
|
292
|
+
|
|
293
|
+
```rb
|
|
294
|
+
puts result.document.inference.prediction.profession.value
|
|
295
|
+
```
|
|
296
|
+
|
|
297
|
+
## Professional Experiences
|
|
298
|
+
**professional_experiences** (Array<[ResumeV1ProfessionalExperience](#professional-experiences-field)>): The list of the candidate's professional experiences.
|
|
299
|
+
|
|
300
|
+
```rb
|
|
301
|
+
for professional_experiences_elem in result.document.inference.prediction.professional_experiences do
|
|
302
|
+
puts professional_experiences_elem.value
|
|
303
|
+
end
|
|
304
|
+
```
|
|
305
|
+
|
|
306
|
+
## Social Networks
|
|
307
|
+
**social_networks_urls** (Array<[ResumeV1SocialNetworksUrl](#social-networks-field)>): The list of social network profiles of the candidate.
|
|
308
|
+
|
|
309
|
+
```rb
|
|
310
|
+
for social_networks_urls_elem in result.document.inference.prediction.social_networks_urls do
|
|
311
|
+
puts social_networks_urls_elem.value
|
|
312
|
+
end
|
|
313
|
+
```
|
|
314
|
+
|
|
315
|
+
## Soft Skills
|
|
316
|
+
**soft_skills** (Array<[StringField](#string-field)>): The list of the candidate's interpersonal and communication abilities.
|
|
317
|
+
|
|
318
|
+
```rb
|
|
319
|
+
for soft_skills_elem in result.document.inference.prediction.soft_skills do
|
|
320
|
+
puts soft_skills_elem.value
|
|
321
|
+
end
|
|
322
|
+
```
|
|
323
|
+
|
|
324
|
+
## Surnames
|
|
325
|
+
**surnames** (Array<[StringField](#string-field)>): The candidate's last names.
|
|
326
|
+
|
|
327
|
+
```rb
|
|
328
|
+
for surnames_elem in result.document.inference.prediction.surnames do
|
|
329
|
+
puts surnames_elem.value
|
|
330
|
+
end
|
|
331
|
+
```
|
|
332
|
+
|
|
333
|
+
# Questions?
|
|
334
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
|
@@ -54,7 +54,7 @@ Prediction
|
|
|
54
54
|
:Date Of Birth: 1957-02-01
|
|
55
55
|
:Restrictions: NONE
|
|
56
56
|
:Endorsements: NONE
|
|
57
|
-
:Class:
|
|
57
|
+
:Driver License Class: D
|
|
58
58
|
:Sex: M
|
|
59
59
|
:Height: 5-08
|
|
60
60
|
:Weight: 185
|
|
@@ -79,7 +79,7 @@ Page 0
|
|
|
79
79
|
:Date Of Birth: 1957-02-01
|
|
80
80
|
:Restrictions: NONE
|
|
81
81
|
:Endorsements: NONE
|
|
82
|
-
:Class:
|
|
82
|
+
:Driver License Class: D
|
|
83
83
|
:Sex: M
|
|
84
84
|
:Height: 5-08
|
|
85
85
|
:Weight: 185
|
|
@@ -265,4 +265,4 @@ puts result.document.inference.prediction.weight.value
|
|
|
265
265
|
```
|
|
266
266
|
|
|
267
267
|
# Questions?
|
|
268
|
-
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-
|
|
268
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/us_w9_v1.md
CHANGED
data/lib/mindee/client.rb
CHANGED
|
@@ -38,6 +38,7 @@ module Mindee
|
|
|
38
38
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
|
39
39
|
# This performs a cropping operation on the server and will increase response time.
|
|
40
40
|
#
|
|
41
|
+
#
|
|
41
42
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
|
42
43
|
def parse(
|
|
43
44
|
input_source,
|
|
@@ -80,6 +81,7 @@ module Mindee
|
|
|
80
81
|
# @param cropper [Boolean] Whether to include cropper results for each page.
|
|
81
82
|
# This performs a cropping operation on the server and will increase response time.
|
|
82
83
|
#
|
|
84
|
+
#
|
|
83
85
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
|
84
86
|
def enqueue(
|
|
85
87
|
input_source,
|
|
@@ -118,7 +120,6 @@ module Mindee
|
|
|
118
120
|
end
|
|
119
121
|
|
|
120
122
|
# rubocop:disable Metrics/ParameterLists
|
|
121
|
-
|
|
122
123
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
|
123
124
|
#
|
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
@@ -139,7 +140,7 @@ module Mindee
|
|
|
139
140
|
# This performs a cropping operation on the server and will increase response time.
|
|
140
141
|
# @param initial_delay_sec [Integer, Float, nil] initial delay before polling. Defaults to 4.
|
|
141
142
|
# @param delay_sec [Integer, Float, nil] delay between polling attempts. Defaults to 2.
|
|
142
|
-
# @param max_retries [Integer, nil] maximum amount of retries. Defaults to
|
|
143
|
+
# @param max_retries [Integer, nil] maximum amount of retries. Defaults to 60.
|
|
143
144
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
|
144
145
|
def enqueue_and_parse(
|
|
145
146
|
input_source,
|
|
@@ -151,7 +152,7 @@ module Mindee
|
|
|
151
152
|
cropper: false,
|
|
152
153
|
initial_delay_sec: 4,
|
|
153
154
|
delay_sec: 2,
|
|
154
|
-
max_retries:
|
|
155
|
+
max_retries: 60
|
|
155
156
|
)
|
|
156
157
|
enqueue_res = enqueue(
|
|
157
158
|
input_source,
|
|
@@ -178,6 +179,7 @@ module Mindee
|
|
|
178
179
|
|
|
179
180
|
queue_res
|
|
180
181
|
end
|
|
182
|
+
|
|
181
183
|
# rubocop:enable Metrics/ParameterLists
|
|
182
184
|
|
|
183
185
|
# Load a document from an absolute path, as a string.
|
data/lib/mindee/http/endpoint.rb
CHANGED
|
@@ -46,9 +46,9 @@ module Mindee
|
|
|
46
46
|
|
|
47
47
|
# Call the prediction API.
|
|
48
48
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
49
|
-
# @param all_words [Boolean]
|
|
50
|
-
# @param close_file [Boolean]
|
|
51
|
-
# @param cropper [Boolean]
|
|
49
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
|
50
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
|
51
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
|
52
52
|
# @return [Hash]
|
|
53
53
|
def predict(input_source, all_words, close_file, cropper)
|
|
54
54
|
check_api_key
|
|
@@ -62,8 +62,9 @@ module Mindee
|
|
|
62
62
|
|
|
63
63
|
# Call the prediction API.
|
|
64
64
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
65
|
-
# @param
|
|
66
|
-
# @param
|
|
65
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
|
66
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
|
67
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
|
67
68
|
# @return [Hash]
|
|
68
69
|
def predict_async(input_source, all_words, close_file, cropper)
|
|
69
70
|
check_api_key
|
|
@@ -91,10 +92,10 @@ module Mindee
|
|
|
91
92
|
private
|
|
92
93
|
|
|
93
94
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
94
|
-
# @param all_words [Boolean]
|
|
95
|
-
# @param close_file [Boolean]
|
|
96
|
-
# @param cropper [Boolean]
|
|
97
|
-
# @return [Net::
|
|
95
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
|
96
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
|
97
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
|
98
|
+
# @return [Net::HTTP, nil]
|
|
98
99
|
def predict_req_post(input_source, all_words: false, close_file: true, cropper: false)
|
|
99
100
|
uri = URI("#{@url_root}/predict")
|
|
100
101
|
|
|
@@ -122,9 +123,9 @@ module Mindee
|
|
|
122
123
|
end
|
|
123
124
|
|
|
124
125
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
|
125
|
-
# @param all_words [Boolean]
|
|
126
|
-
# @param close_file [Boolean]
|
|
127
|
-
# @param cropper [Boolean]
|
|
126
|
+
# @param all_words [Boolean] Whether the full word extraction needs to be performed
|
|
127
|
+
# @param close_file [Boolean] Whether the file will be closed after reading
|
|
128
|
+
# @param cropper [Boolean] Whether a cropping operation will be applied
|
|
128
129
|
# @return [Net::HTTPResponse]
|
|
129
130
|
def document_queue_req_get(input_source, all_words, close_file, cropper)
|
|
130
131
|
uri = URI("#{@url_root}/predict_async")
|
data/lib/mindee/input/sources.rb
CHANGED
|
@@ -7,6 +7,7 @@ require_relative '../pdf'
|
|
|
7
7
|
|
|
8
8
|
module Mindee
|
|
9
9
|
module Input
|
|
10
|
+
# Document source handling.
|
|
10
11
|
module Source
|
|
11
12
|
# Mime types accepted by the server.
|
|
12
13
|
ALLOWED_MIME_TYPES = [
|
|
@@ -107,8 +108,7 @@ module Mindee
|
|
|
107
108
|
@io_stream = PdfProcessor.parse(@io_stream, options)
|
|
108
109
|
end
|
|
109
110
|
|
|
110
|
-
# Reads a document.
|
|
111
|
-
# Note: only needs filename in case of some pdf files.
|
|
111
|
+
# Reads a document.
|
|
112
112
|
# @param close [Boolean]
|
|
113
113
|
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
|
114
114
|
def read_document(close: true)
|
|
@@ -116,9 +116,7 @@ module Mindee
|
|
|
116
116
|
# Avoids needlessly re-packing some files
|
|
117
117
|
data = @io_stream.read
|
|
118
118
|
@io_stream.close if close
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
['document', [data].pack('m')]
|
|
119
|
+
['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
|
|
122
120
|
end
|
|
123
121
|
end
|
|
124
122
|
|
|
@@ -142,6 +140,16 @@ module Mindee
|
|
|
142
140
|
io_stream.set_encoding Encoding::BINARY
|
|
143
141
|
super(io_stream, filename, fix_pdf: fix_pdf)
|
|
144
142
|
end
|
|
143
|
+
|
|
144
|
+
# Overload of the same function to prevent a base64 from being re-encoded.
|
|
145
|
+
# @param close [Boolean]
|
|
146
|
+
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
|
147
|
+
def read_document(close: true)
|
|
148
|
+
@io_stream.seek(0)
|
|
149
|
+
data = @io_stream.read
|
|
150
|
+
@io_stream.close if close
|
|
151
|
+
['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
|
|
152
|
+
end
|
|
145
153
|
end
|
|
146
154
|
|
|
147
155
|
# Load a document from raw bytes.
|
|
@@ -178,6 +186,21 @@ module Mindee
|
|
|
178
186
|
@url = url
|
|
179
187
|
end
|
|
180
188
|
end
|
|
189
|
+
|
|
190
|
+
# Replaces non-ASCII characters by their unicode escape sequence.
|
|
191
|
+
# Keeps other characters as is.
|
|
192
|
+
# @return A clean String.
|
|
193
|
+
def self.convert_to_unicode_escape(string)
|
|
194
|
+
unicode_escape_string = ''.dup
|
|
195
|
+
string.each_char do |char|
|
|
196
|
+
unicode_escape_string << if char.bytesize > 1
|
|
197
|
+
"\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
|
|
198
|
+
else
|
|
199
|
+
char
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
unicode_escape_string
|
|
203
|
+
end
|
|
181
204
|
end
|
|
182
205
|
end
|
|
183
206
|
end
|
|
@@ -34,8 +34,10 @@ module Mindee
|
|
|
34
34
|
out_str << "\n:Rotation applied: #{is_rotation_applied}"
|
|
35
35
|
out_str << "\n\nPrediction\n=========="
|
|
36
36
|
out_str << "\n#{@prediction.to_s.size.positive? ? "#{@prediction}\n" : ''}"
|
|
37
|
-
out_str << "\nPage Predictions\n================\n\n"
|
|
37
|
+
out_str << "\nPage Predictions\n================\n\n" unless @pages.empty?
|
|
38
38
|
out_str << @pages.map(&:to_s).join("\n\n")
|
|
39
|
+
out_str.rstrip!
|
|
40
|
+
out_str
|
|
39
41
|
end
|
|
40
42
|
end
|
|
41
43
|
end
|