mindee 3.12.0 → 3.13.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +1 -1
- data/CHANGELOG.md +18 -0
- data/README.md +23 -23
- data/Rakefile +5 -0
- data/docs/bank_account_details_v2.md +5 -1
- data/docs/bank_check_v1.md +6 -2
- data/docs/bank_statement_fr_v1.md +3 -0
- data/docs/barcode_reader_v1.md +5 -1
- data/docs/bill_of_lading_v1.md +202 -0
- data/docs/carte_grise_v1.md +5 -1
- data/docs/carte_vitale_v1.md +5 -1
- data/docs/code_samples/bill_of_lading_v1_async.txt +19 -0
- data/docs/code_samples/energy_bill_fra_v1_async.txt +19 -0
- data/docs/code_samples/invoices_v4_async.txt +19 -0
- data/docs/code_samples/nutrition_facts_v1_async.txt +19 -0
- data/docs/code_samples/payslip_fra_v2_async.txt +19 -0
- data/docs/cropper_v1.md +6 -2
- data/docs/custom_v1.md +5 -3
- data/docs/energy_bill_fra_v1.md +249 -0
- data/docs/eu_driver_license_v1.md +6 -2
- data/docs/expense_receipts_v5.md +26 -1
- data/docs/financial_document_v1.md +29 -1
- data/docs/generated_v1.md +3 -0
- data/docs/getting_started.md +3 -0
- data/docs/idcard_fr_v2.md +15 -2
- data/docs/international_id_v2.md +13 -1
- data/docs/invoice_splitter_v1.md +16 -13
- data/docs/invoices_v4.md +54 -21
- data/docs/license_plates_v1.md +5 -1
- data/docs/multi_receipts_detector_v1.md +5 -1
- data/docs/nutrition_facts_v1.md +295 -0
- data/docs/passport_v1.md +5 -1
- data/docs/payslip_fra_v2.md +218 -0
- data/docs/proof_of_address_v1.md +5 -1
- data/docs/resume_v1.md +24 -1
- data/docs/us_driver_license_v1.md +6 -2
- data/docs/us_healthcare_cards_v1.md +5 -1
- data/docs/us_mail_v2.md +5 -1
- data/docs/us_w9_v1.md +6 -2
- data/examples/auto_invoice_splitter_extraction.rb +43 -0
- data/lib/mindee/client.rb +20 -8
- data/lib/mindee/{image_extraction → extraction}/common/image_extractor.rb +2 -4
- data/lib/mindee/{image_extraction → extraction}/common.rb +1 -0
- data/lib/mindee/extraction/pdf_extractor/extracted_pdf.rb +55 -0
- data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +111 -0
- data/lib/mindee/extraction/pdf_extractor.rb +4 -0
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +322 -0
- data/lib/mindee/extraction/tax_extractor.rb +1 -320
- data/lib/mindee/extraction.rb +3 -0
- data/lib/mindee/http/endpoint.rb +18 -6
- data/lib/mindee/parsing/common/api_response.rb +1 -1
- data/lib/mindee/parsing/common/document.rb +31 -1
- data/lib/mindee/parsing/common/extras/cropper_extra.rb +29 -0
- data/lib/mindee/parsing/common/extras/extras.rb +50 -0
- data/lib/mindee/parsing/common/extras/full_text_ocr_extra.rb +32 -0
- data/lib/mindee/parsing/common/extras.rb +5 -0
- data/lib/mindee/parsing/common/page.rb +5 -0
- data/lib/mindee/parsing/standard/base_field.rb +1 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1.rb +39 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier.rb +52 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_carrier_item.rb +95 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_consignee.rb +58 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_document.rb +136 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_notify_party.rb +58 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_page.rb +32 -0
- data/lib/mindee/product/bill_of_lading/bill_of_lading_v1_shipper.rb +58 -0
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +15 -1
- data/lib/mindee/product/fr/bank_account_details/bank_account_details_v2_bban.rb +4 -15
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1.rb +41 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb +235 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb +48 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_supplier.rb +48 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_usage.rb +97 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_meter_detail.rb +54 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_page.rb +34 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_subscription.rb +97 -0
- data/lib/mindee/product/fr/energy_bill/energy_bill_v1_taxes_and_contribution.rb +97 -0
- data/lib/mindee/product/fr/payslip/payslip_v2.rb +41 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_bank_account_detail.rb +54 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_document.rb +128 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employee.rb +78 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employer.rb +78 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_employment.rb +72 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_page.rb +34 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_detail.rb +100 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pay_period.rb +66 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_pto.rb +56 -0
- data/lib/mindee/product/fr/payslip/payslip_v2_salary_detail.rb +81 -0
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +15 -1
- data/lib/mindee/product/invoice_splitter/invoice_splitter_v1_document.rb +1 -1
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1.rb +39 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_added_sugar.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_calorie.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_cholesterol.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_dietary_fiber.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_document.rb +173 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_nutrient.rb +87 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_page.rb +32 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_protein.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_saturated_fat.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_serving_size.rb +46 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_sodium.rb +58 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_carbohydrate.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_fat.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_total_sugar.rb +52 -0
- data/lib/mindee/product/nutrition_facts_label/nutrition_facts_label_v1_trans_fat.rb +52 -0
- data/lib/mindee/product/receipt/receipt_v5_line_item.rb +11 -1
- data/lib/mindee/product/resume/resume_v1_certificate.rb +11 -1
- data/lib/mindee/product/resume/resume_v1_education.rb +14 -1
- data/lib/mindee/product/resume/resume_v1_language.rb +9 -1
- data/lib/mindee/product/resume/resume_v1_professional_experience.rb +15 -1
- data/lib/mindee/product/resume/resume_v1_social_networks_url.rb +9 -1
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +9 -1
- data/lib/mindee/product/us/us_mail/us_mail_v2_recipient_address.rb +14 -1
- data/lib/mindee/product/us/us_mail/us_mail_v2_sender_address.rb +5 -17
- data/lib/mindee/product.rb +5 -1
- data/lib/mindee/version.rb +1 -1
- metadata +70 -9
- data/lib/mindee/image_extraction.rb +0 -4
- /data/lib/mindee/{image_extraction → extraction}/common/extracted_image.rb +0 -0
- /data/lib/mindee/{image_extraction → extraction}/multi_receipts_extractor/multi_receipts_extractor.rb +0 -0
- /data/lib/mindee/{image_extraction → extraction}/multi_receipts_extractor.rb +0 -0
- /data/lib/mindee/extraction/{ocr_extractor.rb → tax_extractor/ocr_extractor.rb} +0 -0
@@ -0,0 +1,218 @@
|
|
1
|
+
---
|
2
|
+
title: FR Payslip OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-fr-payslip-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
6
|
+
---
|
7
|
+
The Ruby OCR SDK supports the [Payslip API](https://platform.mindee.com/mindee/payslip_fra).
|
8
|
+
|
9
|
+
The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg) can be used for testing purposes.
|
10
|
+

|
11
|
+
|
12
|
+
# Quick-Start
|
13
|
+
```rb
|
14
|
+
require 'mindee'
|
15
|
+
|
16
|
+
# Init a new client
|
17
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
18
|
+
|
19
|
+
# Load a file from disk
|
20
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
21
|
+
|
22
|
+
# Parse the file
|
23
|
+
result = mindee_client.enqueue_and_parse(
|
24
|
+
input_source,
|
25
|
+
Mindee::Product::FR::Payslip::PayslipV2
|
26
|
+
)
|
27
|
+
|
28
|
+
# Print a full summary of the parsed data in RST format
|
29
|
+
puts result.document
|
30
|
+
|
31
|
+
# Print the document-level parsed data
|
32
|
+
# puts result.document.inference.prediction
|
33
|
+
|
34
|
+
```
|
35
|
+
# Field Types
|
36
|
+
## Standard Fields
|
37
|
+
These fields are generic and used in several products.
|
38
|
+
|
39
|
+
### Basic Field
|
40
|
+
Each prediction object contains a set of fields that inherit from the generic `Field` class.
|
41
|
+
A typical `Field` object will have the following attributes:
|
42
|
+
|
43
|
+
* **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
|
44
|
+
* **confidence** (Float, nil): the confidence score of the field prediction.
|
45
|
+
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
46
|
+
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
47
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
48
|
+
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
49
|
+
|
50
|
+
|
51
|
+
Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
|
52
|
+
|
53
|
+
## Specific Fields
|
54
|
+
Fields which are specific to this product; they are not used in any other product.
|
55
|
+
|
56
|
+
### Bank Account Details Field
|
57
|
+
Information about the employee's bank account.
|
58
|
+
|
59
|
+
A `PayslipV2BankAccountDetail` implements the following attributes:
|
60
|
+
|
61
|
+
* `bank_name` (String): The name of the bank.
|
62
|
+
* `iban` (String): The IBAN of the bank account.
|
63
|
+
* `swift` (String): The SWIFT code of the bank.
|
64
|
+
Fields which are specific to this product; they are not used in any other product.
|
65
|
+
|
66
|
+
### Employee Field
|
67
|
+
Information about the employee.
|
68
|
+
|
69
|
+
A `PayslipV2Employee` implements the following attributes:
|
70
|
+
|
71
|
+
* `address` (String): The address of the employee.
|
72
|
+
* `date_of_birth` (String): The date of birth of the employee.
|
73
|
+
* `first_name` (String): The first name of the employee.
|
74
|
+
* `last_name` (String): The last name of the employee.
|
75
|
+
* `phone_number` (String): The phone number of the employee.
|
76
|
+
* `registration_number` (String): The registration number of the employee.
|
77
|
+
* `social_security_number` (String): The social security number of the employee.
|
78
|
+
Fields which are specific to this product; they are not used in any other product.
|
79
|
+
|
80
|
+
### Employer Field
|
81
|
+
Information about the employer.
|
82
|
+
|
83
|
+
A `PayslipV2Employer` implements the following attributes:
|
84
|
+
|
85
|
+
* `address` (String): The address of the employer.
|
86
|
+
* `company_id` (String): The company ID of the employer.
|
87
|
+
* `company_site` (String): The site of the company.
|
88
|
+
* `naf_code` (String): The NAF code of the employer.
|
89
|
+
* `name` (String): The name of the employer.
|
90
|
+
* `phone_number` (String): The phone number of the employer.
|
91
|
+
* `urssaf_number` (String): The URSSAF number of the employer.
|
92
|
+
Fields which are specific to this product; they are not used in any other product.
|
93
|
+
|
94
|
+
### Employment Field
|
95
|
+
Information about the employment.
|
96
|
+
|
97
|
+
A `PayslipV2Employment` implements the following attributes:
|
98
|
+
|
99
|
+
* `category` (String): The category of the employment.
|
100
|
+
* `coefficient` (Float): The coefficient of the employment.
|
101
|
+
* `collective_agreement` (String): The collective agreement of the employment.
|
102
|
+
* `job_title` (String): The job title of the employee.
|
103
|
+
* `position_level` (String): The position level of the employment.
|
104
|
+
* `start_date` (String): The start date of the employment.
|
105
|
+
Fields which are specific to this product; they are not used in any other product.
|
106
|
+
|
107
|
+
### Pay Detail Field
|
108
|
+
Detailed information about the pay.
|
109
|
+
|
110
|
+
A `PayslipV2PayDetail` implements the following attributes:
|
111
|
+
|
112
|
+
* `gross_salary` (Float): The gross salary of the employee.
|
113
|
+
* `gross_salary_ytd` (Float): The year-to-date gross salary of the employee.
|
114
|
+
* `income_tax_rate` (Float): The income tax rate of the employee.
|
115
|
+
* `income_tax_withheld` (Float): The income tax withheld from the employee's pay.
|
116
|
+
* `net_paid` (Float): The net paid amount of the employee.
|
117
|
+
* `net_paid_before_tax` (Float): The net paid amount before tax of the employee.
|
118
|
+
* `net_taxable` (Float): The net taxable amount of the employee.
|
119
|
+
* `net_taxable_ytd` (Float): The year-to-date net taxable amount of the employee.
|
120
|
+
* `total_cost_employer` (Float): The total cost to the employer.
|
121
|
+
* `total_taxes_and_deductions` (Float): The total taxes and deductions of the employee.
|
122
|
+
Fields which are specific to this product; they are not used in any other product.
|
123
|
+
|
124
|
+
### Pay Period Field
|
125
|
+
Information about the pay period.
|
126
|
+
|
127
|
+
A `PayslipV2PayPeriod` implements the following attributes:
|
128
|
+
|
129
|
+
* `end_date` (String): The end date of the pay period.
|
130
|
+
* `month` (String): The month of the pay period.
|
131
|
+
* `payment_date` (String): The date of payment for the pay period.
|
132
|
+
* `start_date` (String): The start date of the pay period.
|
133
|
+
* `year` (String): The year of the pay period.
|
134
|
+
Fields which are specific to this product; they are not used in any other product.
|
135
|
+
|
136
|
+
### PTO Field
|
137
|
+
Information about paid time off.
|
138
|
+
|
139
|
+
A `PayslipV2Pto` implements the following attributes:
|
140
|
+
|
141
|
+
* `accrued_this_period` (Float): The amount of paid time off accrued in this period.
|
142
|
+
* `balance_end_of_period` (Float): The balance of paid time off at the end of the period.
|
143
|
+
* `used_this_period` (Float): The amount of paid time off used in this period.
|
144
|
+
Fields which are specific to this product; they are not used in any other product.
|
145
|
+
|
146
|
+
### Salary Details Field
|
147
|
+
Detailed information about the earnings.
|
148
|
+
|
149
|
+
A `PayslipV2SalaryDetail` implements the following attributes:
|
150
|
+
|
151
|
+
* `amount` (Float): The amount of the earnings.
|
152
|
+
* `base` (Float): The base value of the earnings.
|
153
|
+
* `description` (String): The description of the earnings.
|
154
|
+
* `rate` (Float): The rate of the earnings.
|
155
|
+
|
156
|
+
# Attributes
|
157
|
+
The following fields are extracted for Payslip V2:
|
158
|
+
|
159
|
+
## Bank Account Details
|
160
|
+
**bank_account_details** ([PayslipV2BankAccountDetail](#bank-account-details-field)): Information about the employee's bank account.
|
161
|
+
|
162
|
+
```rb
|
163
|
+
puts result.document.inference.prediction.bank_account_details.value
|
164
|
+
```
|
165
|
+
|
166
|
+
## Employee
|
167
|
+
**employee** ([PayslipV2Employee](#employee-field)): Information about the employee.
|
168
|
+
|
169
|
+
```rb
|
170
|
+
puts result.document.inference.prediction.employee.value
|
171
|
+
```
|
172
|
+
|
173
|
+
## Employer
|
174
|
+
**employer** ([PayslipV2Employer](#employer-field)): Information about the employer.
|
175
|
+
|
176
|
+
```rb
|
177
|
+
puts result.document.inference.prediction.employer.value
|
178
|
+
```
|
179
|
+
|
180
|
+
## Employment
|
181
|
+
**employment** ([PayslipV2Employment](#employment-field)): Information about the employment.
|
182
|
+
|
183
|
+
```rb
|
184
|
+
puts result.document.inference.prediction.employment.value
|
185
|
+
```
|
186
|
+
|
187
|
+
## Pay Detail
|
188
|
+
**pay_detail** ([PayslipV2PayDetail](#pay-detail-field)): Detailed information about the pay.
|
189
|
+
|
190
|
+
```rb
|
191
|
+
puts result.document.inference.prediction.pay_detail.value
|
192
|
+
```
|
193
|
+
|
194
|
+
## Pay Period
|
195
|
+
**pay_period** ([PayslipV2PayPeriod](#pay-period-field)): Information about the pay period.
|
196
|
+
|
197
|
+
```rb
|
198
|
+
puts result.document.inference.prediction.pay_period.value
|
199
|
+
```
|
200
|
+
|
201
|
+
## PTO
|
202
|
+
**pto** ([PayslipV2Pto](#pto-field)): Information about paid time off.
|
203
|
+
|
204
|
+
```rb
|
205
|
+
puts result.document.inference.prediction.pto.value
|
206
|
+
```
|
207
|
+
|
208
|
+
## Salary Details
|
209
|
+
**salary_details** (Array<[PayslipV2SalaryDetail](#salary-details-field)>): Detailed information about the earnings.
|
210
|
+
|
211
|
+
```rb
|
212
|
+
for salary_details_elem in result.document.inference.prediction.salary_details do
|
213
|
+
puts salary_details_elem.value
|
214
|
+
end
|
215
|
+
```
|
216
|
+
|
217
|
+
# Questions?
|
218
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
data/docs/proof_of_address_v1.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: Proof of Address OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-proof-of-address-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [Proof of Address API](https://platform.mindee.com/mindee/proof_of_address).
|
5
8
|
|
@@ -27,6 +30,7 @@ puts result.document
|
|
27
30
|
|
28
31
|
# Print the document-level parsed data
|
29
32
|
# puts result.document.inference.prediction
|
33
|
+
|
30
34
|
```
|
31
35
|
|
32
36
|
**Output (RST):**
|
@@ -104,7 +108,7 @@ A typical `Field` object will have the following attributes:
|
|
104
108
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
105
109
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
106
110
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
107
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
111
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
108
112
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
109
113
|
|
110
114
|
|
data/docs/resume_v1.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: Resume OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-resume-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [Resume API](https://platform.mindee.com/mindee/resume).
|
5
8
|
|
@@ -27,6 +30,7 @@ puts result.document
|
|
27
30
|
|
28
31
|
# Print the document-level parsed data
|
29
32
|
# puts result.document.inference.prediction
|
33
|
+
|
30
34
|
```
|
31
35
|
|
32
36
|
**Output (RST):**
|
@@ -115,7 +119,7 @@ A typical `Field` object will have the following attributes:
|
|
115
119
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
116
120
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
117
121
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
118
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
122
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
119
123
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
120
124
|
|
121
125
|
|
@@ -165,6 +169,13 @@ A `ResumeV1Language` implements the following attributes:
|
|
165
169
|
|
166
170
|
* `language` (String): The language's ISO 639 code.
|
167
171
|
* `level` (String): The candidate's level for the language.
|
172
|
+
|
173
|
+
#### Possible values include:
|
174
|
+
- Fluent
|
175
|
+
- Proficient
|
176
|
+
- Intermediate
|
177
|
+
- Beginner
|
178
|
+
|
168
179
|
Fields which are specific to this product; they are not used in any other product.
|
169
180
|
|
170
181
|
### Professional Experiences Field
|
@@ -173,6 +184,13 @@ The list of the candidate's professional experiences.
|
|
173
184
|
A `ResumeV1ProfessionalExperience` implements the following attributes:
|
174
185
|
|
175
186
|
* `contract_type` (String): The type of contract for the professional experience.
|
187
|
+
|
188
|
+
#### Possible values include:
|
189
|
+
- Full-Time
|
190
|
+
- Part-Time
|
191
|
+
- Internship
|
192
|
+
- Freelance
|
193
|
+
|
176
194
|
* `department` (String): The specific department or division within the company.
|
177
195
|
* `employer` (String): The name of the company or organization.
|
178
196
|
* `end_month` (String): The month when the professional experience ended.
|
@@ -219,6 +237,11 @@ puts result.document.inference.prediction.document_language.value
|
|
219
237
|
## Document Type
|
220
238
|
**document_type** ([ClassificationField](#classification-field)): The type of the document sent.
|
221
239
|
|
240
|
+
#### Possible values include:
|
241
|
+
- RESUME
|
242
|
+
- MOTIVATION_LETTER
|
243
|
+
- RECOMMENDATION_LETTER
|
244
|
+
|
222
245
|
```rb
|
223
246
|
puts result.document.inference.prediction.document_type.value
|
224
247
|
```
|
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: US Driver License OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-us-driver-license-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [Driver License API](https://platform.mindee.com/mindee/us_driver_license).
|
5
8
|
|
@@ -27,6 +30,7 @@ puts result.document
|
|
27
30
|
|
28
31
|
# Print the document-level parsed data
|
29
32
|
# puts result.document.inference.prediction
|
33
|
+
|
30
34
|
```
|
31
35
|
|
32
36
|
**Output (RST):**
|
@@ -100,7 +104,7 @@ A typical `Field` object will have the following attributes:
|
|
100
104
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
101
105
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
102
106
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
103
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
107
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
104
108
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
105
109
|
|
106
110
|
|
@@ -122,7 +126,7 @@ The position field `PositionField` does not implement all the basic `Field` attr
|
|
122
126
|
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
123
127
|
|
124
128
|
## Page-Level Fields
|
125
|
-
Some fields are constrained to the page level, and so will not be retrievable
|
129
|
+
Some fields are constrained to the page level, and so will not be retrievable at document level.
|
126
130
|
|
127
131
|
# Attributes
|
128
132
|
The following fields are extracted for Driver License V1:
|
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: US Healthcare Card OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-us-healthcare-card-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [Healthcare Card API](https://platform.mindee.com/mindee/us_healthcare_cards).
|
5
8
|
|
@@ -27,6 +30,7 @@ puts result.document
|
|
27
30
|
|
28
31
|
# Print the document-level parsed data
|
29
32
|
# puts result.document.inference.prediction
|
33
|
+
|
30
34
|
```
|
31
35
|
|
32
36
|
**Output (RST):**
|
@@ -84,7 +88,7 @@ A typical `Field` object will have the following attributes:
|
|
84
88
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
85
89
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
86
90
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
87
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
91
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
88
92
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
89
93
|
|
90
94
|
|
data/docs/us_mail_v2.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: US US Mail OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-us-us-mail-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [US Mail API](https://platform.mindee.com/mindee/us_mail).
|
5
8
|
|
@@ -27,6 +30,7 @@ puts result.document
|
|
27
30
|
|
28
31
|
# Print the document-level parsed data
|
29
32
|
# puts result.document.inference.prediction
|
33
|
+
|
30
34
|
```
|
31
35
|
|
32
36
|
**Output (RST):**
|
@@ -59,7 +63,7 @@ A typical `Field` object will have the following attributes:
|
|
59
63
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
60
64
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
61
65
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
62
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
66
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
63
67
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
64
68
|
|
65
69
|
|
data/docs/us_w9_v1.md
CHANGED
@@ -1,5 +1,8 @@
|
|
1
1
|
---
|
2
2
|
title: US W9 OCR Ruby
|
3
|
+
category: 622b805aaec68102ea7fcbc2
|
4
|
+
slug: ruby-us-w9-ocr
|
5
|
+
parentDoc: 6294d97ee723f1008d2ab28e
|
3
6
|
---
|
4
7
|
The Ruby OCR SDK supports the [W9 API](https://platform.mindee.com/mindee/us_w9).
|
5
8
|
|
@@ -24,6 +27,7 @@ result = mindee_client.parse(
|
|
24
27
|
|
25
28
|
# Print a full summary of the parsed data in RST format
|
26
29
|
puts result.document
|
30
|
+
|
27
31
|
```
|
28
32
|
|
29
33
|
**Output (RST):**
|
@@ -73,7 +77,7 @@ A typical `Field` object will have the following attributes:
|
|
73
77
|
* **confidence** (Float, nil): the confidence score of the field prediction.
|
74
78
|
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
75
79
|
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
76
|
-
* **page_id** (`Integer`, `nil`): the ID of the page,
|
80
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, always `nil` when at document-level.
|
77
81
|
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
78
82
|
|
79
83
|
|
@@ -90,7 +94,7 @@ The position field `PositionField` does not implement all the basic `Field` attr
|
|
90
94
|
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
91
95
|
|
92
96
|
## Page-Level Fields
|
93
|
-
Some fields are constrained to the page level, and so will not be retrievable
|
97
|
+
Some fields are constrained to the page level, and so will not be retrievable at document level.
|
94
98
|
|
95
99
|
# Attributes
|
96
100
|
The following fields are extracted for W9 V1:
|
@@ -0,0 +1,43 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mindee'
|
4
|
+
|
5
|
+
# Init a new client
|
6
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
+
|
8
|
+
# Load a file from disk
|
9
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
10
|
+
|
11
|
+
if input_source.pdf?
|
12
|
+
pdf_extractor = Mindee::Extraction::PdfExtractor.new(input_source)
|
13
|
+
if pdf_extractor.page_count > 1
|
14
|
+
invoice_splitter_response = mindee_client.enqueue_and_parse(
|
15
|
+
input_source,
|
16
|
+
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1
|
17
|
+
)
|
18
|
+
page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
|
19
|
+
extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
|
20
|
+
extracted_pdfs.each do |extracted_pdf|
|
21
|
+
# Optional: Save the files locally
|
22
|
+
# extracted_pdf.write_to_file("output/path")
|
23
|
+
|
24
|
+
invoice_result = mindee_client.parse(
|
25
|
+
extracted_pdf.as_source,
|
26
|
+
Mindee::Product::Invoice::InvoiceV4
|
27
|
+
)
|
28
|
+
puts invoice_result
|
29
|
+
end
|
30
|
+
else
|
31
|
+
invoice_result = mindee_client.parse(
|
32
|
+
input_source,
|
33
|
+
Mindee::Product::Invoice::InvoiceV4
|
34
|
+
)
|
35
|
+
puts invoice_result.document
|
36
|
+
end
|
37
|
+
else
|
38
|
+
invoice_result = mindee_client.parse(
|
39
|
+
input_source,
|
40
|
+
Mindee::Product::Invoice::InvoiceV4
|
41
|
+
)
|
42
|
+
puts invoice_result.document
|
43
|
+
end
|
data/lib/mindee/client.rb
CHANGED
@@ -17,13 +17,16 @@ module Mindee
|
|
17
17
|
# Call prediction API on a document and parse the results.
|
18
18
|
#
|
19
19
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
20
|
-
# @param product_class [Mindee::
|
20
|
+
# @param product_class [Mindee::Inference] class of the product
|
21
21
|
# @param endpoint [HTTP::Endpoint] Endpoint of the API
|
22
22
|
# Doesn't need to be set in the case of OTS APIs.
|
23
23
|
#
|
24
24
|
# @param all_words [Boolean] Whether to include the full text for each page.
|
25
25
|
# This performs a full OCR operation on the server and will increase response time.
|
26
26
|
#
|
27
|
+
# @param full_text [Boolean] Whether to include the full OCR text response in compatible APIs.
|
28
|
+
# This performs a full OCR operation on the server and may increase response time.
|
29
|
+
#
|
27
30
|
# @param close_file [Boolean] Whether to `close()` the file after parsing it.
|
28
31
|
# Set to false if you need to access the file after this operation.
|
29
32
|
#
|
@@ -45,6 +48,7 @@ module Mindee
|
|
45
48
|
product_class,
|
46
49
|
endpoint: nil,
|
47
50
|
all_words: false,
|
51
|
+
full_text: false,
|
48
52
|
close_file: true,
|
49
53
|
page_options: nil,
|
50
54
|
cropper: false
|
@@ -53,20 +57,23 @@ module Mindee
|
|
53
57
|
input_source.process_pdf(page_options)
|
54
58
|
end
|
55
59
|
endpoint = initialize_endpoint(product_class) if endpoint.nil?
|
56
|
-
prediction, raw_http = endpoint.predict(input_source, all_words, close_file, cropper)
|
60
|
+
prediction, raw_http = endpoint.predict(input_source, all_words, full_text, close_file, cropper)
|
57
61
|
Mindee::Parsing::Common::ApiResponse.new(product_class, prediction, raw_http)
|
58
62
|
end
|
59
63
|
|
60
64
|
# Enqueue a document for async parsing
|
61
65
|
#
|
66
|
+
# @param product_class [Mindee::Inference] class of the product
|
62
67
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
63
|
-
# @param product_class [Mindee::Product] class of the product
|
64
68
|
# @param endpoint [HTTP::Endpoint, nil] Endpoint of the API.
|
65
69
|
# Doesn't need to be set in the case of OTS APIs.
|
66
70
|
#
|
67
71
|
# @param all_words [Boolean] Whether to extract all the words on each page.
|
68
72
|
# This performs a full OCR operation on the server and will increase response time.
|
69
73
|
#
|
74
|
+
# @param full_text [Boolean] Whether to include the full OCR text response in compatible APIs.
|
75
|
+
# This performs a full OCR operation on the server and may increase response time.
|
76
|
+
#
|
70
77
|
# @param close_file [Boolean] Whether to `close()` the file after parsing it.
|
71
78
|
# Set to false if you need to access the file after this operation.
|
72
79
|
#
|
@@ -88,6 +95,7 @@ module Mindee
|
|
88
95
|
product_class,
|
89
96
|
endpoint: nil,
|
90
97
|
all_words: false,
|
98
|
+
full_text: false,
|
91
99
|
close_file: true,
|
92
100
|
page_options: nil,
|
93
101
|
cropper: false
|
@@ -96,7 +104,7 @@ module Mindee
|
|
96
104
|
input_source.process_pdf(page_options)
|
97
105
|
end
|
98
106
|
endpoint = initialize_endpoint(product_class) if endpoint.nil?
|
99
|
-
prediction, raw_http = endpoint.predict_async(input_source, all_words, close_file, cropper)
|
107
|
+
prediction, raw_http = endpoint.predict_async(input_source, all_words, full_text, close_file, cropper)
|
100
108
|
Mindee::Parsing::Common::ApiResponse.new(product_class,
|
101
109
|
prediction, raw_http)
|
102
110
|
end
|
@@ -104,7 +112,7 @@ module Mindee
|
|
104
112
|
# Parses a queued document
|
105
113
|
#
|
106
114
|
# @param job_id [String] Id of the job (queue) to poll from
|
107
|
-
# @param product_class [Mindee::
|
115
|
+
# @param product_class [Mindee::Inference] class of the product
|
108
116
|
# @param endpoint [HTTP::Endpoint, nil] Endpoint of the API
|
109
117
|
# Doesn't need to be set in the case of OTS APIs.
|
110
118
|
#
|
@@ -123,11 +131,13 @@ module Mindee
|
|
123
131
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
124
132
|
#
|
125
133
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
126
|
-
# @param product_class [Mindee::
|
134
|
+
# @param product_class [Mindee::Inference] class of the product
|
127
135
|
# @param endpoint [HTTP::Endpoint, nil] Endpoint of the API.
|
128
136
|
# Doesn't need to be set in the case of OTS APIs.
|
129
137
|
# @param all_words [Boolean] Whether to extract all the words on each page.
|
130
138
|
# This performs a full OCR operation on the server and will increase response time.
|
139
|
+
# @param full_text [Boolean] Whether to include the full OCR text response in compatible APIs.
|
140
|
+
# This performs a full OCR operation on the server and may increase response time.
|
131
141
|
# @param close_file [Boolean] Whether to `close()` the file after parsing it.
|
132
142
|
# Set to false if you need to access the file after this operation.
|
133
143
|
# @param page_options [Hash, nil] Page cutting/merge options:
|
@@ -147,6 +157,7 @@ module Mindee
|
|
147
157
|
product_class,
|
148
158
|
endpoint: nil,
|
149
159
|
all_words: false,
|
160
|
+
full_text: false,
|
150
161
|
close_file: true,
|
151
162
|
page_options: nil,
|
152
163
|
cropper: false,
|
@@ -159,6 +170,7 @@ module Mindee
|
|
159
170
|
product_class,
|
160
171
|
endpoint: endpoint,
|
161
172
|
all_words: all_words,
|
173
|
+
full_text: full_text,
|
162
174
|
close_file: close_file,
|
163
175
|
page_options: page_options,
|
164
176
|
cropper: cropper
|
@@ -184,7 +196,7 @@ module Mindee
|
|
184
196
|
|
185
197
|
# Load a prediction.
|
186
198
|
#
|
187
|
-
# @param product_class [Mindee::
|
199
|
+
# @param product_class [Mindee::Inference] class of the product
|
188
200
|
# @param local_response [Mindee::Input::LocalResponse]
|
189
201
|
# @return [Mindee::Parsing::Common::ApiResponse]
|
190
202
|
def load_prediction(product_class, local_response)
|
@@ -269,7 +281,7 @@ module Mindee
|
|
269
281
|
end
|
270
282
|
|
271
283
|
# Creates an endpoint with the given values. Raises an error if the endpoint is invalid.
|
272
|
-
# @param product_class [Mindee::
|
284
|
+
# @param product_class [Mindee::Inference] class of the product
|
273
285
|
#
|
274
286
|
# @param endpoint_name [String] For custom endpoints, the "API name" field in the "Settings" page of the
|
275
287
|
# API Builder. Do not set for standard (off the shelf) endpoints.
|
@@ -10,7 +10,7 @@ require_relative 'extracted_image'
|
|
10
10
|
module Mindee
|
11
11
|
# Image Extraction Module.
|
12
12
|
module ImageExtraction
|
13
|
-
def attach_image_as_new_file(input_buffer)
|
13
|
+
def self.attach_image_as_new_file(input_buffer)
|
14
14
|
# Attaches an image as a new page in a PdfDocument object.
|
15
15
|
#
|
16
16
|
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
@@ -24,9 +24,7 @@ module Mindee
|
|
24
24
|
scale_factor = original_density[0].to_f / 4.166666 # No clue why but the resolution needs to be reduced for
|
25
25
|
# the pdf otherwise the resulting image shrinks.
|
26
26
|
magick_image.format('pdf', 0, { density: scale_factor.to_s })
|
27
|
-
|
28
|
-
magick_image.write(io_buffer)
|
29
|
-
Origami::PDF.read(io_buffer)
|
27
|
+
Origami::PDF.read(StringIO.new(magick_image.to_blob))
|
30
28
|
end
|
31
29
|
|
32
30
|
# Extracts multiple images from a given local input source.
|
@@ -0,0 +1,55 @@
|
|
1
|
+
# frozen_string_literal: true

module Mindee
  # Pdf Extraction Module.
  module Extraction
    module PdfExtractor
      # An extracted sub-Pdf.
      class ExtractedPdf
        # Byte contents of the pdf.
        # NOTE(review): presumably a StringIO (the stream is consumed with `read`) - confirm against callers.
        # @return [StringIO]
        attr_reader :pdf_bytes

        # Name of the file.
        # @return [String]
        attr_reader :filename

        # @param pdf_bytes [StringIO] Stream holding the contents of the pdf.
        # @param filename [String] Name of the file.
        def initialize(pdf_bytes, filename)
          @pdf_bytes = pdf_bytes
          @filename = filename
        end

        # Retrieves the page count for a given pdf.
        # @return [Integer]
        # @raise [RuntimeError] When opening the pdf raises a TypeError.
        def page_count
          current_pdf = Mindee::PDF::PdfProcessor.open_pdf(pdf_bytes)
          current_pdf.pages.size
        rescue TypeError
          raise 'Could not retrieve page count from Extracted PDF object.'
        end

        # Writes the contents of the current PDF object to a file.
        # A '.pdf' extension is appended when the given path does not already end with one.
        # @param output_path [String] Path to write to.
        # @return [Integer] Number of bytes written.
        # @raise [RuntimeError] When the path is a directory or its parent directory does not exist.
        def write_to_file(output_path)
          raise 'Provided path is not a file' if File.directory?(output_path)
          raise 'Invalid save path provided' unless File.exist?(File.expand_path('..', output_path))

          output_path = "#{output_path}.pdf" unless File.extname(output_path).casecmp?('.pdf')

          # Binary mode: PDF data must not go through any newline or encoding conversion.
          File.binwrite(output_path, raw_bytes)
        end

        # Returns the current PDF object as a usable BytesInputSource.
        # @return [Mindee::Input::Source::BytesInputSource]
        def as_input_source
          Mindee::Input::Source::BytesInputSource.new(raw_bytes, @filename)
        end

        private

        # Reads the whole pdf content, whatever the current position of the stream.
        # Plain strings (no `read` method) are returned as they are.
        # @return [String]
        def raw_bytes
          return @pdf_bytes.to_s unless @pdf_bytes.respond_to?(:read)

          # Rewind first so a previous read does not leave us with an empty result.
          @pdf_bytes.rewind if @pdf_bytes.respond_to?(:rewind)
          @pdf_bytes.read
        end
      end
    end
  end
end
|