mindee 3.13.0 → 3.14.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/docs/bill_of_lading_v1.md +50 -1
- data/docs/energy_bill_fra_v1.md +61 -1
- data/docs/expense_receipts_v5.md +4 -4
- data/docs/financial_document_v1.md +14 -0
- data/docs/invoices_v4.md +16 -2
- data/docs/nutrition_facts_v1.md +80 -1
- data/docs/payslip_fra_v2.md +77 -1
- data/docs/us_mail_v2.md +1 -1
- data/examples/auto_invoice_splitter_extraction.rb +36 -31
- data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
- data/lib/mindee/extraction/common/extracted_image.rb +1 -1
- data/lib/mindee/extraction/common/image_extractor.rb +160 -157
- data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +22 -16
- data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +1 -1
- data/lib/mindee/parsing/standard/date_field.rb +4 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/version.rb +1 -1
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: '043953eb35ed9f251d12ca8b984edcf9acbeb491689574450dc3682b83e9db37'
|
4
|
+
data.tar.gz: 5916ad8aeada6713ee5a846d634693fd1ed5e585495238649f9a9c1d2e9fbd00
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7edd036d9666e1ffe0318b8ec68cdfe5b37b6bdbb90e36ae0c19efa5cc038f109af30d9e6df6838e4237db2f9495a6251f10164db7e0c7dff528720ef4477a2a
|
7
|
+
data.tar.gz: 7dfbb27c5175bda760ba816be62b1447acf4517d6741506cb458bbe4b2b4c77338eff1d799d19f4b0bdca663323523eb479bc468e80f9ed1943226694ef04143
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v3.14.0 - 2024-10-11
|
4
|
+
### Changes
|
5
|
+
* :sparkles: add support for Financial Document v1.10
|
6
|
+
* :sparkles: add support for Invoice v4.8
|
7
|
+
### Fixes
|
8
|
+
* :bug: fix multi-receipts extraction not working as intended
|
9
|
+
|
10
|
+
|
3
11
|
## v3.13.0 - 2024-09-18
|
4
12
|
### Changes
|
5
13
|
* :sparkles: add support for BillOfLadingV1
|
data/docs/bill_of_lading_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Bill of Lading API](https://platform.mindee.com/mindee/bill_of_lading).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,55 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 3b5250a1-b52c-4e0b-bc3e-2f0146b04e29
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/bill_of_lading v1.1
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Bill of Lading Number: XYZ123456
|
52
|
+
:Shipper:
|
53
|
+
:Address: 123 OCEAN DRIVE, SHANGHAI, CHINA
|
54
|
+
:Email:
|
55
|
+
:Name: GLOBAL FREIGHT SOLUTIONS INC.
|
56
|
+
:Phone: 86-21-12345678
|
57
|
+
:Consignee:
|
58
|
+
:Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
|
59
|
+
:Email:
|
60
|
+
:Name: PACIFIC TRADING CO.
|
61
|
+
:Phone: 65-65432100
|
62
|
+
:Notify Party:
|
63
|
+
:Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
|
64
|
+
:Email:
|
65
|
+
:Name: PACIFIC TRADING CO.
|
66
|
+
:Phone: 65-65432100
|
67
|
+
:Carrier:
|
68
|
+
:Name: GLOBAL SHIPPING CO.,LTD.
|
69
|
+
:Professional Number:
|
70
|
+
:SCAC:
|
71
|
+
:Items:
|
72
|
+
+--------------------------------------+--------------+-------------+------------------+----------+-------------+
|
73
|
+
| Description | Gross Weight | Measurement | Measurement Unit | Quantity | Weight Unit |
|
74
|
+
+======================================+==============+=============+==================+==========+=============+
|
75
|
+
| ELECTRONIC COMPONENTS\nP/N: 12345... | 500.00 | 1.50 | cbm | 1.00 | kgs |
|
76
|
+
+--------------------------------------+--------------+-------------+------------------+----------+-------------+
|
77
|
+
:Port of Loading: SHANGHAI, CHINA
|
78
|
+
:Port of Discharge: LOS ANGELES, USA
|
79
|
+
:Place of Delivery: LOS ANGELES, USA
|
80
|
+
:Date of issue: 2022-09-30
|
81
|
+
:Departure Date:
|
82
|
+
```
|
83
|
+
|
35
84
|
# Field Types
|
36
85
|
## Standard Fields
|
37
86
|
These fields are generic and used in several products.
|
data/docs/energy_bill_fra_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Energy Bill API](https://platform.mindee.com/mindee/energy_bill_fra).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,66 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 17f0ccef-e3fe-4a28-838d-d704489d6ce7
|
42
|
+
:Filename: default_sample.pdf
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/energy_bill_fra v1.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Invoice Number: 10123590373
|
52
|
+
:Contract ID: 1234567890
|
53
|
+
:Delivery Point: 98765432109876
|
54
|
+
:Invoice Date: 2021-01-29
|
55
|
+
:Due Date: 2021-02-15
|
56
|
+
:Total Before Taxes: 1241.03
|
57
|
+
:Total Taxes: 238.82
|
58
|
+
:Total Amount: 1479.85
|
59
|
+
:Energy Supplier:
|
60
|
+
:Address: TSA 12345, 12345 DEMOCITY CEDEX, 75001 PARIS
|
61
|
+
:Name: EDF
|
62
|
+
:Energy Consumer:
|
63
|
+
:Address: 12 AVENUE DES RÊVES, RDC A 123 COUR FAUSSE A, 75000 PARIS
|
64
|
+
:Name: John Doe
|
65
|
+
:Subscription:
|
66
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
67
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
68
|
+
+======================================+============+============+==========+===========+============+
|
69
|
+
| Abonnement électricité | 2021-02-28 | 2021-01-01 | 5.50 | 59.00 | 29.50 |
|
70
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
71
|
+
:Energy Usage:
|
72
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
73
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
74
|
+
+======================================+============+============+==========+===========+============+
|
75
|
+
| Consommation (HT) | 2021-01-27 | 2020-11-28 | 20.00 | 898.43 | 10.47 |
|
76
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
77
|
+
:Taxes and Contributions:
|
78
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
79
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
80
|
+
+======================================+============+============+==========+===========+============+
|
81
|
+
| Contribution au Service Public de... | 2021-01-27 | 2020-11-28 | 20.00 | 193.07 | 2.25 |
|
82
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
83
|
+
| Départementale sur la Conso Final... | 2020-12-31 | 2020-11-28 | 20.00 | 13.98 | 0.3315 |
|
84
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
85
|
+
| Communale sur la Conso Finale Ele... | 2021-01-27 | 2021-01-01 | 20.00 | 28.56 | 0.6545 |
|
86
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
87
|
+
| Contribution Tarifaire d'Achemine... | 2020-12-31 | 2020-11-28 | 20.00 | 27.96 | 0.663 |
|
88
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
89
|
+
:Meter Details:
|
90
|
+
:Meter Number: 620
|
91
|
+
:Meter Type: electricity
|
92
|
+
:Unit of Measure: kWh
|
93
|
+
```
|
94
|
+
|
35
95
|
# Field Types
|
36
96
|
## Standard Fields
|
37
97
|
These fields are generic and used in several products.
|
data/docs/expense_receipts_v5.md
CHANGED
@@ -50,7 +50,7 @@ Prediction
|
|
50
50
|
==========
|
51
51
|
:Expense Locale: en-GB; en; GB; GBP;
|
52
52
|
:Purchase Category: food
|
53
|
-
:Purchase Subcategory:
|
53
|
+
:Purchase Subcategory:
|
54
54
|
:Document Type: EXPENSE RECEIPT
|
55
55
|
:Purchase Date: 2016-02-26
|
56
56
|
:Purchase Time: 15:20
|
@@ -64,7 +64,7 @@ Prediction
|
|
64
64
|
+===============+========+==========+===============+
|
65
65
|
| 8.50 | VAT | 20.00 | 1.70 |
|
66
66
|
+---------------+--------+----------+---------------+
|
67
|
-
:Supplier Name:
|
67
|
+
:Supplier Name: Clachan
|
68
68
|
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
69
69
|
Type: VAT NUMBER, Value: 232153895
|
70
70
|
:Supplier Address: 34 Kingley Street W1B 50H
|
@@ -84,7 +84,7 @@ Page 0
|
|
84
84
|
------
|
85
85
|
:Expense Locale: en-GB; en; GB; GBP;
|
86
86
|
:Purchase Category: food
|
87
|
-
:Purchase Subcategory:
|
87
|
+
:Purchase Subcategory:
|
88
88
|
:Document Type: EXPENSE RECEIPT
|
89
89
|
:Purchase Date: 2016-02-26
|
90
90
|
:Purchase Time: 15:20
|
@@ -98,7 +98,7 @@ Page 0
|
|
98
98
|
+===============+========+==========+===============+
|
99
99
|
| 8.50 | VAT | 20.00 | 1.70 |
|
100
100
|
+---------------+--------+----------+---------------+
|
101
|
-
:Supplier Name:
|
101
|
+
:Supplier Name: Clachan
|
102
102
|
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
103
103
|
Type: VAT NUMBER, Value: 232153895
|
104
104
|
:Supplier Address: 34 Kingley Street W1B 50H
|
@@ -370,6 +370,20 @@ end
|
|
370
370
|
puts result.document.inference.prediction.locale.value
|
371
371
|
```
|
372
372
|
|
373
|
+
## Payment Date
|
374
|
+
**payment_date** ([DateField](#date-field)): The date on which the payment is due / fulfilled.
|
375
|
+
|
376
|
+
```rb
|
377
|
+
puts result.document.inference.prediction.payment_date.value
|
378
|
+
```
|
379
|
+
|
380
|
+
## Purchase Order Number
|
381
|
+
**po_number** ([StringField](#string-field)): The purchase order number.
|
382
|
+
|
383
|
+
```rb
|
384
|
+
puts result.document.inference.prediction.po_number.value
|
385
|
+
```
|
386
|
+
|
373
387
|
## Receipt Number
|
374
388
|
**receipt_number** ([StringField](#string-field)): The receipt number or identifier only if document is a receipt.
|
375
389
|
|
data/docs/invoices_v4.md
CHANGED
@@ -93,7 +93,7 @@ Prediction
|
|
93
93
|
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
|
94
94
|
:Supplier Phone Number: 4165551212
|
95
95
|
:Supplier Website:
|
96
|
-
:Supplier Email:
|
96
|
+
:Supplier Email: ldoi@example.com
|
97
97
|
:Customer Name: JIRO DOI
|
98
98
|
:Customer Company Registrations:
|
99
99
|
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
|
@@ -137,7 +137,7 @@ Page 0
|
|
137
137
|
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
|
138
138
|
:Supplier Phone Number: 4165551212
|
139
139
|
:Supplier Website:
|
140
|
-
:Supplier Email:
|
140
|
+
:Supplier Email: ldoi@example.com
|
141
141
|
:Customer Name: JIRO DOI
|
142
142
|
:Customer Company Registrations:
|
143
143
|
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
|
@@ -332,6 +332,20 @@ end
|
|
332
332
|
puts result.document.inference.prediction.locale.value
|
333
333
|
```
|
334
334
|
|
335
|
+
## Payment Date
|
336
|
+
**payment_date** ([DateField](#date-field)): The date on which the payment is due / was fulfilled.
|
337
|
+
|
338
|
+
```rb
|
339
|
+
puts result.document.inference.prediction.payment_date.value
|
340
|
+
```
|
341
|
+
|
342
|
+
## Purchase Order Number
|
343
|
+
**po_number** ([StringField](#string-field)): The purchase order number.
|
344
|
+
|
345
|
+
```rb
|
346
|
+
puts result.document.inference.prediction.po_number.value
|
347
|
+
```
|
348
|
+
|
335
349
|
## Reference Numbers
|
336
350
|
**reference_numbers** (Array<[StringField](#string-field)>): List of Reference numbers, including PO number.
|
337
351
|
|
data/docs/nutrition_facts_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Nutrition Facts Label API](https://platform.mindee.com/mindee/nutrition_facts).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,85 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 38a12fe0-5d69-4ca4-9b30-12f1b659311c
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/nutrition_facts v1.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Serving per Box: 2.00
|
52
|
+
:Serving Size:
|
53
|
+
:Amount: 228.00
|
54
|
+
:Unit: g
|
55
|
+
:Calories:
|
56
|
+
:Daily Value:
|
57
|
+
:Per 100g:
|
58
|
+
:Per Serving: 250.00
|
59
|
+
:Total Fat:
|
60
|
+
:Daily Value:
|
61
|
+
:Per 100g:
|
62
|
+
:Per Serving: 12.00
|
63
|
+
:Saturated Fat:
|
64
|
+
:Daily Value: 15.00
|
65
|
+
:Per 100g:
|
66
|
+
:Per Serving: 3.00
|
67
|
+
:Trans Fat:
|
68
|
+
:Daily Value:
|
69
|
+
:Per 100g:
|
70
|
+
:Per Serving: 3.00
|
71
|
+
:Cholesterol:
|
72
|
+
:Daily Value: 10.00
|
73
|
+
:Per 100g:
|
74
|
+
:Per Serving: 30.00
|
75
|
+
:Total Carbohydrate:
|
76
|
+
:Daily Value: 10.00
|
77
|
+
:Per 100g:
|
78
|
+
:Per Serving: 31.00
|
79
|
+
:Dietary Fiber:
|
80
|
+
:Daily Value: 0.00
|
81
|
+
:Per 100g:
|
82
|
+
:Per Serving: 0.00
|
83
|
+
:Total Sugars:
|
84
|
+
:Daily Value:
|
85
|
+
:Per 100g:
|
86
|
+
:Per Serving: 5.00
|
87
|
+
:Added Sugars:
|
88
|
+
:Daily Value:
|
89
|
+
:Per 100g:
|
90
|
+
:Per Serving:
|
91
|
+
:Protein:
|
92
|
+
:Daily Value:
|
93
|
+
:Per 100g:
|
94
|
+
:Per Serving: 5.00
|
95
|
+
:sodium:
|
96
|
+
:Daily Value: 20.00
|
97
|
+
:Per 100g:
|
98
|
+
:Per Serving: 470.00
|
99
|
+
:Unit: mg
|
100
|
+
:nutrients:
|
101
|
+
+-------------+----------------------+----------+-------------+------+
|
102
|
+
| Daily Value | Name | Per 100g | Per Serving | Unit |
|
103
|
+
+=============+======================+==========+=============+======+
|
104
|
+
| 12.00 | Vitamin A | | 4.00 | mcg |
|
105
|
+
+-------------+----------------------+----------+-------------+------+
|
106
|
+
| 12.00 | Vitamin C | | 2.00 | mg |
|
107
|
+
+-------------+----------------------+----------+-------------+------+
|
108
|
+
| 12.00 | Calcium | | 45.60 | mg |
|
109
|
+
+-------------+----------------------+----------+-------------+------+
|
110
|
+
| 12.00 | Iron | | 0.90 | mg |
|
111
|
+
+-------------+----------------------+----------+-------------+------+
|
112
|
+
```
|
113
|
+
|
35
114
|
# Field Types
|
36
115
|
## Standard Fields
|
37
116
|
These fields are generic and used in several products.
|
data/docs/payslip_fra_v2.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Payslip API](https://platform.mindee.com/mindee/payslip_fra).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,82 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 972edba5-25aa-49d0-8431-e2557ddd788e
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/payslip_fra v2.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Employee:
|
52
|
+
:Address: 52 RUE DES FLEURS 33500 LIBOURNE FRANCE
|
53
|
+
:Date of Birth:
|
54
|
+
:First Name: Jean Luc
|
55
|
+
:Last Name: Picard
|
56
|
+
:Phone Number:
|
57
|
+
:Registration Number:
|
58
|
+
:Social Security Number: 123456789012345
|
59
|
+
:Employer:
|
60
|
+
:Address: 1 RUE DU TONNOT 25210 DOUBS
|
61
|
+
:Company ID: 12345678901234
|
62
|
+
:Company Site:
|
63
|
+
:NAF Code: 1234A
|
64
|
+
:Name: DEMO COMPANY
|
65
|
+
:Phone Number:
|
66
|
+
:URSSAF Number:
|
67
|
+
:Bank Account Details:
|
68
|
+
:Bank Name:
|
69
|
+
:IBAN:
|
70
|
+
:SWIFT:
|
71
|
+
:Employment:
|
72
|
+
:Category: Cadre
|
73
|
+
:Coefficient: 600.00
|
74
|
+
:Collective Agreement: Construction -- Promotion
|
75
|
+
:Job Title: Directeur Régional du Développement
|
76
|
+
:Position Level:
|
77
|
+
:Start Date: 2022-05-01
|
78
|
+
:Salary Details:
|
79
|
+
+--------------+-----------+--------------------------------------+-----------+
|
80
|
+
| Amount | Base | Description | Rate |
|
81
|
+
+==============+===========+======================================+===========+
|
82
|
+
| 6666.67 | | Salaire de base | |
|
83
|
+
+--------------+-----------+--------------------------------------+-----------+
|
84
|
+
| 9.30 | | Part patronale Mutuelle NR | |
|
85
|
+
+--------------+-----------+--------------------------------------+-----------+
|
86
|
+
| 508.30 | | Avantages en nature voiture | |
|
87
|
+
+--------------+-----------+--------------------------------------+-----------+
|
88
|
+
:Pay Detail:
|
89
|
+
:Gross Salary: 7184.27
|
90
|
+
:Gross Salary YTD: 18074.81
|
91
|
+
:Income Tax Rate: 17.60
|
92
|
+
:Income Tax Withheld: 1030.99
|
93
|
+
:Net Paid: 3868.32
|
94
|
+
:Net Paid Before Tax: 4899.31
|
95
|
+
:Net Taxable: 5857.90
|
96
|
+
:Net Taxable YTD: 14752.73
|
97
|
+
:Total Cost Employer: 10486.94
|
98
|
+
:Total Taxes and Deductions: 1650.36
|
99
|
+
:PTO:
|
100
|
+
:Accrued This Period: 6.17
|
101
|
+
:Balance End of Period: 6.17
|
102
|
+
:Used This Period:
|
103
|
+
:Pay Period:
|
104
|
+
:End Date: 2023-03-31
|
105
|
+
:Month: 03
|
106
|
+
:Payment Date: 2023-03-29
|
107
|
+
:Start Date: 2023-03-01
|
108
|
+
:Year: 2023
|
109
|
+
```
|
110
|
+
|
35
111
|
# Field Types
|
36
112
|
## Standard Fields
|
37
113
|
These fields are generic and used in several products.
|
data/docs/us_mail_v2.md
CHANGED
@@ -38,7 +38,7 @@ puts result.document
|
|
38
38
|
:Sender Name: zed
|
39
39
|
:Sender Address:
|
40
40
|
:City: Dallas
|
41
|
-
:Complete Address: 54321 Elm Street, Dallas, Texas
|
41
|
+
:Complete Address: 54321 Elm Street, Dallas, Texas 54321
|
42
42
|
:Postal Code: 54321
|
43
43
|
:State: TX
|
44
44
|
:Street: 54321 Elm Street
|
@@ -2,42 +2,47 @@
|
|
2
2
|
|
3
3
|
require 'mindee'
|
4
4
|
|
5
|
-
|
6
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
-
|
8
|
-
|
9
|
-
input_source
|
10
|
-
|
11
|
-
if input_source.pdf?
|
12
|
-
pdf_extractor = Mindee::Extraction::PdfExtractor.new(input_source)
|
13
|
-
if pdf_extractor.page_count > 1
|
14
|
-
invoice_splitter_response = mindee_client.enqueue_and_parse(
|
15
|
-
input_source,
|
16
|
-
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1
|
17
|
-
)
|
18
|
-
page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
|
19
|
-
extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
|
20
|
-
extracted_pdfs.each do |extracted_pdf|
|
21
|
-
# Optional: Save the files locally
|
22
|
-
# extracted_pdf.write_to_file("output/path")
|
23
|
-
|
24
|
-
invoice_result = mindee_client.parse(
|
25
|
-
InvoiceV4,
|
26
|
-
extracted_pdf.as_source
|
27
|
-
)
|
28
|
-
puts invoice_result
|
29
|
-
end
|
5
|
+
def invoice_splitter_auto_extraction(file_path)
|
6
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
+
input_source = mindee_client.source_from_path(file_path)
|
8
|
+
|
9
|
+
if input_source.pdf? && input_source.count_pdf_pages > 1
|
10
|
+
parse_multi_page(mindee_client, input_source)
|
30
11
|
else
|
31
|
-
|
32
|
-
input_source,
|
33
|
-
Mindee::Product::Invoice::InvoiceV4
|
34
|
-
)
|
35
|
-
puts invoice_result.document
|
12
|
+
parse_single_page(mindee_client, input_source)
|
36
13
|
end
|
37
|
-
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_single_page(mindee_client, input_source)
|
38
17
|
invoice_result = mindee_client.parse(
|
39
18
|
input_source,
|
40
19
|
Mindee::Product::Invoice::InvoiceV4
|
41
20
|
)
|
42
21
|
puts invoice_result.document
|
43
22
|
end
|
23
|
+
|
24
|
+
def parse_multi_page(mindee_client, input_source)
|
25
|
+
pdf_extractor = Mindee::Extraction::PdfExtractor::PdfExtractor.new(input_source)
|
26
|
+
invoice_splitter_response = mindee_client.enqueue_and_parse(
|
27
|
+
input_source,
|
28
|
+
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1,
|
29
|
+
close_file: false
|
30
|
+
)
|
31
|
+
page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
|
32
|
+
extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
|
33
|
+
|
34
|
+
extracted_pdfs.each do |extracted_pdf|
|
35
|
+
# Optional: Save the files locally
|
36
|
+
# extracted_pdf.write_to_file("output/path")
|
37
|
+
|
38
|
+
invoice_result = mindee_client.parse(
|
39
|
+
extracted_pdf.as_input_source,
|
40
|
+
Mindee::Product::Invoice::InvoiceV4,
|
41
|
+
close_file: false
|
42
|
+
)
|
43
|
+
puts invoice_result.document
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
my_file_path = '/path/to/the/file.ext'
|
48
|
+
invoice_splitter_auto_extraction(my_file_path)
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mindee'
|
4
|
+
require 'mindee/extraction'
|
5
|
+
|
6
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
+
def multi_receipts_detection(file_path, mindee_client)
|
8
|
+
input_source = mindee_client.source_from_path(file_path)
|
9
|
+
|
10
|
+
result_split = mindee_client.parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::MultiReceiptsDetector::MultiReceiptsDetectorV1,
|
13
|
+
close_file: false
|
14
|
+
)
|
15
|
+
|
16
|
+
images = Mindee::Extraction::MultiReceiptsExtractor.extract_receipts(input_source, result_split.document.inference)
|
17
|
+
images.each do |sub_image|
|
18
|
+
# Optional: Save the files locally
|
19
|
+
# sub_image.write_to_file("/path/to/my/extracted/file/folder")
|
20
|
+
|
21
|
+
result_receipt = mindee_client.parse(
|
22
|
+
sub_image.as_source,
|
23
|
+
Mindee::Product::Receipt::ReceiptV5,
|
24
|
+
close_file: false
|
25
|
+
)
|
26
|
+
puts result_receipt.document
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
my_file_path = '/path/to/the/file.ext'
|
31
|
+
multi_receipts_detection(my_file_path, mindee_client)
|
@@ -9,180 +9,183 @@ require_relative 'extracted_image'
|
|
9
9
|
|
10
10
|
module Mindee
|
11
11
|
# Image Extraction Module.
|
12
|
-
module
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
12
|
+
module Extraction
|
13
|
+
# Image Extraction wrapper class.
|
14
|
+
class ImageExtractor
|
15
|
+
def self.attach_image_as_new_file(input_buffer)
|
16
|
+
# Attaches an image as a new page in a PdfDocument object.
|
17
|
+
#
|
18
|
+
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
19
|
+
# @return [Origami::PDF] A PdfDocument handle.
|
20
|
+
|
21
|
+
magick_image = MiniMagick::Image.read(input_buffer)
|
22
|
+
# NOTE: some jpeg images get rendered as three different versions of themselves per output if the format isn't
|
23
|
+
# converted.
|
24
|
+
magick_image.format('jpg')
|
25
|
+
original_density = magick_image.resolution
|
26
|
+
scale_factor = original_density[0].to_f / 4.166666 # No clue why, but the resolution needs to be reduced for
|
27
|
+
# the pdf otherwise the resulting image shrinks.
|
28
|
+
magick_image.format('pdf', 0, { density: scale_factor.to_s })
|
29
|
+
Origami::PDF.read(StringIO.new(magick_image.to_blob))
|
30
|
+
end
|
29
31
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
32
|
+
# Extracts multiple images from a given local input source.
|
33
|
+
#
|
34
|
+
# @param [Mindee::Input::Source::LocalInputSource] input_source
|
35
|
+
# @param [Integer] page_id ID of the Page to extract from.
|
36
|
+
# @param [Array<Array<Mindee::Geometry::Point>>, Array<Mindee::Geometry::Quadrangle>] polygons List of coordinates
|
37
|
+
# to extract.
|
38
|
+
# @return [Array<Mindee::Extraction::ExtractedImage>] Extracted Images.
|
39
|
+
def self.extract_multiple_images_from_source(input_source, page_id, polygons)
|
40
|
+
new_stream = load_doc(input_source, page_id)
|
41
|
+
new_stream.seek(0)
|
42
|
+
|
43
|
+
extract_images_from_polygons(input_source, new_stream, page_id, polygons)
|
44
|
+
end
|
43
45
|
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
46
|
+
# Retrieves a PDF document's page.
|
47
|
+
#
|
48
|
+
# @param [Origami::PDF] pdf_doc Origami PDF handle.
|
49
|
+
# @param [Integer] page_id Page ID.
|
50
|
+
def self.get_page(pdf_doc, page_id)
|
51
|
+
stream = StringIO.new
|
52
|
+
pdf_doc.save(stream)
|
51
53
|
|
52
|
-
|
53
|
-
|
54
|
-
|
54
|
+
options = {
|
55
|
+
page_indexes: [page_id - 1],
|
56
|
+
}
|
55
57
|
|
56
|
-
|
57
|
-
|
58
|
+
Mindee::PDF::PdfProcessor.parse(stream, options)
|
59
|
+
end
|
58
60
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
61
|
+
# Extracts images from their positions on a file (as polygons).
|
62
|
+
#
|
63
|
+
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
64
|
+
# @param [StringIO] pdf_stream Buffer of the PDF.
|
65
|
+
# @param [Integer] page_id Page ID.
|
66
|
+
# @param [Array<Mindee::Geometry::Point, Mindee::Geometry::Polygon, Mindee::Geometry::Quadrangle>] polygons
|
67
|
+
# @return [Array<Mindee::Extraction::ExtractedImage>] Extracted Images.
|
68
|
+
def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
|
69
|
+
extracted_elements = []
|
70
|
+
|
71
|
+
polygons.each_with_index do |polygon, element_id|
|
72
|
+
polygon = normalize_polygon(polygon)
|
73
|
+
page_content = read_page_content(pdf_stream)
|
74
|
+
|
75
|
+
min_max_x = Geometry.get_min_max_x([
|
76
|
+
polygon.top_left,
|
77
|
+
polygon.bottom_right,
|
78
|
+
polygon.top_right,
|
79
|
+
polygon.bottom_left,
|
80
|
+
])
|
81
|
+
min_max_y = Geometry.get_min_max_y([
|
82
|
+
polygon.top_left,
|
83
|
+
polygon.bottom_right,
|
84
|
+
polygon.top_right,
|
85
|
+
polygon.bottom_left,
|
86
|
+
])
|
87
|
+
file_extension = determine_file_extension(input_source)
|
88
|
+
cropped_image = crop_image(page_content, min_max_x, min_max_y)
|
89
|
+
if file_extension == 'pdf'
|
90
|
+
cropped_image.format('jpg')
|
91
|
+
else
|
92
|
+
cropped_image.format(file_extension)
|
93
|
+
end
|
94
|
+
|
95
|
+
buffer = StringIO.new
|
96
|
+
write_image_to_buffer(cropped_image, buffer)
|
97
|
+
file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"
|
98
|
+
|
99
|
+
extracted_elements << create_extracted_image(buffer, file_name, page_id, element_id)
|
91
100
|
end
|
92
101
|
|
93
|
-
|
94
|
-
write_image_to_buffer(cropped_image, buffer)
|
95
|
-
file_name = "#{input_source.filename}_page#{page_id}-#{element_id}.#{file_extension}"
|
96
|
-
|
97
|
-
extracted_elements << create_extracted_image(buffer, file_name, page_id, element_id)
|
102
|
+
extracted_elements
|
98
103
|
end
|
99
104
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
else
|
110
|
-
polygon
|
105
|
+
# Retrieves the bounding box of a polygon.
|
106
|
+
#
|
107
|
+
# @param [Array<Point>, Mindee::Geometry::Polygon] polygon
|
108
|
+
def self.normalize_polygon(polygon)
|
109
|
+
if polygon.is_a?(Mindee::Geometry::Polygon)
|
110
|
+
Mindee::Geometry.get_bounding_box(polygon)
|
111
|
+
else
|
112
|
+
polygon
|
113
|
+
end
|
111
114
|
end
|
112
|
-
end
|
113
115
|
|
114
|
-
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
|
116
|
+
# Loads a buffer into a MiniMagick Image.
|
117
|
+
#
|
118
|
+
# @param [StringIO] pdf_stream Buffer containing the PDF
|
119
|
+
# @return [MiniMagick::Image] a valid MiniMagick image handle.
|
120
|
+
def self.read_page_content(pdf_stream)
|
121
|
+
pdf_stream.rewind
|
122
|
+
MiniMagick::Image.read(pdf_stream)
|
123
|
+
end
|
122
124
|
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
|
128
|
-
|
129
|
-
|
130
|
-
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
|
136
|
-
|
137
|
-
|
138
|
-
|
125
|
+
# Crops a MiniMagick Image from the given bounding box.
|
126
|
+
#
|
127
|
+
# @param [MiniMagick::Image] image Input Image.
|
128
|
+
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
129
|
+
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
130
|
+
def self.crop_image(image, min_max_x, min_max_y)
|
131
|
+
width = image[:width].to_i
|
132
|
+
height = image[:height].to_i
|
133
|
+
|
134
|
+
image.format('jpg')
|
135
|
+
new_width = (min_max_x.max - min_max_x.min) * width
|
136
|
+
new_height = (min_max_y.max - min_max_y.min) * height
|
137
|
+
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
138
|
+
|
139
|
+
image
|
140
|
+
end
|
139
141
|
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
142
|
+
# Writes a MiniMagick::Image to a buffer.
|
143
|
+
#
|
144
|
+
# @param [MiniMagick::Image] image a valid MiniMagick image.
|
145
|
+
# @param [StringIO] buffer
|
146
|
+
def self.write_image_to_buffer(image, buffer)
|
147
|
+
image.write(buffer)
|
148
|
+
end
|
147
149
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
150
|
+
# Retrieves the file extension from the main file to apply it to the extracted images. Note: coerces pdf as jpg.
|
151
|
+
#
|
152
|
+
# @param [Mindee::Input::Source::LocalInputSource] input_source Local input source.
|
153
|
+
# @return [String] A valid file extension.
|
154
|
+
def self.determine_file_extension(input_source)
|
155
|
+
if input_source.pdf? || input_source.filename.downcase.end_with?('pdf')
|
156
|
+
'jpg'
|
157
|
+
else
|
158
|
+
File.extname(input_source.filename).strip.downcase[1..]
|
159
|
+
end
|
157
160
|
end
|
158
|
-
end
|
159
161
|
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
162
|
+
# Generates an ExtractedImage.
|
163
|
+
#
|
164
|
+
# @param [StringIO] buffer Buffer containing the image.
|
165
|
+
# @param [String] file_name Name for the file.
|
166
|
+
# @param [Object] page_id ID of the page the file was generated from.
|
167
|
+
# @param [Object] element_id ID of the element of a given page.
|
168
|
+
def self.create_extracted_image(buffer, file_name, page_id, element_id)
|
169
|
+
buffer.rewind
|
170
|
+
ExtractedImage.new(
|
171
|
+
Mindee::Input::Source::BytesInputSource.new(buffer.read, file_name),
|
172
|
+
page_id,
|
173
|
+
element_id
|
174
|
+
)
|
175
|
+
end
|
174
176
|
|
175
|
-
|
176
|
-
|
177
|
-
|
178
|
-
|
179
|
-
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
177
|
+
# Loads a single page from an image file or a PDF document.
|
178
|
+
#
|
179
|
+
# @param input_file [LocalInputSource] Local input.
|
180
|
+
# @param [Integer] page_id Page ID.
|
181
|
+
# @return [MiniMagick::Image] A valid PdfDocument handle.
|
182
|
+
def self.load_doc(input_file, page_id)
|
183
|
+
input_file.io_stream.rewind
|
184
|
+
if input_file.pdf?
|
185
|
+
get_page(Origami::PDF.read(input_file.io_stream), page_id)
|
186
|
+
else
|
187
|
+
input_file.io_stream
|
188
|
+
end
|
186
189
|
end
|
187
190
|
end
|
188
191
|
end
|
@@ -1,26 +1,32 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
+
require_relative '../common/image_extractor'
|
4
|
+
|
3
5
|
module Mindee
|
4
6
|
# Image Extraction Module.
|
5
|
-
module
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
7
|
+
module Extraction
|
8
|
+
# Multi-receipts extraction class wrapper.
|
9
|
+
class MultiReceiptsExtractor
|
10
|
+
def self.extract_receipts(input_source, inference)
|
11
|
+
# Extracts individual receipts from multi-receipts documents.
|
12
|
+
#
|
13
|
+
# @param input_source [LocalInputSource] Local Input Source to extract sub-receipts from.
|
14
|
+
# @param inference [Inference] Results of the inference.
|
15
|
+
# @return [Array<ExtractedImage>] Individual extracted receipts as an array of ExtractedImage.
|
12
16
|
|
13
|
-
|
14
|
-
|
17
|
+
images = []
|
18
|
+
raise 'No possible receipts candidates found for MultiReceipts extraction.' unless inference.prediction.receipts
|
15
19
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
(0...input_source.count_pdf_pages).each do |page_id|
|
21
|
+
receipt_positions = inference.pages[page_id].prediction.receipts.map(&:bounding_box)
|
22
|
+
images.concat(
|
23
|
+
Mindee::Extraction::ImageExtractor.extract_multiple_images_from_source(input_source, page_id + 1,
|
24
|
+
receipt_positions)
|
25
|
+
)
|
26
|
+
end
|
22
27
|
|
23
|
-
|
28
|
+
images
|
29
|
+
end
|
24
30
|
end
|
25
31
|
end
|
26
32
|
end
|
@@ -13,7 +13,7 @@ module Mindee
|
|
13
13
|
if local_input.pdf?
|
14
14
|
@source_pdf = local_input.io_stream
|
15
15
|
else
|
16
|
-
pdf_image =
|
16
|
+
pdf_image = Extraction::ImageExtractor.attach_image_as_new_file(local_input.io_stream)
|
17
17
|
io_buffer = StringIO.new
|
18
18
|
pdf_image.save(io_buffer)
|
19
19
|
|
@@ -18,11 +18,15 @@ module Mindee
|
|
18
18
|
# The textual representation of the date as found on the document.
|
19
19
|
# @return [String, nil]
|
20
20
|
attr_reader :raw
|
21
|
+
# Whether the field was computed or retrieved directly from the document.
|
22
|
+
# @return [Boolean, nil]
|
23
|
+
attr_reader :is_computed
|
21
24
|
|
22
25
|
# @param prediction [Hash]
|
23
26
|
# @param page_id [Integer, nil]
|
24
27
|
def initialize(prediction, page_id)
|
25
28
|
super
|
29
|
+
@is_computed = prediction['is_computed']
|
26
30
|
return unless @value
|
27
31
|
|
28
32
|
@date_object = Date.parse(@value)
|
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_line_item'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module FinancialDocument
|
9
|
-
# Financial Document API version 1.
|
9
|
+
# Financial Document API version 1.10 document data.
|
10
10
|
class FinancialDocumentV1Document < Mindee::Parsing::Common::Prediction
|
11
11
|
include Mindee::Parsing::Standard
|
12
12
|
# The customer's address used for billing.
|
@@ -48,6 +48,12 @@ module Mindee
|
|
48
48
|
# The locale detected on the document.
|
49
49
|
# @return [Mindee::Parsing::Standard::LocaleField]
|
50
50
|
attr_reader :locale
|
51
|
+
# The date on which the payment is due / fulfilled.
|
52
|
+
# @return [Mindee::Parsing::Standard::DateField]
|
53
|
+
attr_reader :payment_date
|
54
|
+
# The purchase order number.
|
55
|
+
# @return [Mindee::Parsing::Standard::StringField]
|
56
|
+
attr_reader :po_number
|
51
57
|
# The receipt number or identifier only if document is a receipt.
|
52
58
|
# @return [Mindee::Parsing::Standard::StringField]
|
53
59
|
attr_reader :receipt_number
|
@@ -123,6 +129,8 @@ module Mindee
|
|
123
129
|
@line_items.push(FinancialDocumentV1LineItem.new(item, page_id))
|
124
130
|
end
|
125
131
|
@locale = LocaleField.new(prediction['locale'], page_id)
|
132
|
+
@payment_date = DateField.new(prediction['payment_date'], page_id)
|
133
|
+
@po_number = StringField.new(prediction['po_number'], page_id)
|
126
134
|
@receipt_number = StringField.new(prediction['receipt_number'], page_id)
|
127
135
|
@reference_numbers = []
|
128
136
|
prediction['reference_numbers'].each do |item|
|
@@ -161,11 +169,13 @@ module Mindee
|
|
161
169
|
out_str = String.new
|
162
170
|
out_str << "\n:Locale: #{@locale}".rstrip
|
163
171
|
out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
|
172
|
+
out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
|
164
173
|
out_str << "\n:Receipt Number: #{@receipt_number}".rstrip
|
165
174
|
out_str << "\n:Document Number: #{@document_number}".rstrip
|
166
175
|
out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
|
167
176
|
out_str << "\n:Purchase Date: #{@date}".rstrip
|
168
177
|
out_str << "\n:Due Date: #{@due_date}".rstrip
|
178
|
+
out_str << "\n:Payment Date: #{@payment_date}".rstrip
|
169
179
|
out_str << "\n:Total Net: #{@total_net}".rstrip
|
170
180
|
out_str << "\n:Total Amount: #{@total_amount}".rstrip
|
171
181
|
out_str << "\n:Taxes:#{@taxes}".rstrip
|
@@ -6,7 +6,7 @@ require_relative 'financial_document_v1_document'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module FinancialDocument
|
9
|
-
# Financial Document API version 1.
|
9
|
+
# Financial Document API version 1.10 page data.
|
10
10
|
class FinancialDocumentV1Page < Mindee::Parsing::Common::Page
|
11
11
|
# @param prediction [Hash]
|
12
12
|
def initialize(prediction)
|
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_line_item'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module Invoice
|
9
|
-
# Invoice API version 4.
|
9
|
+
# Invoice API version 4.8 document data.
|
10
10
|
class InvoiceV4Document < Mindee::Parsing::Common::Prediction
|
11
11
|
include Mindee::Parsing::Standard
|
12
12
|
# The customer's address used for billing.
|
@@ -42,6 +42,12 @@ module Mindee
|
|
42
42
|
# The locale detected on the document.
|
43
43
|
# @return [Mindee::Parsing::Standard::LocaleField]
|
44
44
|
attr_reader :locale
|
45
|
+
# The date on which the payment is due / was fulfilled.
|
46
|
+
# @return [Mindee::Parsing::Standard::DateField]
|
47
|
+
attr_reader :payment_date
|
48
|
+
# The purchase order number.
|
49
|
+
# @return [Mindee::Parsing::Standard::StringField]
|
50
|
+
attr_reader :po_number
|
45
51
|
# List of Reference numbers, including PO number.
|
46
52
|
# @return [Array<Mindee::Parsing::Standard::StringField>]
|
47
53
|
attr_reader :reference_numbers
|
@@ -103,6 +109,8 @@ module Mindee
|
|
103
109
|
@line_items.push(InvoiceV4LineItem.new(item, page_id))
|
104
110
|
end
|
105
111
|
@locale = LocaleField.new(prediction['locale'], page_id)
|
112
|
+
@payment_date = DateField.new(prediction['payment_date'], page_id)
|
113
|
+
@po_number = StringField.new(prediction['po_number'], page_id)
|
106
114
|
@reference_numbers = []
|
107
115
|
prediction['reference_numbers'].each do |item|
|
108
116
|
@reference_numbers.push(StringField.new(item, page_id))
|
@@ -137,9 +145,11 @@ module Mindee
|
|
137
145
|
out_str = String.new
|
138
146
|
out_str << "\n:Locale: #{@locale}".rstrip
|
139
147
|
out_str << "\n:Invoice Number: #{@invoice_number}".rstrip
|
148
|
+
out_str << "\n:Purchase Order Number: #{@po_number}".rstrip
|
140
149
|
out_str << "\n:Reference Numbers: #{reference_numbers}".rstrip
|
141
150
|
out_str << "\n:Purchase Date: #{@date}".rstrip
|
142
151
|
out_str << "\n:Due Date: #{@due_date}".rstrip
|
152
|
+
out_str << "\n:Payment Date: #{@payment_date}".rstrip
|
143
153
|
out_str << "\n:Total Net: #{@total_net}".rstrip
|
144
154
|
out_str << "\n:Total Amount: #{@total_amount}".rstrip
|
145
155
|
out_str << "\n:Total Tax: #{@total_tax}".rstrip
|
@@ -6,7 +6,7 @@ require_relative 'invoice_v4_document'
|
|
6
6
|
module Mindee
|
7
7
|
module Product
|
8
8
|
module Invoice
|
9
|
-
# Invoice API version 4.
|
9
|
+
# Invoice API version 4.8 page data.
|
10
10
|
class InvoiceV4Page < Mindee::Parsing::Common::Page
|
11
11
|
# @param prediction [Hash]
|
12
12
|
def initialize(prediction)
|
data/lib/mindee/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.14.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-10-11 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -198,6 +198,7 @@ files:
|
|
198
198
|
- docs/us_mail_v2.md
|
199
199
|
- docs/us_w9_v1.md
|
200
200
|
- examples/auto_invoice_splitter_extraction.rb
|
201
|
+
- examples/auto_multi_receipts_detector_extraction.rb
|
201
202
|
- lib/mindee.rb
|
202
203
|
- lib/mindee/client.rb
|
203
204
|
- lib/mindee/extraction.rb
|