mindee 3.13.0 → 3.15.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +18 -0
- data/docs/bill_of_lading_v1.md +50 -1
- data/docs/energy_bill_fra_v1.md +61 -1
- data/docs/expense_receipts_v5.md +4 -4
- data/docs/financial_document_v1.md +14 -0
- data/docs/invoices_v4.md +16 -2
- data/docs/nutrition_facts_v1.md +80 -1
- data/docs/payslip_fra_v2.md +77 -1
- data/docs/us_mail_v2.md +1 -1
- data/examples/auto_invoice_splitter_extraction.rb +36 -31
- data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
- data/lib/mindee/client.rb +1 -0
- data/lib/mindee/extraction/common/extracted_image.rb +1 -2
- data/lib/mindee/extraction/common/image_extractor.rb +147 -159
- data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +22 -16
- data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +3 -1
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +1 -0
- data/lib/mindee/geometry/point.rb +2 -1
- data/lib/mindee/image/image_compressor.rb +29 -0
- data/lib/mindee/image/image_utils.rb +104 -0
- data/lib/mindee/image.rb +4 -0
- data/lib/mindee/input/sources.rb +36 -0
- data/lib/mindee/parsing/standard/date_field.rb +4 -0
- data/lib/mindee/parsing/standard/position_field.rb +3 -0
- data/lib/mindee/pdf/pdf_compressor.rb +117 -0
- data/lib/mindee/pdf/{pdf_processing.rb → pdf_processor.rb} +17 -0
- data/lib/mindee/pdf/pdf_tools.rb +100 -0
- data/lib/mindee/pdf.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +10 -0
- data/mindee.gemspec +2 -1
- metadata +32 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9d356c6733d8a7d00973b219dbae06199040ca8d4bece4eb3906c8ec873aebf0
|
4
|
+
data.tar.gz: ab240a95c8538891aa4a3ef48285903daa06cebaf13f9578eff1a9675258d3bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ec71145b9604ba30c77a842a33c89c1ad4ab4c70301c3eed2292bc95803dd112ee99c964289ca88c16ddffcb6a37f63130b30a23326b4359929791d0dcef4214
|
7
|
+
data.tar.gz: 0c20c191f6abe4166075a1745860ba500a488294bcb59e2a28e0b61a3bcee07a25be2adfef113d045727eeaf10f935278271a7d259f78d63e580bf8eda3833f3
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,23 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v3.15.0 - 2024-10-29
|
4
|
+
### Changes
|
5
|
+
* :sparkles: add support for image compression
|
6
|
+
* :sparkles: add support for PDF compression
|
7
|
+
### Fixes
|
8
|
+
* :recycle: refactor pdf & image namespaces
|
9
|
+
* :memo: fix rubocop directives unexpectedly appearing in Yard documentation
|
10
|
+
* :arrow_up: bump version for mini_magick
|
11
|
+
|
12
|
+
|
13
|
+
## v3.14.0 - 2024-10-11
|
14
|
+
### Changes
|
15
|
+
* :sparkles: add support for Financial Document v1.10
|
16
|
+
* :sparkles: add support for Invoice v4.8
|
17
|
+
### Fixes
|
18
|
+
* :bug: fix multi-receipts extraction not working as intended
|
19
|
+
|
20
|
+
|
3
21
|
## v3.13.0 - 2024-09-18
|
4
22
|
### Changes
|
5
23
|
* :sparkles: add support for BillOfLadingV1
|
data/docs/bill_of_lading_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Bill of Lading API](https://platform.mindee.com/mindee/bill_of_lading).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,55 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 3b5250a1-b52c-4e0b-bc3e-2f0146b04e29
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/bill_of_lading v1.1
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Bill of Lading Number: XYZ123456
|
52
|
+
:Shipper:
|
53
|
+
:Address: 123 OCEAN DRIVE, SHANGHAI, CHINA
|
54
|
+
:Email:
|
55
|
+
:Name: GLOBAL FREIGHT SOLUTIONS INC.
|
56
|
+
:Phone: 86-21-12345678
|
57
|
+
:Consignee:
|
58
|
+
:Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
|
59
|
+
:Email:
|
60
|
+
:Name: PACIFIC TRADING CO.
|
61
|
+
:Phone: 65-65432100
|
62
|
+
:Notify Party:
|
63
|
+
:Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
|
64
|
+
:Email:
|
65
|
+
:Name: PACIFIC TRADING CO.
|
66
|
+
:Phone: 65-65432100
|
67
|
+
:Carrier:
|
68
|
+
:Name: GLOBAL SHIPPING CO.,LTD.
|
69
|
+
:Professional Number:
|
70
|
+
:SCAC:
|
71
|
+
:Items:
|
72
|
+
+--------------------------------------+--------------+-------------+------------------+----------+-------------+
|
73
|
+
| Description | Gross Weight | Measurement | Measurement Unit | Quantity | Weight Unit |
|
74
|
+
+======================================+==============+=============+==================+==========+=============+
|
75
|
+
| ELECTRONIC COMPONENTS\nP/N: 12345... | 500.00 | 1.50 | cbm | 1.00 | kgs |
|
76
|
+
+--------------------------------------+--------------+-------------+------------------+----------+-------------+
|
77
|
+
:Port of Loading: SHANGHAI, CHINA
|
78
|
+
:Port of Discharge: LOS ANGELES, USA
|
79
|
+
:Place of Delivery: LOS ANGELES, USA
|
80
|
+
:Date of issue: 2022-09-30
|
81
|
+
:Departure Date:
|
82
|
+
```
|
83
|
+
|
35
84
|
# Field Types
|
36
85
|
## Standard Fields
|
37
86
|
These fields are generic and used in several products.
|
data/docs/energy_bill_fra_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Energy Bill API](https://platform.mindee.com/mindee/energy_bill_fra).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,66 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 17f0ccef-e3fe-4a28-838d-d704489d6ce7
|
42
|
+
:Filename: default_sample.pdf
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/energy_bill_fra v1.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Invoice Number: 10123590373
|
52
|
+
:Contract ID: 1234567890
|
53
|
+
:Delivery Point: 98765432109876
|
54
|
+
:Invoice Date: 2021-01-29
|
55
|
+
:Due Date: 2021-02-15
|
56
|
+
:Total Before Taxes: 1241.03
|
57
|
+
:Total Taxes: 238.82
|
58
|
+
:Total Amount: 1479.85
|
59
|
+
:Energy Supplier:
|
60
|
+
:Address: TSA 12345, 12345 DEMOCITY CEDEX, 75001 PARIS
|
61
|
+
:Name: EDF
|
62
|
+
:Energy Consumer:
|
63
|
+
:Address: 12 AVENUE DES RÊVES, RDC A 123 COUR FAUSSE A, 75000 PARIS
|
64
|
+
:Name: John Doe
|
65
|
+
:Subscription:
|
66
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
67
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
68
|
+
+======================================+============+============+==========+===========+============+
|
69
|
+
| Abonnement électricité | 2021-02-28 | 2021-01-01 | 5.50 | 59.00 | 29.50 |
|
70
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
71
|
+
:Energy Usage:
|
72
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
73
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
74
|
+
+======================================+============+============+==========+===========+============+
|
75
|
+
| Consommation (HT) | 2021-01-27 | 2020-11-28 | 20.00 | 898.43 | 10.47 |
|
76
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
77
|
+
:Taxes and Contributions:
|
78
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
79
|
+
| Description | End Date | Start Date | Tax Rate | Total | Unit Price |
|
80
|
+
+======================================+============+============+==========+===========+============+
|
81
|
+
| Contribution au Service Public de... | 2021-01-27 | 2020-11-28 | 20.00 | 193.07 | 2.25 |
|
82
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
83
|
+
| Départementale sur la Conso Final... | 2020-12-31 | 2020-11-28 | 20.00 | 13.98 | 0.3315 |
|
84
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
85
|
+
| Communale sur la Conso Finale Ele... | 2021-01-27 | 2021-01-01 | 20.00 | 28.56 | 0.6545 |
|
86
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
87
|
+
| Contribution Tarifaire d'Achemine... | 2020-12-31 | 2020-11-28 | 20.00 | 27.96 | 0.663 |
|
88
|
+
+--------------------------------------+------------+------------+----------+-----------+------------+
|
89
|
+
:Meter Details:
|
90
|
+
:Meter Number: 620
|
91
|
+
:Meter Type: electricity
|
92
|
+
:Unit of Measure: kWh
|
93
|
+
```
|
94
|
+
|
35
95
|
# Field Types
|
36
96
|
## Standard Fields
|
37
97
|
These fields are generic and used in several products.
|
data/docs/expense_receipts_v5.md
CHANGED
@@ -50,7 +50,7 @@ Prediction
|
|
50
50
|
==========
|
51
51
|
:Expense Locale: en-GB; en; GB; GBP;
|
52
52
|
:Purchase Category: food
|
53
|
-
:Purchase Subcategory:
|
53
|
+
:Purchase Subcategory:
|
54
54
|
:Document Type: EXPENSE RECEIPT
|
55
55
|
:Purchase Date: 2016-02-26
|
56
56
|
:Purchase Time: 15:20
|
@@ -64,7 +64,7 @@ Prediction
|
|
64
64
|
+===============+========+==========+===============+
|
65
65
|
| 8.50 | VAT | 20.00 | 1.70 |
|
66
66
|
+---------------+--------+----------+---------------+
|
67
|
-
:Supplier Name:
|
67
|
+
:Supplier Name: Clachan
|
68
68
|
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
69
69
|
Type: VAT NUMBER, Value: 232153895
|
70
70
|
:Supplier Address: 34 Kingley Street W1B 50H
|
@@ -84,7 +84,7 @@ Page 0
|
|
84
84
|
------
|
85
85
|
:Expense Locale: en-GB; en; GB; GBP;
|
86
86
|
:Purchase Category: food
|
87
|
-
:Purchase Subcategory:
|
87
|
+
:Purchase Subcategory:
|
88
88
|
:Document Type: EXPENSE RECEIPT
|
89
89
|
:Purchase Date: 2016-02-26
|
90
90
|
:Purchase Time: 15:20
|
@@ -98,7 +98,7 @@ Page 0
|
|
98
98
|
+===============+========+==========+===============+
|
99
99
|
| 8.50 | VAT | 20.00 | 1.70 |
|
100
100
|
+---------------+--------+----------+---------------+
|
101
|
-
:Supplier Name:
|
101
|
+
:Supplier Name: Clachan
|
102
102
|
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
103
103
|
Type: VAT NUMBER, Value: 232153895
|
104
104
|
:Supplier Address: 34 Kingley Street W1B 50H
|
data/docs/financial_document_v1.md
CHANGED
@@ -370,6 +370,20 @@ end
|
|
370
370
|
puts result.document.inference.prediction.locale.value
|
371
371
|
```
|
372
372
|
|
373
|
+
## Payment Date
|
374
|
+
**payment_date** ([DateField](#date-field)): The date on which the payment is due / fulfilled.
|
375
|
+
|
376
|
+
```rb
|
377
|
+
puts result.document.inference.prediction.payment_date.value
|
378
|
+
```
|
379
|
+
|
380
|
+
## Purchase Order Number
|
381
|
+
**po_number** ([StringField](#string-field)): The purchase order number.
|
382
|
+
|
383
|
+
```rb
|
384
|
+
puts result.document.inference.prediction.po_number.value
|
385
|
+
```
|
386
|
+
|
373
387
|
## Receipt Number
|
374
388
|
**receipt_number** ([StringField](#string-field)): The receipt number or identifier only if document is a receipt.
|
375
389
|
|
data/docs/invoices_v4.md
CHANGED
@@ -93,7 +93,7 @@ Prediction
|
|
93
93
|
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
|
94
94
|
:Supplier Phone Number: 4165551212
|
95
95
|
:Supplier Website:
|
96
|
-
:Supplier Email:
|
96
|
+
:Supplier Email: ldoi@example.com
|
97
97
|
:Customer Name: JIRO DOI
|
98
98
|
:Customer Company Registrations:
|
99
99
|
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
|
@@ -137,7 +137,7 @@ Page 0
|
|
137
137
|
:Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
|
138
138
|
:Supplier Phone Number: 4165551212
|
139
139
|
:Supplier Website:
|
140
|
-
:Supplier Email:
|
140
|
+
:Supplier Email: ldoi@example.com
|
141
141
|
:Customer Name: JIRO DOI
|
142
142
|
:Customer Company Registrations:
|
143
143
|
:Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
|
@@ -332,6 +332,20 @@ end
|
|
332
332
|
puts result.document.inference.prediction.locale.value
|
333
333
|
```
|
334
334
|
|
335
|
+
## Payment Date
|
336
|
+
**payment_date** ([DateField](#date-field)): The date on which the payment is due / was fulfilled.
|
337
|
+
|
338
|
+
```rb
|
339
|
+
puts result.document.inference.prediction.payment_date.value
|
340
|
+
```
|
341
|
+
|
342
|
+
## Purchase Order Number
|
343
|
+
**po_number** ([StringField](#string-field)): The purchase order number.
|
344
|
+
|
345
|
+
```rb
|
346
|
+
puts result.document.inference.prediction.po_number.value
|
347
|
+
```
|
348
|
+
|
335
349
|
## Reference Numbers
|
336
350
|
**reference_numbers** (Array<[StringField](#string-field)>): List of Reference numbers, including PO number.
|
337
351
|
|
data/docs/nutrition_facts_v1.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Nutrition Facts Label API](https://platform.mindee.com/mindee/nutrition_facts).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,85 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 38a12fe0-5d69-4ca4-9b30-12f1b659311c
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/nutrition_facts v1.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Serving per Box: 2.00
|
52
|
+
:Serving Size:
|
53
|
+
:Amount: 228.00
|
54
|
+
:Unit: g
|
55
|
+
:Calories:
|
56
|
+
:Daily Value:
|
57
|
+
:Per 100g:
|
58
|
+
:Per Serving: 250.00
|
59
|
+
:Total Fat:
|
60
|
+
:Daily Value:
|
61
|
+
:Per 100g:
|
62
|
+
:Per Serving: 12.00
|
63
|
+
:Saturated Fat:
|
64
|
+
:Daily Value: 15.00
|
65
|
+
:Per 100g:
|
66
|
+
:Per Serving: 3.00
|
67
|
+
:Trans Fat:
|
68
|
+
:Daily Value:
|
69
|
+
:Per 100g:
|
70
|
+
:Per Serving: 3.00
|
71
|
+
:Cholesterol:
|
72
|
+
:Daily Value: 10.00
|
73
|
+
:Per 100g:
|
74
|
+
:Per Serving: 30.00
|
75
|
+
:Total Carbohydrate:
|
76
|
+
:Daily Value: 10.00
|
77
|
+
:Per 100g:
|
78
|
+
:Per Serving: 31.00
|
79
|
+
:Dietary Fiber:
|
80
|
+
:Daily Value: 0.00
|
81
|
+
:Per 100g:
|
82
|
+
:Per Serving: 0.00
|
83
|
+
:Total Sugars:
|
84
|
+
:Daily Value:
|
85
|
+
:Per 100g:
|
86
|
+
:Per Serving: 5.00
|
87
|
+
:Added Sugars:
|
88
|
+
:Daily Value:
|
89
|
+
:Per 100g:
|
90
|
+
:Per Serving:
|
91
|
+
:Protein:
|
92
|
+
:Daily Value:
|
93
|
+
:Per 100g:
|
94
|
+
:Per Serving: 5.00
|
95
|
+
:sodium:
|
96
|
+
:Daily Value: 20.00
|
97
|
+
:Per 100g:
|
98
|
+
:Per Serving: 470.00
|
99
|
+
:Unit: mg
|
100
|
+
:nutrients:
|
101
|
+
+-------------+----------------------+----------+-------------+------+
|
102
|
+
| Daily Value | Name | Per 100g | Per Serving | Unit |
|
103
|
+
+=============+======================+==========+=============+======+
|
104
|
+
| 12.00 | Vitamin A | | 4.00 | mcg |
|
105
|
+
+-------------+----------------------+----------+-------------+------+
|
106
|
+
| 12.00 | Vitamin C | | 2.00 | mg |
|
107
|
+
+-------------+----------------------+----------+-------------+------+
|
108
|
+
| 12.00 | Calcium | | 45.60 | mg |
|
109
|
+
+-------------+----------------------+----------+-------------+------+
|
110
|
+
| 12.00 | Iron | | 0.90 | mg |
|
111
|
+
+-------------+----------------------+----------+-------------+------+
|
112
|
+
```
|
113
|
+
|
35
114
|
# Field Types
|
36
115
|
## Standard Fields
|
37
116
|
These fields are generic and used in several products.
|
data/docs/payslip_fra_v2.md
CHANGED
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
|
|
6
6
|
---
|
7
7
|
The Ruby OCR SDK supports the [Payslip API](https://platform.mindee.com/mindee/payslip_fra).
|
8
8
|
|
9
|
-
|
9
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
10
10
|

|
11
11
|
|
12
12
|
# Quick-Start
|
@@ -32,6 +32,82 @@ puts result.document
|
|
32
32
|
# puts result.document.inference.prediction
|
33
33
|
|
34
34
|
```
|
35
|
+
|
36
|
+
**Output (RST):**
|
37
|
+
```rst
|
38
|
+
########
|
39
|
+
Document
|
40
|
+
########
|
41
|
+
:Mindee ID: 972edba5-25aa-49d0-8431-e2557ddd788e
|
42
|
+
:Filename: default_sample.jpg
|
43
|
+
|
44
|
+
Inference
|
45
|
+
#########
|
46
|
+
:Product: mindee/payslip_fra v2.0
|
47
|
+
:Rotation applied: No
|
48
|
+
|
49
|
+
Prediction
|
50
|
+
==========
|
51
|
+
:Employee:
|
52
|
+
:Address: 52 RUE DES FLEURS 33500 LIBOURNE FRANCE
|
53
|
+
:Date of Birth:
|
54
|
+
:First Name: Jean Luc
|
55
|
+
:Last Name: Picard
|
56
|
+
:Phone Number:
|
57
|
+
:Registration Number:
|
58
|
+
:Social Security Number: 123456789012345
|
59
|
+
:Employer:
|
60
|
+
:Address: 1 RUE DU TONNOT 25210 DOUBS
|
61
|
+
:Company ID: 12345678901234
|
62
|
+
:Company Site:
|
63
|
+
:NAF Code: 1234A
|
64
|
+
:Name: DEMO COMPANY
|
65
|
+
:Phone Number:
|
66
|
+
:URSSAF Number:
|
67
|
+
:Bank Account Details:
|
68
|
+
:Bank Name:
|
69
|
+
:IBAN:
|
70
|
+
:SWIFT:
|
71
|
+
:Employment:
|
72
|
+
:Category: Cadre
|
73
|
+
:Coefficient: 600.00
|
74
|
+
:Collective Agreement: Construction -- Promotion
|
75
|
+
:Job Title: Directeur Régional du Développement
|
76
|
+
:Position Level:
|
77
|
+
:Start Date: 2022-05-01
|
78
|
+
:Salary Details:
|
79
|
+
+--------------+-----------+--------------------------------------+-----------+
|
80
|
+
| Amount | Base | Description | Rate |
|
81
|
+
+==============+===========+======================================+===========+
|
82
|
+
| 6666.67 | | Salaire de base | |
|
83
|
+
+--------------+-----------+--------------------------------------+-----------+
|
84
|
+
| 9.30 | | Part patronale Mutuelle NR | |
|
85
|
+
+--------------+-----------+--------------------------------------+-----------+
|
86
|
+
| 508.30 | | Avantages en nature voiture | |
|
87
|
+
+--------------+-----------+--------------------------------------+-----------+
|
88
|
+
:Pay Detail:
|
89
|
+
:Gross Salary: 7184.27
|
90
|
+
:Gross Salary YTD: 18074.81
|
91
|
+
:Income Tax Rate: 17.60
|
92
|
+
:Income Tax Withheld: 1030.99
|
93
|
+
:Net Paid: 3868.32
|
94
|
+
:Net Paid Before Tax: 4899.31
|
95
|
+
:Net Taxable: 5857.90
|
96
|
+
:Net Taxable YTD: 14752.73
|
97
|
+
:Total Cost Employer: 10486.94
|
98
|
+
:Total Taxes and Deductions: 1650.36
|
99
|
+
:PTO:
|
100
|
+
:Accrued This Period: 6.17
|
101
|
+
:Balance End of Period: 6.17
|
102
|
+
:Used This Period:
|
103
|
+
:Pay Period:
|
104
|
+
:End Date: 2023-03-31
|
105
|
+
:Month: 03
|
106
|
+
:Payment Date: 2023-03-29
|
107
|
+
:Start Date: 2023-03-01
|
108
|
+
:Year: 2023
|
109
|
+
```
|
110
|
+
|
35
111
|
# Field Types
|
36
112
|
## Standard Fields
|
37
113
|
These fields are generic and used in several products.
|
data/docs/us_mail_v2.md
CHANGED
@@ -38,7 +38,7 @@ puts result.document
|
|
38
38
|
:Sender Name: zed
|
39
39
|
:Sender Address:
|
40
40
|
:City: Dallas
|
41
|
-
:Complete Address: 54321 Elm Street, Dallas, Texas
|
41
|
+
:Complete Address: 54321 Elm Street, Dallas, Texas 54321
|
42
42
|
:Postal Code: 54321
|
43
43
|
:State: TX
|
44
44
|
:Street: 54321 Elm Street
|
data/examples/auto_invoice_splitter_extraction.rb
CHANGED
@@ -2,42 +2,47 @@
|
|
2
2
|
|
3
3
|
require 'mindee'
|
4
4
|
|
5
|
-
|
6
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
-
|
8
|
-
|
9
|
-
input_source
|
10
|
-
|
11
|
-
if input_source.pdf?
|
12
|
-
pdf_extractor = Mindee::Extraction::PdfExtractor.new(input_source)
|
13
|
-
if pdf_extractor.page_count > 1
|
14
|
-
invoice_splitter_response = mindee_client.enqueue_and_parse(
|
15
|
-
input_source,
|
16
|
-
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1
|
17
|
-
)
|
18
|
-
page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
|
19
|
-
extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
|
20
|
-
extracted_pdfs.each do |extracted_pdf|
|
21
|
-
# Optional: Save the files locally
|
22
|
-
# extracted_pdf.write_to_file("output/path")
|
23
|
-
|
24
|
-
invoice_result = mindee_client.parse(
|
25
|
-
InvoiceV4,
|
26
|
-
extracted_pdf.as_source
|
27
|
-
)
|
28
|
-
puts invoice_result
|
29
|
-
end
|
5
|
+
def invoice_splitter_auto_extraction(file_path)
|
6
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
+
input_source = mindee_client.source_from_path(file_path)
|
8
|
+
|
9
|
+
if input_source.pdf? && input_source.count_pdf_pages > 1
|
10
|
+
parse_multi_page(mindee_client, input_source)
|
30
11
|
else
|
31
|
-
|
32
|
-
input_source,
|
33
|
-
Mindee::Product::Invoice::InvoiceV4
|
34
|
-
)
|
35
|
-
puts invoice_result.document
|
12
|
+
parse_single_page(mindee_client, input_source)
|
36
13
|
end
|
37
|
-
|
14
|
+
end
|
15
|
+
|
16
|
+
def parse_single_page(mindee_client, input_source)
|
38
17
|
invoice_result = mindee_client.parse(
|
39
18
|
input_source,
|
40
19
|
Mindee::Product::Invoice::InvoiceV4
|
41
20
|
)
|
42
21
|
puts invoice_result.document
|
43
22
|
end
|
23
|
+
|
24
|
+
def parse_multi_page(mindee_client, input_source)
|
25
|
+
pdf_extractor = Mindee::Extraction::PdfExtractor::PdfExtractor.new(input_source)
|
26
|
+
invoice_splitter_response = mindee_client.enqueue_and_parse(
|
27
|
+
input_source,
|
28
|
+
Mindee::Product::InvoiceSplitter::InvoiceSplitterV1,
|
29
|
+
close_file: false
|
30
|
+
)
|
31
|
+
page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
|
32
|
+
extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
|
33
|
+
|
34
|
+
extracted_pdfs.each do |extracted_pdf|
|
35
|
+
# Optional: Save the files locally
|
36
|
+
# extracted_pdf.write_to_file("output/path")
|
37
|
+
|
38
|
+
invoice_result = mindee_client.parse(
|
39
|
+
extracted_pdf.as_input_source,
|
40
|
+
Mindee::Product::Invoice::InvoiceV4,
|
41
|
+
close_file: false
|
42
|
+
)
|
43
|
+
puts invoice_result.document
|
44
|
+
end
|
45
|
+
end
|
46
|
+
|
47
|
+
my_file_path = '/path/to/the/file.ext'
|
48
|
+
invoice_splitter_auto_extraction(my_file_path)
|
data/examples/auto_multi_receipts_detector_extraction.rb
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'mindee'
|
4
|
+
require 'mindee/extraction'
|
5
|
+
|
6
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
7
|
+
def multi_receipts_detection(file_path, mindee_client)
|
8
|
+
input_source = mindee_client.source_from_path(file_path)
|
9
|
+
|
10
|
+
result_split = mindee_client.parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::MultiReceiptsDetector::MultiReceiptsDetectorV1,
|
13
|
+
close_file: false
|
14
|
+
)
|
15
|
+
|
16
|
+
images = Mindee::Extraction::MultiReceiptsExtractor.extract_receipts(input_source, result_split.document.inference)
|
17
|
+
images.each do |sub_image|
|
18
|
+
# Optional: Save the files locally
|
19
|
+
# sub_image.write_to_file("/path/to/my/extracted/file/folder")
|
20
|
+
|
21
|
+
result_receipt = mindee_client.parse(
|
22
|
+
sub_image.as_source,
|
23
|
+
Mindee::Product::Receipt::ReceiptV5,
|
24
|
+
close_file: false
|
25
|
+
)
|
26
|
+
puts result_receipt.document
|
27
|
+
end
|
28
|
+
end
|
29
|
+
|
30
|
+
my_file_path = '/path/to/the/file.ext'
|
31
|
+
multi_receipts_detection(my_file_path, mindee_client)
|
data/lib/mindee/client.rb
CHANGED
@@ -128,6 +128,7 @@ module Mindee
|
|
128
128
|
end
|
129
129
|
|
130
130
|
# rubocop:disable Metrics/ParameterLists
|
131
|
+
|
131
132
|
# Enqueue a document for async parsing and automatically try to retrieve it
|
132
133
|
#
|
133
134
|
# @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
|
data/lib/mindee/extraction/common/extracted_image.rb
CHANGED
@@ -4,7 +4,7 @@ require_relative '../../input/sources'
|
|
4
4
|
|
5
5
|
module Mindee
|
6
6
|
# Image Extraction Module.
|
7
|
-
module
|
7
|
+
module Extraction
|
8
8
|
# Generic class for image extraction.
|
9
9
|
class ExtractedImage
|
10
10
|
# Id of the page the image was extracted from.
|
@@ -54,7 +54,6 @@ module Mindee
|
|
54
54
|
image = MiniMagick::Image.read(@buffer)
|
55
55
|
image.format file_format.downcase
|
56
56
|
image.write resolved_path.to_s
|
57
|
-
logger.info("File saved successfully to '#{resolved_path}'.")
|
58
57
|
rescue TypeError
|
59
58
|
raise 'Invalid path/filename provided.'
|
60
59
|
rescue StandardError
|