mindee 3.13.0 → 3.15.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +18 -0
  3. data/docs/bill_of_lading_v1.md +50 -1
  4. data/docs/energy_bill_fra_v1.md +61 -1
  5. data/docs/expense_receipts_v5.md +4 -4
  6. data/docs/financial_document_v1.md +14 -0
  7. data/docs/invoices_v4.md +16 -2
  8. data/docs/nutrition_facts_v1.md +80 -1
  9. data/docs/payslip_fra_v2.md +77 -1
  10. data/docs/us_mail_v2.md +1 -1
  11. data/examples/auto_invoice_splitter_extraction.rb +36 -31
  12. data/examples/auto_multi_receipts_detector_extraction.rb +31 -0
  13. data/lib/mindee/client.rb +1 -0
  14. data/lib/mindee/extraction/common/extracted_image.rb +1 -2
  15. data/lib/mindee/extraction/common/image_extractor.rb +147 -159
  16. data/lib/mindee/extraction/multi_receipts_extractor/multi_receipts_extractor.rb +22 -16
  17. data/lib/mindee/extraction/pdf_extractor/pdf_extractor.rb +3 -1
  18. data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +1 -0
  19. data/lib/mindee/geometry/point.rb +2 -1
  20. data/lib/mindee/image/image_compressor.rb +29 -0
  21. data/lib/mindee/image/image_utils.rb +104 -0
  22. data/lib/mindee/image.rb +4 -0
  23. data/lib/mindee/input/sources.rb +36 -0
  24. data/lib/mindee/parsing/standard/date_field.rb +4 -0
  25. data/lib/mindee/parsing/standard/position_field.rb +3 -0
  26. data/lib/mindee/pdf/pdf_compressor.rb +117 -0
  27. data/lib/mindee/pdf/{pdf_processing.rb → pdf_processor.rb} +17 -0
  28. data/lib/mindee/pdf/pdf_tools.rb +100 -0
  29. data/lib/mindee/pdf.rb +3 -1
  30. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +11 -1
  31. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
  32. data/lib/mindee/product/invoice/invoice_v4_document.rb +11 -1
  33. data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
  34. data/lib/mindee/version.rb +1 -1
  35. data/lib/mindee.rb +10 -0
  36. data/mindee.gemspec +2 -1
  37. metadata +32 -7
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 04d255d5113e3dc0f77b2f1d73bc13c9c5de423e7999d984480279488ee65835
4
- data.tar.gz: 02cfd3cec62b74c7bcbb90068deb6c791fa4f9e36837614f6712b36f32557b5f
3
+ metadata.gz: 9d356c6733d8a7d00973b219dbae06199040ca8d4bece4eb3906c8ec873aebf0
4
+ data.tar.gz: ab240a95c8538891aa4a3ef48285903daa06cebaf13f9578eff1a9675258d3bb
5
5
  SHA512:
6
- metadata.gz: d95a14c6fdd4238b135907c5b98eee4026287dbe14c2e71aac3d214f3cd92fd83472ff3bf9d59256892ee806c2375c97a6bdd3889a717ba76a3557c722cf3e9d
7
- data.tar.gz: ac8b4b8dd66883be7f6669d0da7e68740fdf1dc64c9db9dd6228ec927f300e0bbc43f1498ed231110932fc5531e7d840f7d5f1b40f60bde1415481b699f3d304
6
+ metadata.gz: ec71145b9604ba30c77a842a33c89c1ad4ab4c70301c3eed2292bc95803dd112ee99c964289ca88c16ddffcb6a37f63130b30a23326b4359929791d0dcef4214
7
+ data.tar.gz: 0c20c191f6abe4166075a1745860ba500a488294bcb59e2a28e0b61a3bcee07a25be2adfef113d045727eeaf10f935278271a7d259f78d63e580bf8eda3833f3
data/CHANGELOG.md CHANGED
@@ -1,5 +1,23 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v3.15.0 - 2024-10-29
4
+ ### Changes
5
+ * :sparkles: add support for image compression
6
+ * :sparkles: add support for PDF compression
7
+ ### Fixes
8
+ * :recycle: refactor pdf & image namespaces
9
+ * :memo: fix rubocop directives unexpectedly appearing in Yard documentation
10
+ * :arrow_up: bump version for mini_magick
11
+
12
+
13
+ ## v3.14.0 - 2024-10-11
14
+ ### Changes
15
+ * :sparkles: add support for Financial Document v1.10
16
+ * :sparkles: add support for Invoice v4.8
17
+ ### Fixes
18
+ * :bug: fix multi-receipts extraction not working as intended
19
+
20
+
3
21
  ## v3.13.0 - 2024-09-18
4
22
  ### Changes
5
23
  * :sparkles: add support for BillOfLadingV1
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
6
6
  ---
7
7
  The Ruby OCR SDK supports the [Bill of Lading API](https://platform.mindee.com/mindee/bill_of_lading).
8
8
 
9
- The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg) can be used for testing purposes.
9
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
10
10
  ![Bill of Lading sample](https://github.com/mindee/client-lib-test-data/blob/main/products/bill_of_lading/default_sample.jpg?raw=true)
11
11
 
12
12
  # Quick-Start
@@ -32,6 +32,55 @@ puts result.document
32
32
  # puts result.document.inference.prediction
33
33
 
34
34
  ```
35
+
36
+ **Output (RST):**
37
+ ```rst
38
+ ########
39
+ Document
40
+ ########
41
+ :Mindee ID: 3b5250a1-b52c-4e0b-bc3e-2f0146b04e29
42
+ :Filename: default_sample.jpg
43
+
44
+ Inference
45
+ #########
46
+ :Product: mindee/bill_of_lading v1.1
47
+ :Rotation applied: No
48
+
49
+ Prediction
50
+ ==========
51
+ :Bill of Lading Number: XYZ123456
52
+ :Shipper:
53
+ :Address: 123 OCEAN DRIVE, SHANGHAI, CHINA
54
+ :Email:
55
+ :Name: GLOBAL FREIGHT SOLUTIONS INC.
56
+ :Phone: 86-21-12345678
57
+ :Consignee:
58
+ :Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
59
+ :Email:
60
+ :Name: PACIFIC TRADING CO.
61
+ :Phone: 65-65432100
62
+ :Notify Party:
63
+ :Address: 789 TRADE STREET, SINGAPORE 567890, SINGAPORE
64
+ :Email:
65
+ :Name: PACIFIC TRADING CO.
66
+ :Phone: 65-65432100
67
+ :Carrier:
68
+ :Name: GLOBAL SHIPPING CO.,LTD.
69
+ :Professional Number:
70
+ :SCAC:
71
+ :Items:
72
+ +--------------------------------------+--------------+-------------+------------------+----------+-------------+
73
+ | Description | Gross Weight | Measurement | Measurement Unit | Quantity | Weight Unit |
74
+ +======================================+==============+=============+==================+==========+=============+
75
+ | ELECTRONIC COMPONENTS\nP/N: 12345... | 500.00 | 1.50 | cbm | 1.00 | kgs |
76
+ +--------------------------------------+--------------+-------------+------------------+----------+-------------+
77
+ :Port of Loading: SHANGHAI, CHINA
78
+ :Port of Discharge: LOS ANGELES, USA
79
+ :Place of Delivery: LOS ANGELES, USA
80
+ :Date of issue: 2022-09-30
81
+ :Departure Date:
82
+ ```
83
+
35
84
  # Field Types
36
85
  ## Standard Fields
37
86
  These fields are generic and used in several products.
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
6
6
  ---
7
7
  The Ruby OCR SDK supports the [Energy Bill API](https://platform.mindee.com/mindee/energy_bill_fra).
8
8
 
9
- The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.jpg) can be used for testing purposes.
9
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
10
10
  ![Energy Bill sample](https://github.com/mindee/client-lib-test-data/blob/main/products/energy_bill_fra/default_sample.jpg?raw=true)
11
11
 
12
12
  # Quick-Start
@@ -32,6 +32,66 @@ puts result.document
32
32
  # puts result.document.inference.prediction
33
33
 
34
34
  ```
35
+
36
+ **Output (RST):**
37
+ ```rst
38
+ ########
39
+ Document
40
+ ########
41
+ :Mindee ID: 17f0ccef-e3fe-4a28-838d-d704489d6ce7
42
+ :Filename: default_sample.pdf
43
+
44
+ Inference
45
+ #########
46
+ :Product: mindee/energy_bill_fra v1.0
47
+ :Rotation applied: No
48
+
49
+ Prediction
50
+ ==========
51
+ :Invoice Number: 10123590373
52
+ :Contract ID: 1234567890
53
+ :Delivery Point: 98765432109876
54
+ :Invoice Date: 2021-01-29
55
+ :Due Date: 2021-02-15
56
+ :Total Before Taxes: 1241.03
57
+ :Total Taxes: 238.82
58
+ :Total Amount: 1479.85
59
+ :Energy Supplier:
60
+ :Address: TSA 12345, 12345 DEMOCITY CEDEX, 75001 PARIS
61
+ :Name: EDF
62
+ :Energy Consumer:
63
+ :Address: 12 AVENUE DES RÊVES, RDC A 123 COUR FAUSSE A, 75000 PARIS
64
+ :Name: John Doe
65
+ :Subscription:
66
+ +--------------------------------------+------------+------------+----------+-----------+------------+
67
+ | Description | End Date | Start Date | Tax Rate | Total | Unit Price |
68
+ +======================================+============+============+==========+===========+============+
69
+ | Abonnement électricité | 2021-02-28 | 2021-01-01 | 5.50 | 59.00 | 29.50 |
70
+ +--------------------------------------+------------+------------+----------+-----------+------------+
71
+ :Energy Usage:
72
+ +--------------------------------------+------------+------------+----------+-----------+------------+
73
+ | Description | End Date | Start Date | Tax Rate | Total | Unit Price |
74
+ +======================================+============+============+==========+===========+============+
75
+ | Consommation (HT) | 2021-01-27 | 2020-11-28 | 20.00 | 898.43 | 10.47 |
76
+ +--------------------------------------+------------+------------+----------+-----------+------------+
77
+ :Taxes and Contributions:
78
+ +--------------------------------------+------------+------------+----------+-----------+------------+
79
+ | Description | End Date | Start Date | Tax Rate | Total | Unit Price |
80
+ +======================================+============+============+==========+===========+============+
81
+ | Contribution au Service Public de... | 2021-01-27 | 2020-11-28 | 20.00 | 193.07 | 2.25 |
82
+ +--------------------------------------+------------+------------+----------+-----------+------------+
83
+ | Départementale sur la Conso Final... | 2020-12-31 | 2020-11-28 | 20.00 | 13.98 | 0.3315 |
84
+ +--------------------------------------+------------+------------+----------+-----------+------------+
85
+ | Communale sur la Conso Finale Ele... | 2021-01-27 | 2021-01-01 | 20.00 | 28.56 | 0.6545 |
86
+ +--------------------------------------+------------+------------+----------+-----------+------------+
87
+ | Contribution Tarifaire d'Achemine... | 2020-12-31 | 2020-11-28 | 20.00 | 27.96 | 0.663 |
88
+ +--------------------------------------+------------+------------+----------+-----------+------------+
89
+ :Meter Details:
90
+ :Meter Number: 620
91
+ :Meter Type: electricity
92
+ :Unit of Measure: kWh
93
+ ```
94
+
35
95
  # Field Types
36
96
  ## Standard Fields
37
97
  These fields are generic and used in several products.
@@ -50,7 +50,7 @@ Prediction
50
50
  ==========
51
51
  :Expense Locale: en-GB; en; GB; GBP;
52
52
  :Purchase Category: food
53
- :Purchase Subcategory: restaurant
53
+ :Purchase Subcategory:
54
54
  :Document Type: EXPENSE RECEIPT
55
55
  :Purchase Date: 2016-02-26
56
56
  :Purchase Time: 15:20
@@ -64,7 +64,7 @@ Prediction
64
64
  +===============+========+==========+===============+
65
65
  | 8.50 | VAT | 20.00 | 1.70 |
66
66
  +---------------+--------+----------+---------------+
67
- :Supplier Name: clachan
67
+ :Supplier Name: Clachan
68
68
  :Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
69
69
  Type: VAT NUMBER, Value: 232153895
70
70
  :Supplier Address: 34 Kingley Street W1B 50H
@@ -84,7 +84,7 @@ Page 0
84
84
  ------
85
85
  :Expense Locale: en-GB; en; GB; GBP;
86
86
  :Purchase Category: food
87
- :Purchase Subcategory: restaurant
87
+ :Purchase Subcategory:
88
88
  :Document Type: EXPENSE RECEIPT
89
89
  :Purchase Date: 2016-02-26
90
90
  :Purchase Time: 15:20
@@ -98,7 +98,7 @@ Page 0
98
98
  +===============+========+==========+===============+
99
99
  | 8.50 | VAT | 20.00 | 1.70 |
100
100
  +---------------+--------+----------+---------------+
101
- :Supplier Name: clachan
101
+ :Supplier Name: Clachan
102
102
  :Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
103
103
  Type: VAT NUMBER, Value: 232153895
104
104
  :Supplier Address: 34 Kingley Street W1B 50H
@@ -370,6 +370,20 @@ end
370
370
  puts result.document.inference.prediction.locale.value
371
371
  ```
372
372
 
373
+ ## Payment Date
374
+ **payment_date** ([DateField](#date-field)): The date on which the payment is due / fulfilled.
375
+
376
+ ```rb
377
+ puts result.document.inference.prediction.payment_date.value
378
+ ```
379
+
380
+ ## Purchase Order Number
381
+ **po_number** ([StringField](#string-field)): The purchase order number.
382
+
383
+ ```rb
384
+ puts result.document.inference.prediction.po_number.value
385
+ ```
386
+
373
387
  ## Receipt Number
374
388
  **receipt_number** ([StringField](#string-field)): The receipt number or identifier only if document is a receipt.
375
389
 
data/docs/invoices_v4.md CHANGED
@@ -93,7 +93,7 @@ Prediction
93
93
  :Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
94
94
  :Supplier Phone Number: 4165551212
95
95
  :Supplier Website:
96
- :Supplier Email: i_doi@example.com
96
+ :Supplier Email: ldoi@example.com
97
97
  :Customer Name: JIRO DOI
98
98
  :Customer Company Registrations:
99
99
  :Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
@@ -137,7 +137,7 @@ Page 0
137
137
  :Supplier Address: 156 University Ave, Toronto ON, Canada, M5H 2H7
138
138
  :Supplier Phone Number: 4165551212
139
139
  :Supplier Website:
140
- :Supplier Email: i_doi@example.com
140
+ :Supplier Email: ldoi@example.com
141
141
  :Customer Name: JIRO DOI
142
142
  :Customer Company Registrations:
143
143
  :Customer Address: 1954 Bloor Street West Toronto, ON, M6P 3K9 Canada
@@ -332,6 +332,20 @@ end
332
332
  puts result.document.inference.prediction.locale.value
333
333
  ```
334
334
 
335
+ ## Payment Date
336
+ **payment_date** ([DateField](#date-field)): The date on which the payment is due / was fulfilled.
337
+
338
+ ```rb
339
+ puts result.document.inference.prediction.payment_date.value
340
+ ```
341
+
342
+ ## Purchase Order Number
343
+ **po_number** ([StringField](#string-field)): The purchase order number.
344
+
345
+ ```rb
346
+ puts result.document.inference.prediction.po_number.value
347
+ ```
348
+
335
349
  ## Reference Numbers
336
350
  **reference_numbers** (Array<[StringField](#string-field)>): List of Reference numbers, including PO number.
337
351
 
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
6
6
  ---
7
7
  The Ruby OCR SDK supports the [Nutrition Facts Label API](https://platform.mindee.com/mindee/nutrition_facts).
8
8
 
9
- The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg) can be used for testing purposes.
9
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
10
10
  ![Nutrition Facts Label sample](https://github.com/mindee/client-lib-test-data/blob/main/products/nutrition_facts/default_sample.jpg?raw=true)
11
11
 
12
12
  # Quick-Start
@@ -32,6 +32,85 @@ puts result.document
32
32
  # puts result.document.inference.prediction
33
33
 
34
34
  ```
35
+
36
+ **Output (RST):**
37
+ ```rst
38
+ ########
39
+ Document
40
+ ########
41
+ :Mindee ID: 38a12fe0-5d69-4ca4-9b30-12f1b659311c
42
+ :Filename: default_sample.jpg
43
+
44
+ Inference
45
+ #########
46
+ :Product: mindee/nutrition_facts v1.0
47
+ :Rotation applied: No
48
+
49
+ Prediction
50
+ ==========
51
+ :Serving per Box: 2.00
52
+ :Serving Size:
53
+ :Amount: 228.00
54
+ :Unit: g
55
+ :Calories:
56
+ :Daily Value:
57
+ :Per 100g:
58
+ :Per Serving: 250.00
59
+ :Total Fat:
60
+ :Daily Value:
61
+ :Per 100g:
62
+ :Per Serving: 12.00
63
+ :Saturated Fat:
64
+ :Daily Value: 15.00
65
+ :Per 100g:
66
+ :Per Serving: 3.00
67
+ :Trans Fat:
68
+ :Daily Value:
69
+ :Per 100g:
70
+ :Per Serving: 3.00
71
+ :Cholesterol:
72
+ :Daily Value: 10.00
73
+ :Per 100g:
74
+ :Per Serving: 30.00
75
+ :Total Carbohydrate:
76
+ :Daily Value: 10.00
77
+ :Per 100g:
78
+ :Per Serving: 31.00
79
+ :Dietary Fiber:
80
+ :Daily Value: 0.00
81
+ :Per 100g:
82
+ :Per Serving: 0.00
83
+ :Total Sugars:
84
+ :Daily Value:
85
+ :Per 100g:
86
+ :Per Serving: 5.00
87
+ :Added Sugars:
88
+ :Daily Value:
89
+ :Per 100g:
90
+ :Per Serving:
91
+ :Protein:
92
+ :Daily Value:
93
+ :Per 100g:
94
+ :Per Serving: 5.00
95
+ :sodium:
96
+ :Daily Value: 20.00
97
+ :Per 100g:
98
+ :Per Serving: 470.00
99
+ :Unit: mg
100
+ :nutrients:
101
+ +-------------+----------------------+----------+-------------+------+
102
+ | Daily Value | Name | Per 100g | Per Serving | Unit |
103
+ +=============+======================+==========+=============+======+
104
+ | 12.00 | Vitamin A | | 4.00 | mcg |
105
+ +-------------+----------------------+----------+-------------+------+
106
+ | 12.00 | Vitamin C | | 2.00 | mg |
107
+ +-------------+----------------------+----------+-------------+------+
108
+ | 12.00 | Calcium | | 45.60 | mg |
109
+ +-------------+----------------------+----------+-------------+------+
110
+ | 12.00 | Iron | | 0.90 | mg |
111
+ +-------------+----------------------+----------+-------------+------+
112
+ ```
113
+
35
114
  # Field Types
36
115
  ## Standard Fields
37
116
  These fields are generic and used in several products.
@@ -6,7 +6,7 @@ parentDoc: 6294d97ee723f1008d2ab28e
6
6
  ---
7
7
  The Ruby OCR SDK supports the [Payslip API](https://platform.mindee.com/mindee/payslip_fra).
8
8
 
9
- The [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg) can be used for testing purposes.
9
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
10
10
  ![Payslip sample](https://github.com/mindee/client-lib-test-data/blob/main/products/payslip_fra/default_sample.jpg?raw=true)
11
11
 
12
12
  # Quick-Start
@@ -32,6 +32,82 @@ puts result.document
32
32
  # puts result.document.inference.prediction
33
33
 
34
34
  ```
35
+
36
+ **Output (RST):**
37
+ ```rst
38
+ ########
39
+ Document
40
+ ########
41
+ :Mindee ID: 972edba5-25aa-49d0-8431-e2557ddd788e
42
+ :Filename: default_sample.jpg
43
+
44
+ Inference
45
+ #########
46
+ :Product: mindee/payslip_fra v2.0
47
+ :Rotation applied: No
48
+
49
+ Prediction
50
+ ==========
51
+ :Employee:
52
+ :Address: 52 RUE DES FLEURS 33500 LIBOURNE FRANCE
53
+ :Date of Birth:
54
+ :First Name: Jean Luc
55
+ :Last Name: Picard
56
+ :Phone Number:
57
+ :Registration Number:
58
+ :Social Security Number: 123456789012345
59
+ :Employer:
60
+ :Address: 1 RUE DU TONNOT 25210 DOUBS
61
+ :Company ID: 12345678901234
62
+ :Company Site:
63
+ :NAF Code: 1234A
64
+ :Name: DEMO COMPANY
65
+ :Phone Number:
66
+ :URSSAF Number:
67
+ :Bank Account Details:
68
+ :Bank Name:
69
+ :IBAN:
70
+ :SWIFT:
71
+ :Employment:
72
+ :Category: Cadre
73
+ :Coefficient: 600.00
74
+ :Collective Agreement: Construction -- Promotion
75
+ :Job Title: Directeur Régional du Développement
76
+ :Position Level:
77
+ :Start Date: 2022-05-01
78
+ :Salary Details:
79
+ +--------------+-----------+--------------------------------------+-----------+
80
+ | Amount | Base | Description | Rate |
81
+ +==============+===========+======================================+===========+
82
+ | 6666.67 | | Salaire de base | |
83
+ +--------------+-----------+--------------------------------------+-----------+
84
+ | 9.30 | | Part patronale Mutuelle NR | |
85
+ +--------------+-----------+--------------------------------------+-----------+
86
+ | 508.30 | | Avantages en nature voiture | |
87
+ +--------------+-----------+--------------------------------------+-----------+
88
+ :Pay Detail:
89
+ :Gross Salary: 7184.27
90
+ :Gross Salary YTD: 18074.81
91
+ :Income Tax Rate: 17.60
92
+ :Income Tax Withheld: 1030.99
93
+ :Net Paid: 3868.32
94
+ :Net Paid Before Tax: 4899.31
95
+ :Net Taxable: 5857.90
96
+ :Net Taxable YTD: 14752.73
97
+ :Total Cost Employer: 10486.94
98
+ :Total Taxes and Deductions: 1650.36
99
+ :PTO:
100
+ :Accrued This Period: 6.17
101
+ :Balance End of Period: 6.17
102
+ :Used This Period:
103
+ :Pay Period:
104
+ :End Date: 2023-03-31
105
+ :Month: 03
106
+ :Payment Date: 2023-03-29
107
+ :Start Date: 2023-03-01
108
+ :Year: 2023
109
+ ```
110
+
35
111
  # Field Types
36
112
  ## Standard Fields
37
113
  These fields are generic and used in several products.
data/docs/us_mail_v2.md CHANGED
@@ -38,7 +38,7 @@ puts result.document
38
38
  :Sender Name: zed
39
39
  :Sender Address:
40
40
  :City: Dallas
41
- :Complete Address: 54321 Elm Street, Dallas, Texas ...
41
+ :Complete Address: 54321 Elm Street, Dallas, Texas 54321
42
42
  :Postal Code: 54321
43
43
  :State: TX
44
44
  :Street: 54321 Elm Street
@@ -2,42 +2,47 @@
2
2
 
3
3
  require 'mindee'
4
4
 
5
- # Init a new client
6
- mindee_client = Mindee::Client.new(api_key: 'my-api-key')
7
-
8
- # Load a file from disk
9
- input_source = mindee_client.source_from_path('/path/to/the/file.ext')
10
-
11
- if input_source.pdf?
12
- pdf_extractor = Mindee::Extraction::PdfExtractor.new(input_source)
13
- if pdf_extractor.page_count > 1
14
- invoice_splitter_response = mindee_client.enqueue_and_parse(
15
- input_source,
16
- Mindee::Product::InvoiceSplitter::InvoiceSplitterV1
17
- )
18
- page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
19
- extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
20
- extracted_pdfs.each do |extracted_pdf|
21
- # Optional: Save the files locally
22
- # extracted_pdf.write_to_file("output/path")
23
-
24
- invoice_result = mindee_client.parse(
25
- InvoiceV4,
26
- extracted_pdf.as_source
27
- )
28
- puts invoice_result
29
- end
5
+ def invoice_splitter_auto_extraction(file_path)
6
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
7
+ input_source = mindee_client.source_from_path(file_path)
8
+
9
+ if input_source.pdf? && input_source.count_pdf_pages > 1
10
+ parse_multi_page(mindee_client, input_source)
30
11
  else
31
- invoice_result = mindee_client.parse(
32
- input_source,
33
- Mindee::Product::Invoice::InvoiceV4
34
- )
35
- puts invoice_result.document
12
+ parse_single_page(mindee_client, input_source)
36
13
  end
37
- else
14
+ end
15
+
16
+ def parse_single_page(mindee_client, input_source)
38
17
  invoice_result = mindee_client.parse(
39
18
  input_source,
40
19
  Mindee::Product::Invoice::InvoiceV4
41
20
  )
42
21
  puts invoice_result.document
43
22
  end
23
+
24
+ def parse_multi_page(mindee_client, input_source)
25
+ pdf_extractor = Mindee::Extraction::PdfExtractor::PdfExtractor.new(input_source)
26
+ invoice_splitter_response = mindee_client.enqueue_and_parse(
27
+ input_source,
28
+ Mindee::Product::InvoiceSplitter::InvoiceSplitterV1,
29
+ close_file: false
30
+ )
31
+ page_groups = invoice_splitter_response.document.inference.prediction.invoice_page_groups
32
+ extracted_pdfs = pdf_extractor.extract_invoices(page_groups, strict: false)
33
+
34
+ extracted_pdfs.each do |extracted_pdf|
35
+ # Optional: Save the files locally
36
+ # extracted_pdf.write_to_file("output/path")
37
+
38
+ invoice_result = mindee_client.parse(
39
+ extracted_pdf.as_input_source,
40
+ Mindee::Product::Invoice::InvoiceV4,
41
+ close_file: false
42
+ )
43
+ puts invoice_result.document
44
+ end
45
+ end
46
+
47
+ my_file_path = '/path/to/the/file.ext'
48
+ invoice_splitter_auto_extraction(my_file_path)
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mindee'
4
+ require 'mindee/extraction'
5
+
6
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
7
+ def multi_receipts_detection(file_path, mindee_client)
8
+ input_source = mindee_client.source_from_path(file_path)
9
+
10
+ result_split = mindee_client.parse(
11
+ input_source,
12
+ Mindee::Product::MultiReceiptsDetector::MultiReceiptsDetectorV1,
13
+ close_file: false
14
+ )
15
+
16
+ images = Mindee::Extraction::MultiReceiptsExtractor.extract_receipts(input_source, result_split.document.inference)
17
+ images.each do |sub_image|
18
+ # Optional: Save the files locally
19
+ # sub_image.write_to_file("/path/to/my/extracted/file/folder")
20
+
21
+ result_receipt = mindee_client.parse(
22
+ sub_image.as_source,
23
+ Mindee::Product::Receipt::ReceiptV5,
24
+ close_file: false
25
+ )
26
+ puts result_receipt.document
27
+ end
28
+ end
29
+
30
+ my_file_path = '/path/to/the/file.ext'
31
+ multi_receipts_detection(my_file_path, mindee_client)
data/lib/mindee/client.rb CHANGED
@@ -128,6 +128,7 @@ module Mindee
128
128
  end
129
129
 
130
130
  # rubocop:disable Metrics/ParameterLists
131
+
131
132
  # Enqueue a document for async parsing and automatically try to retrieve it
132
133
  #
133
134
  # @param input_source [Mindee::Input::Source::LocalInputSource, Mindee::Input::Source::UrlInputSource]
@@ -4,7 +4,7 @@ require_relative '../../input/sources'
4
4
 
5
5
  module Mindee
6
6
  # Image Extraction Module.
7
- module ImageExtraction
7
+ module Extraction
8
8
  # Generic class for image extraction.
9
9
  class ExtractedImage
10
10
  # Id of the page the image was extracted from.
@@ -54,7 +54,6 @@ module Mindee
54
54
  image = MiniMagick::Image.read(@buffer)
55
55
  image.format file_format.downcase
56
56
  image.write resolved_path.to_s
57
- logger.info("File saved successfully to '#{resolved_path}'.")
58
57
  rescue TypeError
59
58
  raise 'Invalid path/filename provided.'
60
59
  rescue StandardError