mindee 3.10.0 → 3.12.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +14 -0
- data/bin/mindee.rb +7 -1
- data/docs/code_samples/financial_document_v1_async.txt +19 -0
- data/docs/code_samples/us_healthcare_cards_v1_async.txt +19 -0
- data/docs/expense_receipts_v5.md +12 -10
- data/docs/financial_document_v1.md +51 -22
- data/docs/invoices_v4.md +4 -3
- data/docs/us_healthcare_cards_v1.md +204 -0
- data/lib/mindee/extraction/ocr_extractor.rb +110 -0
- data/lib/mindee/extraction/tax_extractor.rb +322 -0
- data/lib/mindee/extraction.rb +3 -0
- data/lib/mindee/geometry/utils.rb +19 -0
- data/lib/mindee/image_extraction/common/extracted_image.rb +73 -0
- data/lib/mindee/image_extraction/common/image_extractor.rb +191 -0
- data/lib/mindee/image_extraction/common.rb +3 -0
- data/lib/mindee/image_extraction/multi_receipts_extractor/multi_receipts_extractor.rb +26 -0
- data/lib/mindee/image_extraction/multi_receipts_extractor.rb +3 -0
- data/lib/mindee/image_extraction.rb +4 -0
- data/lib/mindee/input/sources.rb +8 -0
- data/lib/mindee/parsing/common/api_response.rb +1 -1
- data/lib/mindee/parsing/common/ocr/mvision_v1.rb +16 -0
- data/lib/mindee/parsing/common/ocr/ocr.rb +10 -0
- data/lib/mindee/parsing/standard/company_registration_field.rb +17 -0
- data/lib/mindee/product/financial_document/financial_document_v1_document.rb +3 -1
- data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +7 -0
- data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
- data/lib/mindee/product/international_id/international_id_v2_document.rb +1 -1
- data/lib/mindee/product/international_id/international_id_v2_page.rb +1 -1
- data/lib/mindee/product/invoice/invoice_v4_document.rb +3 -1
- data/lib/mindee/product/invoice/invoice_v4_line_item.rb +7 -0
- data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +1 -1
- data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +1 -1
- data/lib/mindee/product/receipt/receipt_v5_document.rb +1 -1
- data/lib/mindee/product/receipt/receipt_v5_page.rb +1 -1
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1.rb +41 -0
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +57 -0
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +127 -0
- data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +34 -0
- data/lib/mindee/product.rb +1 -0
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +5 -0
- data/mindee.gemspec +1 -0
- metadata +32 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ef77c472e79a3139844c6e719cb368c081466ceed4ded849a39eefb27484ca78
|
4
|
+
data.tar.gz: 4d41a61f23ccad56d6f9e4a846feed4c67c613d17a80baf0191aec119eb0835a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: df1777eaec4a97d7524e0bf0cc3e2356169a9a62af6349a10abacc8d1977c83124cbdeafed2e1d94f4cef4e3986c5e488fea45c31c1048c3feed85cdc8e0fb74
|
7
|
+
data.tar.gz: 998987a5877b57d1ef2fc078c144f247369c2824b5a84334297e2bed81e61a77b894a863d5a4920d23cb3093b18f86bb0201b7e51a971136f3c2c1bb33c065ea
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,19 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v3.12.0 - 2024-07-24
|
4
|
+
### Changes
|
5
|
+
* :sparkles: add support for Multi-Receipts Extraction
|
6
|
+
* :sparkles: add support for Healthcare Card V1
|
7
|
+
* :sparkles: add support for Invoice V4.7
|
8
|
+
* :sparkles: add support for Financial Document V1.9
|
9
|
+
* :recycle: update display for company registration fields
|
10
|
+
|
11
|
+
|
12
|
+
## v3.11.0 - 2024-06-10
|
13
|
+
### Changes
|
14
|
+
* :sparkles: add custom tax extraction feature (#76)
|
15
|
+
|
16
|
+
|
3
17
|
## v3.10.0 - 2024-05-31
|
4
18
|
### Changes
|
5
19
|
* :sparkles: add support for us mail v2 (#98)
|
data/bin/mindee.rb
CHANGED
@@ -35,7 +35,7 @@ DOCUMENTS = {
|
|
35
35
|
description: 'Financial Document',
|
36
36
|
doc_class: Mindee::Product::FinancialDocument::FinancialDocumentV1,
|
37
37
|
sync: true,
|
38
|
-
async:
|
38
|
+
async: true,
|
39
39
|
},
|
40
40
|
"invoice" => {
|
41
41
|
description: 'Invoice',
|
@@ -115,6 +115,12 @@ DOCUMENTS = {
|
|
115
115
|
sync: true,
|
116
116
|
async: false,
|
117
117
|
},
|
118
|
+
"us-heathcare-card" => {
|
119
|
+
description: "US Healthcare Card",
|
120
|
+
doc_class: Mindee::Product::US::HealthcareCard::HealthcareCardV1,
|
121
|
+
sync: false,
|
122
|
+
async: true
|
123
|
+
},
|
118
124
|
"us-w9" => {
|
119
125
|
description: "US W9 Form",
|
120
126
|
doc_class: Mindee::Product::US::W9::W9V1,
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'mindee'
|
2
|
+
|
3
|
+
# Init a new client
|
4
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
|
+
|
6
|
+
# Load a file from disk
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
+
|
9
|
+
# Parse the file
|
10
|
+
result = mindee_client.enqueue_and_parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::FinancialDocument::FinancialDocumentV1
|
13
|
+
)
|
14
|
+
|
15
|
+
# Print a full summary of the parsed data in RST format
|
16
|
+
puts result.document
|
17
|
+
|
18
|
+
# Print the document-level parsed data
|
19
|
+
# puts result.document.inference.prediction
|
@@ -0,0 +1,19 @@
|
|
1
|
+
require 'mindee'
|
2
|
+
|
3
|
+
# Init a new client
|
4
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
|
+
|
6
|
+
# Load a file from disk
|
7
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
+
|
9
|
+
# Parse the file
|
10
|
+
result = mindee_client.enqueue_and_parse(
|
11
|
+
input_source,
|
12
|
+
Mindee::Product::US::HealthcareCard::HealthcareCardV1
|
13
|
+
)
|
14
|
+
|
15
|
+
# Print a full summary of the parsed data in RST format
|
16
|
+
puts result.document
|
17
|
+
|
18
|
+
# Print the document-level parsed data
|
19
|
+
# puts result.document.inference.prediction
|
data/docs/expense_receipts_v5.md
CHANGED
@@ -34,12 +34,12 @@ puts result.document
|
|
34
34
|
########
|
35
35
|
Document
|
36
36
|
########
|
37
|
-
:Mindee ID:
|
37
|
+
:Mindee ID: d96fb043-8fb8-4adc-820c-387aae83376d
|
38
38
|
:Filename: default_sample.jpg
|
39
39
|
|
40
40
|
Inference
|
41
41
|
#########
|
42
|
-
:Product: mindee/expense_receipts v5.
|
42
|
+
:Product: mindee/expense_receipts v5.3
|
43
43
|
:Rotation applied: Yes
|
44
44
|
|
45
45
|
Prediction
|
@@ -60,11 +60,12 @@ Prediction
|
|
60
60
|
+===============+========+==========+===============+
|
61
61
|
| 8.50 | VAT | 20.00 | 1.70 |
|
62
62
|
+---------------+--------+----------+---------------+
|
63
|
-
:Supplier Name:
|
64
|
-
:Supplier Company Registrations: 232153895
|
65
|
-
232153895
|
66
|
-
:Supplier Address: 34
|
63
|
+
:Supplier Name: clachan
|
64
|
+
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
65
|
+
Type: VAT NUMBER, Value: 232153895
|
66
|
+
:Supplier Address: 34 Kingley Street W1B 50H
|
67
67
|
:Supplier Phone Number: 02074940834
|
68
|
+
:Receipt Number: 54/7500
|
68
69
|
:Line Items:
|
69
70
|
+--------------------------------------+----------+--------------+------------+
|
70
71
|
| Description | Quantity | Total Amount | Unit Price |
|
@@ -93,11 +94,12 @@ Page 0
|
|
93
94
|
+===============+========+==========+===============+
|
94
95
|
| 8.50 | VAT | 20.00 | 1.70 |
|
95
96
|
+---------------+--------+----------+---------------+
|
96
|
-
:Supplier Name:
|
97
|
-
:Supplier Company Registrations: 232153895
|
98
|
-
232153895
|
99
|
-
:Supplier Address: 34
|
97
|
+
:Supplier Name: clachan
|
98
|
+
:Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
|
99
|
+
Type: VAT NUMBER, Value: 232153895
|
100
|
+
:Supplier Address: 34 Kingley Street W1B 50H
|
100
101
|
:Supplier Phone Number: 02074940834
|
102
|
+
:Receipt Number: 54/7500
|
101
103
|
:Line Items:
|
102
104
|
+--------------------------------------+----------+--------------+------------+
|
103
105
|
| Description | Quantity | Total Amount | Unit Price |
|
@@ -29,23 +29,49 @@ puts result.document
|
|
29
29
|
# puts result.document.inference.prediction
|
30
30
|
```
|
31
31
|
|
32
|
+
You can also call this product asynchronously:
|
33
|
+
|
34
|
+
```rb
|
35
|
+
require 'mindee'
|
36
|
+
|
37
|
+
# Init a new client
|
38
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
39
|
+
|
40
|
+
# Load a file from disk
|
41
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
42
|
+
|
43
|
+
# Parse the file
|
44
|
+
result = mindee_client.enqueue_and_parse(
|
45
|
+
input_source,
|
46
|
+
Mindee::Product::FinancialDocument::FinancialDocumentV1
|
47
|
+
)
|
48
|
+
|
49
|
+
# Print a full summary of the parsed data in RST format
|
50
|
+
puts result.document
|
51
|
+
|
52
|
+
# Print the document-level parsed data
|
53
|
+
# puts result.document.inference.prediction
|
54
|
+
```
|
55
|
+
|
32
56
|
**Output (RST):**
|
33
57
|
```rst
|
34
58
|
########
|
35
59
|
Document
|
36
60
|
########
|
37
|
-
:Mindee ID:
|
61
|
+
:Mindee ID: 3859a462-e05f-4f4c-a736-febca66b9aa9
|
38
62
|
:Filename: default_sample.jpg
|
39
63
|
|
40
64
|
Inference
|
41
65
|
#########
|
42
|
-
:Product: mindee/financial_document v1.
|
66
|
+
:Product: mindee/financial_document v1.9
|
43
67
|
:Rotation applied: Yes
|
44
68
|
|
45
69
|
Prediction
|
46
70
|
==========
|
47
|
-
:Locale: en; USD;
|
71
|
+
:Locale: en; en; USD;
|
48
72
|
:Invoice Number: INT-001
|
73
|
+
:Receipt Number:
|
74
|
+
:Document Number: INT-001
|
49
75
|
:Reference Numbers: 2412/2019
|
50
76
|
:Purchase Date: 2019-11-02
|
51
77
|
:Due Date: 2019-02-26
|
@@ -77,23 +103,25 @@ Prediction
|
|
77
103
|
:Tip and Gratuity:
|
78
104
|
:Purchase Time:
|
79
105
|
:Line Items:
|
80
|
-
|
81
|
-
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
|
82
|
-
|
83
|
-
| Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
|
84
|
-
|
85
|
-
| New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
|
86
|
-
|
87
|
-
| Labor 3hrs | | 3.00 | | | 45.00 | 15.00 |
|
88
|
-
|
106
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
107
|
+
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
|
108
|
+
+======================================+==============+==========+============+==============+==============+=================+============+
|
109
|
+
| Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 |
|
110
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
111
|
+
| New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 |
|
112
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
113
|
+
| Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 |
|
114
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
89
115
|
|
90
116
|
Page Predictions
|
91
117
|
================
|
92
118
|
|
93
119
|
Page 0
|
94
120
|
------
|
95
|
-
:Locale: en; USD;
|
121
|
+
:Locale: en; en; USD;
|
96
122
|
:Invoice Number: INT-001
|
123
|
+
:Receipt Number:
|
124
|
+
:Document Number: INT-001
|
97
125
|
:Reference Numbers: 2412/2019
|
98
126
|
:Purchase Date: 2019-11-02
|
99
127
|
:Due Date: 2019-02-26
|
@@ -125,15 +153,15 @@ Page 0
|
|
125
153
|
:Tip and Gratuity:
|
126
154
|
:Purchase Time:
|
127
155
|
:Line Items:
|
128
|
-
|
129
|
-
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
|
130
|
-
|
131
|
-
| Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
|
132
|
-
|
133
|
-
| New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
|
134
|
-
|
135
|
-
| Labor 3hrs | | 3.00 | | | 45.00 | 15.00 |
|
136
|
-
|
156
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
157
|
+
| Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
|
158
|
+
+======================================+==============+==========+============+==============+==============+=================+============+
|
159
|
+
| Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 |
|
160
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
161
|
+
| New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 |
|
162
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
163
|
+
| Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 |
|
164
|
+
+--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
|
137
165
|
```
|
138
166
|
|
139
167
|
# Field Types
|
@@ -220,6 +248,7 @@ A `FinancialDocumentV1LineItem` implements the following attributes:
|
|
220
248
|
* `tax_amount` (Float): The item tax amount.
|
221
249
|
* `tax_rate` (Float): The item tax rate in percentage.
|
222
250
|
* `total_amount` (Float): The item total amount.
|
251
|
+
* `unit_measure` (String): The item unit of measure.
|
223
252
|
* `unit_price` (Float): The item unit price.
|
224
253
|
|
225
254
|
# Attributes
|
data/docs/invoices_v4.md
CHANGED
@@ -34,7 +34,7 @@ puts result.document
|
|
34
34
|
########
|
35
35
|
Document
|
36
36
|
########
|
37
|
-
:Mindee ID:
|
37
|
+
:Mindee ID: 128a314f-1adb-42eb-a9e3-402055a8b8ce
|
38
38
|
:Filename: default_sample.jpg
|
39
39
|
|
40
40
|
Inference
|
@@ -49,7 +49,7 @@ Prediction
|
|
49
49
|
:Reference Numbers: AD29094
|
50
50
|
:Purchase Date: 2018-09-25
|
51
51
|
:Due Date:
|
52
|
-
:Total Net:
|
52
|
+
:Total Net: 2145.00
|
53
53
|
:Total Amount: 2608.20
|
54
54
|
:Total Tax: 193.20
|
55
55
|
:Taxes:
|
@@ -93,7 +93,7 @@ Page 0
|
|
93
93
|
:Reference Numbers: AD29094
|
94
94
|
:Purchase Date: 2018-09-25
|
95
95
|
:Due Date:
|
96
|
-
:Total Net:
|
96
|
+
:Total Net: 2145.00
|
97
97
|
:Total Amount: 2608.20
|
98
98
|
:Total Tax: 193.20
|
99
99
|
:Taxes:
|
@@ -212,6 +212,7 @@ A `InvoiceV4LineItem` implements the following attributes:
|
|
212
212
|
* `tax_amount` (Float): The item tax amount.
|
213
213
|
* `tax_rate` (Float): The item tax rate in percentage.
|
214
214
|
* `total_amount` (Float): The item total amount.
|
215
|
+
* `unit_measure` (String): The item unit of measure.
|
215
216
|
* `unit_price` (Float): The item unit price.
|
216
217
|
|
217
218
|
# Attributes
|
@@ -0,0 +1,204 @@
|
|
1
|
+
---
|
2
|
+
title: US Healthcare Card OCR Ruby
|
3
|
+
---
|
4
|
+
The Ruby OCR SDK supports the [Healthcare Card API](https://platform.mindee.com/mindee/us_healthcare_cards).
|
5
|
+
|
6
|
+
Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_healthcare_cards/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
|
7
|
+

|
8
|
+
|
9
|
+
# Quick-Start
|
10
|
+
```rb
|
11
|
+
require 'mindee'
|
12
|
+
|
13
|
+
# Init a new client
|
14
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
15
|
+
|
16
|
+
# Load a file from disk
|
17
|
+
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
18
|
+
|
19
|
+
# Parse the file
|
20
|
+
result = mindee_client.enqueue_and_parse(
|
21
|
+
input_source,
|
22
|
+
Mindee::Product::US::HealthcareCard::HealthcareCardV1
|
23
|
+
)
|
24
|
+
|
25
|
+
# Print a full summary of the parsed data in RST format
|
26
|
+
puts result.document
|
27
|
+
|
28
|
+
# Print the document-level parsed data
|
29
|
+
# puts result.document.inference.prediction
|
30
|
+
```
|
31
|
+
|
32
|
+
**Output (RST):**
|
33
|
+
```rst
|
34
|
+
########
|
35
|
+
Document
|
36
|
+
########
|
37
|
+
:Mindee ID: 0ced9f49-00c0-4a1d-8221-4a1538813a95
|
38
|
+
:Filename: default_sample.jpg
|
39
|
+
|
40
|
+
Inference
|
41
|
+
#########
|
42
|
+
:Product: mindee/us_healthcare_cards v1.0
|
43
|
+
:Rotation applied: No
|
44
|
+
|
45
|
+
Prediction
|
46
|
+
==========
|
47
|
+
:Company Name: UnitedHealthcare
|
48
|
+
:Member Name: SUBSCRIBER SMITH
|
49
|
+
:Member ID: 123456789
|
50
|
+
:Issuer 80840:
|
51
|
+
:Dependents: SPOUSE SMITH
|
52
|
+
CHILD1 SMITH
|
53
|
+
CHILD2 SMITH
|
54
|
+
CHILD3 SMITH
|
55
|
+
:Group Number: 98765
|
56
|
+
:Payer ID: 87726
|
57
|
+
:RX BIN: 610279
|
58
|
+
:RX GRP: UHEALTH
|
59
|
+
:RX PCN: 9999
|
60
|
+
:copays:
|
61
|
+
+--------------+--------------+
|
62
|
+
| Service Fees | Service Name |
|
63
|
+
+==============+==============+
|
64
|
+
| 20.00 | office visit |
|
65
|
+
+--------------+--------------+
|
66
|
+
| 300.00 | emergency |
|
67
|
+
+--------------+--------------+
|
68
|
+
| 75.00 | urgent care |
|
69
|
+
+--------------+--------------+
|
70
|
+
| 30.00 | specialist |
|
71
|
+
+--------------+--------------+
|
72
|
+
:Enrollment Date: 2023-09-13
|
73
|
+
```
|
74
|
+
|
75
|
+
# Field Types
|
76
|
+
## Standard Fields
|
77
|
+
These fields are generic and used in several products.
|
78
|
+
|
79
|
+
### Basic Field
|
80
|
+
Each prediction object contains a set of fields that inherit from the generic `Field` class.
|
81
|
+
A typical `Field` object will have the following attributes:
|
82
|
+
|
83
|
+
* **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
|
84
|
+
* **confidence** (`Float`, `nil`): the confidence score of the field prediction.
|
85
|
+
* **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
|
86
|
+
* **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
|
87
|
+
* **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
|
88
|
+
* **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
|
89
|
+
|
90
|
+
|
91
|
+
Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
|
92
|
+
|
93
|
+
### Date Field
|
94
|
+
Aside from the basic `Field` attributes, the date field `DateField` also implements the following:
|
95
|
+
|
96
|
+
* **date_object** (`Date`): an accessible representation of the value as a Ruby `Date` object.
|
97
|
+
|
98
|
+
### String Field
|
99
|
+
The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
|
100
|
+
|
101
|
+
## Specific Fields
|
102
|
+
Fields which are specific to this product; they are not used in any other product.
|
103
|
+
|
104
|
+
### copays Field
|
105
|
+
A copay is a fixed amount for a covered service.
|
106
|
+
|
107
|
+
A `HealthcareCardV1Copay` implements the following attributes:
|
108
|
+
|
109
|
+
* `service_fees` (Float): The price of service.
|
110
|
+
* `service_name` (String): The name of service of the copay.
|
111
|
+
|
112
|
+
# Attributes
|
113
|
+
The following fields are extracted for Healthcare Card V1:
|
114
|
+
|
115
|
+
## Company Name
|
116
|
+
**company_name** ([StringField](#string-field)): The name of the company that provides the healthcare plan.
|
117
|
+
|
118
|
+
```rb
|
119
|
+
puts result.document.inference.prediction.company_name.value
|
120
|
+
```
|
121
|
+
|
122
|
+
## copays
|
123
|
+
**copays** (Array<[HealthcareCardV1Copay](#copays-field)>): The list of copays, each being a fixed amount for a covered service.
|
124
|
+
|
125
|
+
```rb
|
126
|
+
for copays_elem in result.document.inference.prediction.copays do
|
127
|
+
puts copays_elem.value
|
128
|
+
end
|
129
|
+
```
|
130
|
+
|
131
|
+
## Dependents
|
132
|
+
**dependents** (Array<[StringField](#string-field)>): The list of dependents covered by the healthcare plan.
|
133
|
+
|
134
|
+
```rb
|
135
|
+
for dependents_elem in result.document.inference.prediction.dependents do
|
136
|
+
puts dependents_elem.value
|
137
|
+
end
|
138
|
+
```
|
139
|
+
|
140
|
+
## Enrollment Date
|
141
|
+
**enrollment_date** ([DateField](#date-field)): The date when the member enrolled in the healthcare plan.
|
142
|
+
|
143
|
+
```rb
|
144
|
+
puts result.document.inference.prediction.enrollment_date.value
|
145
|
+
```
|
146
|
+
|
147
|
+
## Group Number
|
148
|
+
**group_number** ([StringField](#string-field)): The group number associated with the healthcare plan.
|
149
|
+
|
150
|
+
```rb
|
151
|
+
puts result.document.inference.prediction.group_number.value
|
152
|
+
```
|
153
|
+
|
154
|
+
## Issuer 80840
|
155
|
+
**issuer80840** ([StringField](#string-field)): The organization that issued the healthcare plan.
|
156
|
+
|
157
|
+
```rb
|
158
|
+
puts result.document.inference.prediction.issuer80840.value
|
159
|
+
```
|
160
|
+
|
161
|
+
## Member ID
|
162
|
+
**member_id** ([StringField](#string-field)): The unique identifier for the member in the healthcare system.
|
163
|
+
|
164
|
+
```rb
|
165
|
+
puts result.document.inference.prediction.member_id.value
|
166
|
+
```
|
167
|
+
|
168
|
+
## Member Name
|
169
|
+
**member_name** ([StringField](#string-field)): The name of the member covered by the healthcare plan.
|
170
|
+
|
171
|
+
```rb
|
172
|
+
puts result.document.inference.prediction.member_name.value
|
173
|
+
```
|
174
|
+
|
175
|
+
## Payer ID
|
176
|
+
**payer_id** ([StringField](#string-field)): The unique identifier for the payer in the healthcare system.
|
177
|
+
|
178
|
+
```rb
|
179
|
+
puts result.document.inference.prediction.payer_id.value
|
180
|
+
```
|
181
|
+
|
182
|
+
## RX BIN
|
183
|
+
**rx_bin** ([StringField](#string-field)): The BIN number for prescription drug coverage.
|
184
|
+
|
185
|
+
```rb
|
186
|
+
puts result.document.inference.prediction.rx_bin.value
|
187
|
+
```
|
188
|
+
|
189
|
+
## RX GRP
|
190
|
+
**rx_grp** ([StringField](#string-field)): The group number for prescription drug coverage.
|
191
|
+
|
192
|
+
```rb
|
193
|
+
puts result.document.inference.prediction.rx_grp.value
|
194
|
+
```
|
195
|
+
|
196
|
+
## RX PCN
|
197
|
+
**rx_pcn** ([StringField](#string-field)): The PCN number for prescription drug coverage.
|
198
|
+
|
199
|
+
```rb
|
200
|
+
puts result.document.inference.prediction.rx_pcn.value
|
201
|
+
```
|
202
|
+
|
203
|
+
# Questions?
|
204
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# frozen_string_literal: true

module Mindee
  module Extraction
    # Generic extractor class.
    #
    # Collection of stateless text helpers (accent stripping, candidate matching, amount and
    # percentage parsing). Every helper is declared as a private class method at the bottom of
    # the class, so they are reachable from within this class and its subclasses only.
    class OcrExtractor
      # Combining diacritical marks removed by {remove_accents}. Built once instead of on every call.
      DIACRITICS = [*0x1DC0..0x1DFF, *0x0300..0x036F, *0xFE20..0xFE2F].pack('U*').freeze

      # Most common currency symbols, removed by {remove_currency_symbols}.
      CURRENCY_SYMBOLS = ['$', '€', '£', '¥', '₹', '₽', '฿', '₺', '₴', '₿', '₡', '₮', '₱', '₲', '₪', '₫', '₩', '₵',
                          '₦', '₢', '₤', '₣', '₧', '₯', '₠', '₶', '₸', '₷', '₼', '₾', '₺', '﹩', '₨', '₹', '$',
                          '﹫'].freeze

      private_constant :DIACRITICS, :CURRENCY_SYMBOLS

      # Checks for a list of possible matches in a string.
      # Case & diacritics insensitive.
      #
      # When several candidates are present in the text, the highest index among their first
      # occurrences is returned (this is the behavior of the original implementation).
      # @param text [String] string to search for matches.
      # @param str_candidates [Array<String>] array of values to look for
      # @return [Integer, nil] index of the match, or nil when no candidate is found.
      def self.match_index(text, str_candidates)
        return nil if text.nil?

        # The haystack does not change between candidates: normalize it only once.
        normalized_text = remove_accents(text.downcase)
        str_candidates
          .map { |str_candidate| normalized_text.index(remove_accents(str_candidate.downcase)) }
          .compact
          .max
      end

      # Normalizes text by removing diacritics.
      # The string is decomposed (NFD), stripped of its combining marks, then recomposed (NFC).
      # @param input_str [String] string to handle.
      # @return [String]
      def self.remove_accents(input_str)
        input_str
          .unicode_normalize(:nfd)
          .tr(DIACRITICS, '')
          .unicode_normalize(:nfc)
          .scrub
      end

      # Checks if a given percentage value is within the allowed range (both bounds excluded).
      # @param value [Numeric, nil] The value to check
      # @param min_rate_percentage [Numeric] Minimum allowed rate on the tax.
      # @param max_rate_percentage [Numeric] Maximum allowed rate on the tax.
      # @return [Boolean]
      def self.valid_percentage?(value, min_rate_percentage, max_rate_percentage)
        return false if value.nil?

        value > min_rate_percentage && value < max_rate_percentage
      end

      # Parses a percentage from a string, and returns it as a float.
      # Returns nil if candidate isn't a valid percentage.
      #
      # The candidate is never modified: the cleanup is done on a copy, so frozen strings
      # (e.g. literals under `frozen_string_literal`) are accepted.
      # @param percentage_str [String, nil] String candidate.
      # @return [Float, nil]
      def self.parse_percentage(percentage_str)
        return nil if percentage_str.nil?

        cleaned_str = percentage_str.scrub.delete('%').strip.tr(',', '.')
        Float(cleaned_str)
      rescue ArgumentError
        nil
      end

      # Parses an amount from a string, and returns it as a float.
      # Returns nil if candidate isn't a valid amount.
      # @param amount_str [String, nil] String candidate.
      # @return [Float, nil]
      def self.parse_amount(amount_str)
        return nil if amount_str.nil?

        cleaned_str = standardize_delimiters(amount_str.gsub(' ', ''))
        Float(cleaned_str)
      rescue ArgumentError
        nil
      end

      # Rewrites an amount so that '.' is the decimal separator and no thousands separator remains.
      # @param str [String] amount without spaces.
      # @return [String]
      def self.standardize_delimiters(str)
        if comma_decimal?(str)
          str.gsub('.', '').gsub(',', '.')
        elsif dot_decimal?(str)
          str.gsub(',', '')
        else
          str
        end
      end

      # Whether a comma sits one or two characters before the end of the string (e.g. "1,5" or "1.234,56").
      # @param str [String]
      # @return [Boolean]
      def self.comma_decimal?(str)
        (str.length > 3 && str[-3] == ',') || str[-2] == ','
      end

      # Whether a dot sits one or two characters before the end of the string (e.g. "1.5" or "1,234.56").
      # @param str [String]
      # @return [Boolean]
      def self.dot_decimal?(str)
        (str.length > 3 && str[-3] == '.') || str[-2] == '.'
      end

      # Removes most common currency symbols from string
      #
      # A mutable string is cleaned in place and returned, as before. A frozen string cannot be
      # modified, so a cleaned copy is returned instead of raising a FrozenError.
      # @param input_string [String] string to remove the symbols from
      # @return [String]
      def self.remove_currency_symbols(input_string)
        cleaned_string = input_string.frozen? ? input_string.dup : input_string
        CURRENCY_SYMBOLS.each do |symbol|
          cleaned_string.gsub!(symbol, '')
        end

        cleaned_string
      end

      private_class_method :remove_accents, :match_index, :parse_amount, :parse_percentage, :remove_currency_symbols,
                           :valid_percentage?, :comma_decimal?, :dot_decimal?, :standardize_delimiters
    end
  end
end
|