mindee 3.10.0 → 3.12.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (45) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +14 -0
  3. data/bin/mindee.rb +7 -1
  4. data/docs/code_samples/financial_document_v1_async.txt +19 -0
  5. data/docs/code_samples/us_healthcare_cards_v1_async.txt +19 -0
  6. data/docs/expense_receipts_v5.md +12 -10
  7. data/docs/financial_document_v1.md +51 -22
  8. data/docs/invoices_v4.md +4 -3
  9. data/docs/us_healthcare_cards_v1.md +204 -0
  10. data/lib/mindee/extraction/ocr_extractor.rb +110 -0
  11. data/lib/mindee/extraction/tax_extractor.rb +322 -0
  12. data/lib/mindee/extraction.rb +3 -0
  13. data/lib/mindee/geometry/utils.rb +19 -0
  14. data/lib/mindee/image_extraction/common/extracted_image.rb +73 -0
  15. data/lib/mindee/image_extraction/common/image_extractor.rb +191 -0
  16. data/lib/mindee/image_extraction/common.rb +3 -0
  17. data/lib/mindee/image_extraction/multi_receipts_extractor/multi_receipts_extractor.rb +26 -0
  18. data/lib/mindee/image_extraction/multi_receipts_extractor.rb +3 -0
  19. data/lib/mindee/image_extraction.rb +4 -0
  20. data/lib/mindee/input/sources.rb +8 -0
  21. data/lib/mindee/parsing/common/api_response.rb +1 -1
  22. data/lib/mindee/parsing/common/ocr/mvision_v1.rb +16 -0
  23. data/lib/mindee/parsing/common/ocr/ocr.rb +10 -0
  24. data/lib/mindee/parsing/standard/company_registration_field.rb +17 -0
  25. data/lib/mindee/product/financial_document/financial_document_v1_document.rb +3 -1
  26. data/lib/mindee/product/financial_document/financial_document_v1_line_item.rb +7 -0
  27. data/lib/mindee/product/financial_document/financial_document_v1_page.rb +1 -1
  28. data/lib/mindee/product/international_id/international_id_v2_document.rb +1 -1
  29. data/lib/mindee/product/international_id/international_id_v2_page.rb +1 -1
  30. data/lib/mindee/product/invoice/invoice_v4_document.rb +3 -1
  31. data/lib/mindee/product/invoice/invoice_v4_line_item.rb +7 -0
  32. data/lib/mindee/product/invoice/invoice_v4_page.rb +1 -1
  33. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_document.rb +1 -1
  34. data/lib/mindee/product/multi_receipts_detector/multi_receipts_detector_v1_page.rb +1 -1
  35. data/lib/mindee/product/receipt/receipt_v5_document.rb +1 -1
  36. data/lib/mindee/product/receipt/receipt_v5_page.rb +1 -1
  37. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1.rb +41 -0
  38. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_copay.rb +57 -0
  39. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_document.rb +127 -0
  40. data/lib/mindee/product/us/healthcare_card/healthcare_card_v1_page.rb +34 -0
  41. data/lib/mindee/product.rb +1 -0
  42. data/lib/mindee/version.rb +1 -1
  43. data/lib/mindee.rb +5 -0
  44. data/mindee.gemspec +1 -0
  45. metadata +32 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdc0f8cc57ce32684b3da523ce0a6666a251ff90d7a6d894a7b4601231bed3e4
4
- data.tar.gz: e249e12de6c86cc4a7683392504d5cd914afb941a99b5b00bff3f90fd0f4ae36
3
+ metadata.gz: ef77c472e79a3139844c6e719cb368c081466ceed4ded849a39eefb27484ca78
4
+ data.tar.gz: 4d41a61f23ccad56d6f9e4a846feed4c67c613d17a80baf0191aec119eb0835a
5
5
  SHA512:
6
- metadata.gz: 713cf8cb6259b54637865f6799db1f5da7d7f27c436960316f84e2d404fe743d343d2d307feb2023c0fcab580421e5e0004bc70d34735d5800f0530d135c70ea
7
- data.tar.gz: 535dc099fe76a27b3af26bb5cb726756e1b158056027782b607820e2648b92dbc32e65cf453d356c33355c5b94a3df9ffbbd86fabfc18fd5f439b48efa68816c
6
+ metadata.gz: df1777eaec4a97d7524e0bf0cc3e2356169a9a62af6349a10abacc8d1977c83124cbdeafed2e1d94f4cef4e3986c5e488fea45c31c1048c3feed85cdc8e0fb74
7
+ data.tar.gz: 998987a5877b57d1ef2fc078c144f247369c2824b5a84334297e2bed81e61a77b894a863d5a4920d23cb3093b18f86bb0201b7e51a971136f3c2c1bb33c065ea
data/CHANGELOG.md CHANGED
@@ -1,5 +1,19 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v3.12.0 - 2024-07-24
4
+ ### Changes
5
+ * :sparkles: add support for Multi-Receipts Extraction
6
+ * :sparkles: add support for Healthcare Card V1
7
+ * :sparkles: add support for Invoice V4.7
8
+ * :sparkles: add support for Financial Document V1.9
9
+ * :recycle: update display for company registration fields
10
+
11
+
12
+ ## v3.11.0 - 2024-06-10
13
+ ### Changes
14
+ * :sparkles: add custom tax extraction feature (#76)
15
+
16
+
3
17
  ## v3.10.0 - 2024-05-31
4
18
  ### Changes
5
19
  * :sparkles: add support for us mail v2 (#98)
data/bin/mindee.rb CHANGED
@@ -35,7 +35,7 @@ DOCUMENTS = {
35
35
  description: 'Financial Document',
36
36
  doc_class: Mindee::Product::FinancialDocument::FinancialDocumentV1,
37
37
  sync: true,
38
- async: false,
38
+ async: true,
39
39
  },
40
40
  "invoice" => {
41
41
  description: 'Invoice',
@@ -115,6 +115,12 @@ DOCUMENTS = {
115
115
  sync: true,
116
116
  async: false,
117
117
  },
118
+ "us-heathcare-card" => {
119
+ description: "US Healthcare Card",
120
+ doc_class: Mindee::Product::US::HealthcareCard::HealthcareCardV1,
121
+ sync: false,
122
+ async: true
123
+ },
118
124
  "us-w9" => {
119
125
  description: "US W9 Form",
120
126
  doc_class: Mindee::Product::US::W9::W9V1,
@@ -0,0 +1,19 @@
1
+ require 'mindee'
2
+
3
+ # Init a new client
4
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
5
+
6
+ # Load a file from disk
7
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
8
+
9
+ # Parse the file
10
+ result = mindee_client.enqueue_and_parse(
11
+ input_source,
12
+ Mindee::Product::FinancialDocument::FinancialDocumentV1
13
+ )
14
+
15
+ # Print a full summary of the parsed data in RST format
16
+ puts result.document
17
+
18
+ # Print the document-level parsed data
19
+ # puts result.document.inference.prediction
@@ -0,0 +1,19 @@
1
+ require 'mindee'
2
+
3
+ # Init a new client
4
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
5
+
6
+ # Load a file from disk
7
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
8
+
9
+ # Parse the file
10
+ result = mindee_client.enqueue_and_parse(
11
+ input_source,
12
+ Mindee::Product::US::HealthcareCard::HealthcareCardV1
13
+ )
14
+
15
+ # Print a full summary of the parsed data in RST format
16
+ puts result.document
17
+
18
+ # Print the document-level parsed data
19
+ # puts result.document.inference.prediction
@@ -34,12 +34,12 @@ puts result.document
34
34
  ########
35
35
  Document
36
36
  ########
37
- :Mindee ID: ce41e37a-65d8-4de1-b34b-1c92ab04b1ae
37
+ :Mindee ID: d96fb043-8fb8-4adc-820c-387aae83376d
38
38
  :Filename: default_sample.jpg
39
39
 
40
40
  Inference
41
41
  #########
42
- :Product: mindee/expense_receipts v5.0
42
+ :Product: mindee/expense_receipts v5.3
43
43
  :Rotation applied: Yes
44
44
 
45
45
  Prediction
@@ -60,11 +60,12 @@ Prediction
60
60
  +===============+========+==========+===============+
61
61
  | 8.50 | VAT | 20.00 | 1.70 |
62
62
  +---------------+--------+----------+---------------+
63
- :Supplier Name: CLACHAN
64
- :Supplier Company Registrations: 232153895
65
- 232153895
66
- :Supplier Address: 34 kingley street w1b 5qh
63
+ :Supplier Name: clachan
64
+ :Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
65
+ Type: VAT NUMBER, Value: 232153895
66
+ :Supplier Address: 34 Kingley Street W1B 50H
67
67
  :Supplier Phone Number: 02074940834
68
+ :Receipt Number: 54/7500
68
69
  :Line Items:
69
70
  +--------------------------------------+----------+--------------+------------+
70
71
  | Description | Quantity | Total Amount | Unit Price |
@@ -93,11 +94,12 @@ Page 0
93
94
  +===============+========+==========+===============+
94
95
  | 8.50 | VAT | 20.00 | 1.70 |
95
96
  +---------------+--------+----------+---------------+
96
- :Supplier Name: CLACHAN
97
- :Supplier Company Registrations: 232153895
98
- 232153895
99
- :Supplier Address: 34 kingley street w1b 5qh
97
+ :Supplier Name: clachan
98
+ :Supplier Company Registrations: Type: VAT NUMBER, Value: 232153895
99
+ Type: VAT NUMBER, Value: 232153895
100
+ :Supplier Address: 34 Kingley Street W1B 50H
100
101
  :Supplier Phone Number: 02074940834
102
+ :Receipt Number: 54/7500
101
103
  :Line Items:
102
104
  +--------------------------------------+----------+--------------+------------+
103
105
  | Description | Quantity | Total Amount | Unit Price |
@@ -29,23 +29,49 @@ puts result.document
29
29
  # puts result.document.inference.prediction
30
30
  ```
31
31
 
32
+ You can also call this product asynchronously:
33
+
34
+ ```rb
35
+ require 'mindee'
36
+
37
+ # Init a new client
38
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
39
+
40
+ # Load a file from disk
41
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
42
+
43
+ # Parse the file
44
+ result = mindee_client.enqueue_and_parse(
45
+ input_source,
46
+ Mindee::Product::FinancialDocument::FinancialDocumentV1
47
+ )
48
+
49
+ # Print a full summary of the parsed data in RST format
50
+ puts result.document
51
+
52
+ # Print the document-level parsed data
53
+ # puts result.document.inference.prediction
54
+ ```
55
+
32
56
  **Output (RST):**
33
57
  ```rst
34
58
  ########
35
59
  Document
36
60
  ########
37
- :Mindee ID: 503895c6-eced-42e2-a6fc-0292b7ccf680
61
+ :Mindee ID: 3859a462-e05f-4f4c-a736-febca66b9aa9
38
62
  :Filename: default_sample.jpg
39
63
 
40
64
  Inference
41
65
  #########
42
- :Product: mindee/financial_document v1.6
66
+ :Product: mindee/financial_document v1.9
43
67
  :Rotation applied: Yes
44
68
 
45
69
  Prediction
46
70
  ==========
47
- :Locale: en; USD;
71
+ :Locale: en; en; USD;
48
72
  :Invoice Number: INT-001
73
+ :Receipt Number:
74
+ :Document Number: INT-001
49
75
  :Reference Numbers: 2412/2019
50
76
  :Purchase Date: 2019-11-02
51
77
  :Due Date: 2019-02-26
@@ -77,23 +103,25 @@ Prediction
77
103
  :Tip and Gratuity:
78
104
  :Purchase Time:
79
105
  :Line Items:
80
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
81
- | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
82
- +======================================+==============+==========+============+==============+==============+============+
83
- | Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
84
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
85
- | New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
86
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
87
- | Labor 3hrs | | 3.00 | | | 45.00 | 15.00 |
88
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
106
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
107
+ | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
108
+ +======================================+==============+==========+============+==============+==============+=================+============+
109
+ | Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 |
110
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
111
+ | New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 |
112
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
113
+ | Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 |
114
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
89
115
 
90
116
  Page Predictions
91
117
  ================
92
118
 
93
119
  Page 0
94
120
  ------
95
- :Locale: en; USD;
121
+ :Locale: en; en; USD;
96
122
  :Invoice Number: INT-001
123
+ :Receipt Number:
124
+ :Document Number: INT-001
97
125
  :Reference Numbers: 2412/2019
98
126
  :Purchase Date: 2019-11-02
99
127
  :Due Date: 2019-02-26
@@ -125,15 +153,15 @@ Page 0
125
153
  :Tip and Gratuity:
126
154
  :Purchase Time:
127
155
  :Line Items:
128
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
129
- | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit Price |
130
- +======================================+==============+==========+============+==============+==============+============+
131
- | Front and rear brake cables | | 1.00 | | | 100.00 | 100.00 |
132
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
133
- | New set of pedal arms | | 2.00 | | | 50.00 | 25.00 |
134
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
135
- | Labor 3hrs | | 3.00 | | | 45.00 | 15.00 |
136
- +--------------------------------------+--------------+----------+------------+--------------+--------------+------------+
156
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
157
+ | Description | Product code | Quantity | Tax Amount | Tax Rate (%) | Total Amount | Unit of measure | Unit Price |
158
+ +======================================+==============+==========+============+==============+==============+=================+============+
159
+ | Front and rear brake cables | | 1.00 | | | 100.00 | | 100.00 |
160
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
161
+ | New set of pedal arms | | 2.00 | | | 50.00 | | 25.00 |
162
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
163
+ | Labor 3hrs | | 3.00 | | | 45.00 | | 15.00 |
164
+ +--------------------------------------+--------------+----------+------------+--------------+--------------+-----------------+------------+
137
165
  ```
138
166
 
139
167
  # Field Types
@@ -220,6 +248,7 @@ A `FinancialDocumentV1LineItem` implements the following attributes:
220
248
  * `tax_amount` (Float): The item tax amount.
221
249
  * `tax_rate` (Float): The item tax rate in percentage.
222
250
  * `total_amount` (Float): The item total amount.
251
+ * `unit_measure` (String): The item unit of measure.
223
252
  * `unit_price` (Float): The item unit price.
224
253
 
225
254
  # Attributes
data/docs/invoices_v4.md CHANGED
@@ -34,7 +34,7 @@ puts result.document
34
34
  ########
35
35
  Document
36
36
  ########
37
- :Mindee ID: 5c0371d0-1433-43a4-b8fb-a3b11aaf9a0e
37
+ :Mindee ID: 128a314f-1adb-42eb-a9e3-402055a8b8ce
38
38
  :Filename: default_sample.jpg
39
39
 
40
40
  Inference
@@ -49,7 +49,7 @@ Prediction
49
49
  :Reference Numbers: AD29094
50
50
  :Purchase Date: 2018-09-25
51
51
  :Due Date:
52
- :Total Net:
52
+ :Total Net: 2145.00
53
53
  :Total Amount: 2608.20
54
54
  :Total Tax: 193.20
55
55
  :Taxes:
@@ -93,7 +93,7 @@ Page 0
93
93
  :Reference Numbers: AD29094
94
94
  :Purchase Date: 2018-09-25
95
95
  :Due Date:
96
- :Total Net:
96
+ :Total Net: 2145.00
97
97
  :Total Amount: 2608.20
98
98
  :Total Tax: 193.20
99
99
  :Taxes:
@@ -212,6 +212,7 @@ A `InvoiceV4LineItem` implements the following attributes:
212
212
  * `tax_amount` (Float): The item tax amount.
213
213
  * `tax_rate` (Float): The item tax rate in percentage.
214
214
  * `total_amount` (Float): The item total amount.
215
+ * `unit_measure` (String): The item unit of measure.
215
216
  * `unit_price` (Float): The item unit price.
216
217
 
217
218
  # Attributes
@@ -0,0 +1,204 @@
1
+ ---
2
+ title: US Healthcare Card OCR Ruby
3
+ ---
4
+ The Ruby OCR SDK supports the [Healthcare Card API](https://platform.mindee.com/mindee/us_healthcare_cards).
5
+
6
+ Using the [sample below](https://github.com/mindee/client-lib-test-data/blob/main/products/us_healthcare_cards/default_sample.jpg), we are going to illustrate how to extract the data that we want using the OCR SDK.
7
+ ![Healthcare Card sample](https://github.com/mindee/client-lib-test-data/blob/main/products/us_healthcare_cards/default_sample.jpg?raw=true)
8
+
9
+ # Quick-Start
10
+ ```rb
11
+ require 'mindee'
12
+
13
+ # Init a new client
14
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
15
+
16
+ # Load a file from disk
17
+ input_source = mindee_client.source_from_path('/path/to/the/file.ext')
18
+
19
+ # Parse the file
20
+ result = mindee_client.enqueue_and_parse(
21
+ input_source,
22
+ Mindee::Product::US::HealthcareCard::HealthcareCardV1
23
+ )
24
+
25
+ # Print a full summary of the parsed data in RST format
26
+ puts result.document
27
+
28
+ # Print the document-level parsed data
29
+ # puts result.document.inference.prediction
30
+ ```
31
+
32
+ **Output (RST):**
33
+ ```rst
34
+ ########
35
+ Document
36
+ ########
37
+ :Mindee ID: 0ced9f49-00c0-4a1d-8221-4a1538813a95
38
+ :Filename: default_sample.jpg
39
+
40
+ Inference
41
+ #########
42
+ :Product: mindee/us_healthcare_cards v1.0
43
+ :Rotation applied: No
44
+
45
+ Prediction
46
+ ==========
47
+ :Company Name: UnitedHealthcare
48
+ :Member Name: SUBSCRIBER SMITH
49
+ :Member ID: 123456789
50
+ :Issuer 80840:
51
+ :Dependents: SPOUSE SMITH
52
+ CHILD1 SMITH
53
+ CHILD2 SMITH
54
+ CHILD3 SMITH
55
+ :Group Number: 98765
56
+ :Payer ID: 87726
57
+ :RX BIN: 610279
58
+ :RX GRP: UHEALTH
59
+ :RX PCN: 9999
60
+ :copays:
61
+ +--------------+--------------+
62
+ | Service Fees | Service Name |
63
+ +==============+==============+
64
+ | 20.00 | office visit |
65
+ +--------------+--------------+
66
+ | 300.00 | emergency |
67
+ +--------------+--------------+
68
+ | 75.00 | urgent care |
69
+ +--------------+--------------+
70
+ | 30.00 | specialist |
71
+ +--------------+--------------+
72
+ :Enrollment Date: 2023-09-13
73
+ ```
74
+
75
+ # Field Types
76
+ ## Standard Fields
77
+ These fields are generic and used in several products.
78
+
79
+ ### Basic Field
80
+ Each prediction object contains a set of fields that inherit from the generic `Field` class.
81
+ A typical `Field` object will have the following attributes:
82
+
83
+ * **value** (`String`, `Float`, `Integer`, `Boolean`): corresponds to the field value. Can be `nil` if no value was extracted.
84
+ * **confidence** (Float, nil): the confidence score of the field prediction.
85
+ * **bounding_box** (`Mindee::Geometry::Quadrilateral`, `nil`): contains exactly 4 relative vertices (points) coordinates of a right rectangle containing the field in the document.
86
+ * **polygon** (`Mindee::Geometry::Polygon`, `nil`): contains the relative vertices coordinates (`Point`) of a polygon containing the field in the image.
87
+ * **page_id** (`Integer`, `nil`): the ID of the page, is `nil` when at document-level.
88
+ * **reconstructed** (`Boolean`): indicates whether an object was reconstructed (not extracted as the API gave it).
89
+
90
+
91
+ Aside from the previous attributes, all basic fields have access to a `to_s` method that can be used to print their value as a string.
92
+
93
+ ### Date Field
94
+ Aside from the basic `Field` attributes, the date field `DateField` also implements the following:
95
+
96
+ * **date_object** (`Date`): an accessible representation of the value as a Ruby `Date` object.
97
+
98
+ ### String Field
99
+ The text field `StringField` only has one constraint: its **value** is a `String` (or `nil`).
100
+
101
+ ## Specific Fields
102
+ Fields which are specific to this product; they are not used in any other product.
103
+
104
+ ### copays Field
105
+ Is a fixed amount for a covered service.
106
+
107
+ A `HealthcareCardV1Copay` implements the following attributes:
108
+
109
+ * `service_fees` (Float): The price of the service.
110
+ * `service_name` (String): The name of the service covered by the copay.
111
+
112
+ # Attributes
113
+ The following fields are extracted for Healthcare Card V1:
114
+
115
+ ## Company Name
116
+ **company_name** ([StringField](#string-field)): The name of the company that provides the healthcare plan.
117
+
118
+ ```rb
119
+ puts result.document.inference.prediction.company_name.value
120
+ ```
121
+
122
+ ## copays
123
+ **copays** (Array<[HealthcareCardV1Copay](#copays-field)>): Is a fixed amount for a covered service.
124
+
125
+ ```rb
126
+ for copays_elem in result.document.inference.prediction.copays do
127
+ puts copays_elem.value
128
+ end
129
+ ```
130
+
131
+ ## Dependents
132
+ **dependents** (Array<[StringField](#string-field)>): The list of dependents covered by the healthcare plan.
133
+
134
+ ```rb
135
+ for dependents_elem in result.document.inference.prediction.dependents do
136
+ puts dependents_elem.value
137
+ end
138
+ ```
139
+
140
+ ## Enrollment Date
141
+ **enrollment_date** ([DateField](#date-field)): The date when the member enrolled in the healthcare plan.
142
+
143
+ ```rb
144
+ puts result.document.inference.prediction.enrollment_date.value
145
+ ```
146
+
147
+ ## Group Number
148
+ **group_number** ([StringField](#string-field)): The group number associated with the healthcare plan.
149
+
150
+ ```rb
151
+ puts result.document.inference.prediction.group_number.value
152
+ ```
153
+
154
+ ## Issuer 80840
155
+ **issuer80840** ([StringField](#string-field)): The organization that issued the healthcare plan.
156
+
157
+ ```rb
158
+ puts result.document.inference.prediction.issuer80840.value
159
+ ```
160
+
161
+ ## Member ID
162
+ **member_id** ([StringField](#string-field)): The unique identifier for the member in the healthcare system.
163
+
164
+ ```rb
165
+ puts result.document.inference.prediction.member_id.value
166
+ ```
167
+
168
+ ## Member Name
169
+ **member_name** ([StringField](#string-field)): The name of the member covered by the healthcare plan.
170
+
171
+ ```rb
172
+ puts result.document.inference.prediction.member_name.value
173
+ ```
174
+
175
+ ## Payer ID
176
+ **payer_id** ([StringField](#string-field)): The unique identifier for the payer in the healthcare system.
177
+
178
+ ```rb
179
+ puts result.document.inference.prediction.payer_id.value
180
+ ```
181
+
182
+ ## RX BIN
183
+ **rx_bin** ([StringField](#string-field)): The BIN number for prescription drug coverage.
184
+
185
+ ```rb
186
+ puts result.document.inference.prediction.rx_bin.value
187
+ ```
188
+
189
+ ## RX GRP
190
+ **rx_grp** ([StringField](#string-field)): The group number for prescription drug coverage.
191
+
192
+ ```rb
193
+ puts result.document.inference.prediction.rx_grp.value
194
+ ```
195
+
196
+ ## RX PCN
197
+ **rx_pcn** ([StringField](#string-field)): The PCN number for prescription drug coverage.
198
+
199
+ ```rb
200
+ puts result.document.inference.prediction.rx_pcn.value
201
+ ```
202
+
203
+ # Questions?
204
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-2d0ds7dtz-DPAF81ZqTy20chsYpQBW5g)
@@ -0,0 +1,110 @@
1
+ # frozen_string_literal: true
2
+
3
module Mindee
  module Extraction
    # Generic extractor class: text-normalization and number-parsing helpers.
    # Every helper below is registered with `private_class_method`, so it can only be
    # called with an implicit receiver (or through `send`).
    class OcrExtractor
      # Combining diacritical marks that {remove_accents} strips from decomposed text.
      DIACRITICS = [*0x1DC0..0x1DFF, *0x0300..0x036F, *0xFE20..0xFE2F].pack('U*').freeze

      # Symbols stripped by {remove_currency_symbols}.
      CURRENCY_SYMBOLS = [
        '$', '€', '£', '¥', '₹', '₽', '฿', '₺', '₴', '₿', '₡', '₮', '₱', '₲', '₪', '₫', '₩', '₵',
        '₦', '₢', '₤', '₣', '₧', '₯', '₠', '₶', '₸', '₷', '₼', '₾', '﹩', '₨', '﹫',
        "\uFF04" # fullwidth dollar sign
      ].freeze

      private_constant :DIACRITICS, :CURRENCY_SYMBOLS

      # Looks for each candidate in a string and returns the highest index at which any
      # of them is found (not the index of the first match).
      # Case & diacritics insensitive.
      # @param text [String] string to search for matches.
      # @param str_candidates [Array<String>] array of values to look for
      # @return [Integer, nil] greatest match index, or nil when no candidate is found.
      def self.match_index(text, str_candidates)
        return nil if str_candidates.empty?

        # Normalize the haystack once rather than once per candidate.
        normalized_text = remove_accents(text.downcase)
        str_candidates
          .map { |str_candidate| normalized_text.index(remove_accents(str_candidate.downcase)) }
          .compact
          .max
      end

      # Normalizes text by removing diacritics.
      # The input is decomposed (NFD), stripped of combining marks, then recomposed (NFC).
      # @param input_str [String] string to handle.
      # @return [String]
      def self.remove_accents(input_str)
        input_str
          .unicode_normalize(:nfd)
          .tr(DIACRITICS, '')
          .unicode_normalize(:nfc)
          .scrub
      end

      # Checks if a given percentage value is strictly inside the allowed range
      # (both bounds are excluded).
      # @param value [Integer, nil] The value to check
      # @param min_rate_percentage [Integer] Minimum allowed rate on the tax (exclusive).
      # @param max_rate_percentage [Integer] Maximum allowed rate on the tax (exclusive).
      # @return [Boolean] false when value is nil.
      def self.valid_percentage?(value, min_rate_percentage, max_rate_percentage)
        return false if value.nil?

        value > min_rate_percentage && value < max_rate_percentage
      end

      # Parses a percentage from a string, and returns it as a float.
      # Returns nil if candidate isn't a valid percentage.
      # The argument is left untouched, so frozen strings are accepted.
      # @param percentage_str [String] String candidate.
      # @return [Float, nil]
      def self.parse_percentage(percentage_str)
        cleaned_str = percentage_str.scrub.delete('%').strip.tr(',', '.')
        Float(cleaned_str)
      rescue ArgumentError
        nil
      end

      # Parses an amount from a string, and returns it as a float.
      # Returns nil if candidate isn't a valid amount.
      # @param amount_str [String] String candidate.
      # @return [Float, nil]
      def self.parse_amount(amount_str)
        cleaned_str = amount_str.gsub(' ', '')
        cleaned_str = standardize_delimiters(cleaned_str)
        Float(cleaned_str)
      rescue ArgumentError
        nil
      end

      # Rewrites an amount so that '.' is the decimal separator and thousands
      # separators are dropped. Strings with no recognizable decimal part are returned as-is.
      # @param str [String]
      # @return [String]
      def self.standardize_delimiters(str)
        if comma_decimal?(str)
          str.gsub('.', '').gsub(',', '.')
        elsif dot_decimal?(str)
          str.gsub(',', '')
        else
          str
        end
      end

      # Whether a comma sits where a one- or two-digit decimal part would start.
      # @param str [String]
      # @return [Boolean]
      def self.comma_decimal?(str)
        (str.length > 3 && str[-3] == ',') || str[-2] == ','
      end

      # Whether a dot sits where a one- or two-digit decimal part would start.
      # @param str [String]
      # @return [Boolean]
      def self.dot_decimal?(str)
        (str.length > 3 && str[-3] == '.') || str[-2] == '.'
      end

      # Removes most common currency symbols from string.
      # The string is modified in place and the same object is returned.
      # @param input_string [String] string to remove the symbols from (must not be frozen)
      # @return [String]
      def self.remove_currency_symbols(input_string)
        CURRENCY_SYMBOLS.each do |symbol|
          input_string.gsub!(symbol, '')
        end

        input_string
      end

      # A bare `private` does not apply to `def self.` methods, so visibility is set here.
      private_class_method :remove_accents, :match_index, :parse_amount, :parse_percentage, :remove_currency_symbols,
                           :valid_percentage?, :comma_decimal?, :dot_decimal?, :standardize_delimiters
    end
  end
end