mindee 1.2.0 → 2.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +26 -0
- data/README.md +46 -23
- data/Rakefile +6 -1
- data/bin/mindee.rb +78 -61
- data/docs/ruby-api-builder.md +124 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +260 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +132 -93
- data/lib/mindee/document_config.rb +29 -169
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +245 -0
- data/lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +82 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +87 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +14 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +2 -1
- metadata +60 -24
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 7a4e7590c321e473df6da717d6854caf198ee8dc3d818502d6e588d14497c0da
|
|
4
|
+
data.tar.gz: b9922d76cbc0115dff59489b9c77757decd69fcfd8a6da445e0a38229cd85023
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 01e9dce9c2ba44dea757f1061c4a368b9a675f75d8ab54dd74c06c7c70589aaa80fd320a241c51f618efec9d6ebae8aa9fa033233731e5c214e607464821deb1
|
|
7
|
+
data.tar.gz: 0467da78b85085e0df6cdae33e6135be4dd1be6c8d28f2c2888127c2340c114b404314f80741f0304aca4d34b19c6462ae8798fcb3936d5f2130860610b4ded3
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
|
@@ -26,7 +26,7 @@ Metrics/BlockLength:
|
|
|
26
26
|
- '**/*.gemspec'
|
|
27
27
|
|
|
28
28
|
Metrics/MethodLength:
|
|
29
|
-
Max:
|
|
29
|
+
Max: 45
|
|
30
30
|
|
|
31
31
|
Metrics/ClassLength:
|
|
32
32
|
Max: 200
|
|
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
|
|
|
35
35
|
Max: 7
|
|
36
36
|
|
|
37
37
|
Metrics/AbcSize:
|
|
38
|
-
Max:
|
|
38
|
+
Max: 60
|
|
39
39
|
|
|
40
40
|
Style/RegexpLiteral:
|
|
41
41
|
EnforcedStyle: percent_r
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
|
@@ -1,20 +1,46 @@
|
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
|
2
2
|
|
|
3
|
+
## v2.1.0 - 2023-01-30
|
|
4
|
+
### Changes
|
|
5
|
+
* :sparkles: Add financial document v1 support (Co-authored-by: Oriol Gual)
|
|
6
|
+
* :sparkles: Add Proof of Address v1 support
|
|
7
|
+
|
|
8
|
+
## v2.0.0 - 2023-01-13
|
|
9
|
+
### ¡Breaking Changes!
|
|
10
|
+
* :sparkles: add improved PDF merge system
|
|
11
|
+
* :boom: it should be up to the user to handle API errors
|
|
12
|
+
* :wastebasket: remove deprecated APIs
|
|
13
|
+
* :recycle: refactor CLI tool
|
|
14
|
+
|
|
15
|
+
### Additions
|
|
16
|
+
* :sparkles: add support for Invoice v4.1 and Receipt v4.1
|
|
17
|
+
* :sparkles: add EU license plates
|
|
18
|
+
* :sparkles: add shipping containers support
|
|
19
|
+
* :sparkles: add US bank check support
|
|
20
|
+
* :sparkles: add all French documents
|
|
21
|
+
* :memo: Add YARD for generating docs
|
|
22
|
+
* :white_check_mark: add testing on Ruby 3.2
|
|
23
|
+
* :sparkles: allow setting the request timeout from env
|
|
24
|
+
|
|
3
25
|
## v1.2.0 - 2022-12-26
|
|
4
26
|
### Changes
|
|
5
27
|
* :arrow_up: switch to origamindee => adds support for Ruby 3
|
|
6
28
|
|
|
29
|
+
|
|
7
30
|
## v1.1.2 - 2022-12-23
|
|
8
31
|
### Changes
|
|
9
32
|
* :recycle: use of `append_page` is better for adding pages to a new PDF
|
|
10
33
|
|
|
34
|
+
|
|
11
35
|
## v1.1.1 - 2022-08-08
|
|
12
36
|
### Fixes
|
|
13
37
|
* :bug: Fix for missing attribute accessor
|
|
14
38
|
|
|
39
|
+
|
|
15
40
|
## v1.1.0 - 2022-08-04
|
|
16
41
|
### Changes
|
|
17
42
|
* :sparkles: Add support for custom API classification field (#5)
|
|
18
43
|
|
|
44
|
+
|
|
19
45
|
## v1.0.0 - 2022-07-28
|
|
20
46
|
* :tada: First release!
|
data/README.md
CHANGED
|
@@ -1,13 +1,10 @@
|
|
|
1
|
-
[](https://opensource.org/licenses/MIT)
|
|
2
|
-
[](https://github.com/mindee/mindee-api-ruby)
|
|
3
|
-
[](https://rubygems.org/gems/mindee)
|
|
4
|
-
[](https://rubygems.org/gems/mindee)
|
|
1
|
+
[](https://opensource.org/licenses/MIT) [](https://github.com/mindee/mindee-api-ruby) [](https://rubygems.org/gems/mindee) [](https://rubygems.org/gems/mindee)
|
|
5
2
|
|
|
6
3
|
# Mindee API Helper Library for Ruby
|
|
7
4
|
Quickly and easily connect to Mindee's API services using Ruby.
|
|
8
5
|
|
|
9
6
|
## Requirements
|
|
10
|
-
The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1
|
|
7
|
+
The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
|
|
11
8
|
|
|
12
9
|
## Quick Start
|
|
13
10
|
Here's the TL;DR of getting started.
|
|
@@ -24,25 +21,38 @@ And then execute:
|
|
|
24
21
|
|
|
25
22
|
$ bundle install
|
|
26
23
|
|
|
27
|
-
|
|
24
|
+
Finally, Ruby away!
|
|
28
25
|
|
|
29
|
-
|
|
26
|
+
### Loading a File and Parsing It
|
|
30
27
|
|
|
31
|
-
|
|
28
|
+
#### Global Documents
|
|
29
|
+
```ruby
|
|
30
|
+
require 'mindee'
|
|
31
|
+
|
|
32
|
+
# Init a new client
|
|
33
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
32
34
|
|
|
33
|
-
|
|
35
|
+
# Load a file from disk and parse it
|
|
36
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
|
37
|
+
.parse(Mindee::Prediction::InvoiceV4)
|
|
38
|
+
|
|
39
|
+
# Print a full summary of the parsed data in RST format
|
|
40
|
+
puts result
|
|
41
|
+
```
|
|
42
|
+
|
|
43
|
+
#### Region-Specific Documents
|
|
34
44
|
```ruby
|
|
35
45
|
require 'mindee'
|
|
36
46
|
|
|
37
|
-
# Init a new client
|
|
38
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
47
|
+
# Init a new client
|
|
48
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
|
39
49
|
|
|
40
50
|
# Load a file from disk and parse it
|
|
41
|
-
|
|
42
|
-
.parse(
|
|
51
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
|
52
|
+
.parse(Mindee::Prediction::EU::LicensePlateV1)
|
|
43
53
|
|
|
44
|
-
# Print a
|
|
45
|
-
puts
|
|
54
|
+
# Print a full summary of the parsed data in RST format
|
|
55
|
+
puts result.document
|
|
46
56
|
```
|
|
47
57
|
|
|
48
58
|
### Custom Document (API Builder)
|
|
@@ -50,27 +60,40 @@ puts api_response.document
|
|
|
50
60
|
require 'mindee'
|
|
51
61
|
|
|
52
62
|
# Init a new client and configure your custom document
|
|
53
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key').
|
|
63
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
|
54
64
|
'john',
|
|
55
65
|
'wnine'
|
|
56
66
|
)
|
|
57
67
|
|
|
58
68
|
# Load a file from disk and parse it
|
|
59
|
-
|
|
60
|
-
.parse('wnine')
|
|
61
|
-
|
|
62
|
-
# Print a
|
|
63
|
-
puts
|
|
69
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
|
70
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
|
71
|
+
|
|
72
|
+
# Print a full summary of the parsed data in RST format
|
|
73
|
+
puts result
|
|
74
|
+
|
|
75
|
+
# Looping over all prediction values
|
|
76
|
+
result.inference.prediction.fields.each do |field_name, field_data|
|
|
77
|
+
puts field_name
|
|
78
|
+
puts field_data.values
|
|
79
|
+
puts field_data.to_s
|
|
80
|
+
end
|
|
64
81
|
```
|
|
65
82
|
|
|
66
83
|
## Further Reading
|
|
67
84
|
There's more to it than that for those that need more features, or want to
|
|
68
85
|
customize the experience.
|
|
69
86
|
|
|
70
|
-
|
|
71
|
-
|
|
87
|
+
- [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
|
|
88
|
+
- [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
|
|
89
|
+
- [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
|
|
90
|
+
- [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
|
|
91
|
+
- [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
|
|
72
92
|
|
|
73
93
|
## License
|
|
74
94
|
Copyright © Mindee, SA
|
|
75
95
|
|
|
76
96
|
Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
|
97
|
+
|
|
98
|
+
## Questions?
|
|
99
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/Rakefile
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'rake'
|
|
4
|
+
require 'rspec/core/rake_task'
|
|
5
|
+
require 'yard'
|
|
4
6
|
|
|
5
7
|
begin
|
|
6
8
|
require 'bundler/setup'
|
|
@@ -11,5 +13,8 @@ end
|
|
|
11
13
|
|
|
12
14
|
task default: :spec
|
|
13
15
|
|
|
14
|
-
require 'rspec/core/rake_task'
|
|
15
16
|
RSpec::Core::RakeTask.new(:spec)
|
|
17
|
+
|
|
18
|
+
YARD::Rake::YardocTask.new do |task|
|
|
19
|
+
task.files = ['lib/**/*.rb']
|
|
20
|
+
end
|
data/bin/mindee.rb
CHANGED
|
@@ -6,28 +6,60 @@ require 'optparse'
|
|
|
6
6
|
require 'mindee'
|
|
7
7
|
|
|
8
8
|
DOCUMENTS = {
|
|
9
|
+
"custom" => {
|
|
10
|
+
help: "Custom document type from API builder",
|
|
11
|
+
prediction: Mindee::Prediction::CustomV1,
|
|
12
|
+
},
|
|
13
|
+
"proof-of-address" => {
|
|
14
|
+
help: 'Proof of Address',
|
|
15
|
+
prediction: Mindee::Prediction::ProofOfAddressV1,
|
|
16
|
+
},
|
|
17
|
+
"financial-document" => {
|
|
18
|
+
help: 'Financial Document',
|
|
19
|
+
prediction: Mindee::Prediction::FinancialDocumentV1,
|
|
20
|
+
},
|
|
9
21
|
"invoice" => {
|
|
10
22
|
help: 'Invoice',
|
|
11
|
-
|
|
23
|
+
prediction: Mindee::Prediction::InvoiceV4,
|
|
12
24
|
},
|
|
13
25
|
"receipt" => {
|
|
14
26
|
help: "Expense Receipt",
|
|
15
|
-
|
|
27
|
+
prediction: Mindee::Prediction::ReceiptV4,
|
|
16
28
|
},
|
|
17
29
|
"passport" => {
|
|
18
30
|
help: "Passport",
|
|
19
|
-
|
|
31
|
+
prediction: Mindee::Prediction::PassportV1,
|
|
20
32
|
},
|
|
21
|
-
"
|
|
22
|
-
help: "
|
|
23
|
-
|
|
33
|
+
"shipping-container" => {
|
|
34
|
+
help: "Shipping Container",
|
|
35
|
+
prediction: Mindee::Prediction::ShippingContainerV1,
|
|
24
36
|
},
|
|
25
|
-
"
|
|
26
|
-
help: "
|
|
37
|
+
"eu-license-plate" => {
|
|
38
|
+
help: "EU License Plate",
|
|
39
|
+
prediction: Mindee::Prediction::EU::LicensePlateV1,
|
|
40
|
+
},
|
|
41
|
+
"fr-bank-account-details" => {
|
|
42
|
+
help: "FR Bank Account Details",
|
|
43
|
+
prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
|
|
44
|
+
},
|
|
45
|
+
"fr-carte-vitale" => {
|
|
46
|
+
help: "FR Carte Vitale",
|
|
47
|
+
prediction: Mindee::Prediction::FR::CarteVitaleV1,
|
|
48
|
+
},
|
|
49
|
+
"fr-id-card" => {
|
|
50
|
+
help: "FR ID Card",
|
|
51
|
+
prediction: Mindee::Prediction::FR::IdCardV1,
|
|
52
|
+
},
|
|
53
|
+
"us-bank-check" => {
|
|
54
|
+
help: "US Bank Check",
|
|
55
|
+
prediction: Mindee::Prediction::US::BankCheckV1,
|
|
27
56
|
},
|
|
28
57
|
}
|
|
29
58
|
|
|
30
|
-
options = {
|
|
59
|
+
options = {
|
|
60
|
+
api_key: '',
|
|
61
|
+
print_full: false,
|
|
62
|
+
}
|
|
31
63
|
|
|
32
64
|
def ots_subcommand(command, options)
|
|
33
65
|
OptionParser.new do |opt|
|
|
@@ -38,20 +70,20 @@ def ots_subcommand(command, options)
|
|
|
38
70
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
|
39
71
|
options[:include_words] = v
|
|
40
72
|
end
|
|
41
|
-
opt.on('-
|
|
42
|
-
options[:
|
|
73
|
+
opt.on('-c', '--cut-pages', "Cut document pages") do |v|
|
|
74
|
+
options[:cut_pages] = v
|
|
43
75
|
end
|
|
44
76
|
end
|
|
45
77
|
end
|
|
46
78
|
|
|
47
79
|
def custom_subcommand(options)
|
|
48
80
|
OptionParser.new do |opt|
|
|
49
|
-
opt.banner = "Usage: custom [options]
|
|
81
|
+
opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
|
|
50
82
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
|
51
83
|
options[:include_words] = v
|
|
52
84
|
end
|
|
53
|
-
opt.on('-
|
|
54
|
-
options[:
|
|
85
|
+
opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
|
|
86
|
+
options[:cut_pages] = v
|
|
55
87
|
end
|
|
56
88
|
opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
|
|
57
89
|
options[:api_key] = v
|
|
@@ -59,79 +91,64 @@ def custom_subcommand(options)
|
|
|
59
91
|
opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
|
|
60
92
|
options[:version] = v
|
|
61
93
|
end
|
|
62
|
-
opt.on('-
|
|
63
|
-
options[:
|
|
94
|
+
opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
|
|
95
|
+
options[:account_name] = v
|
|
64
96
|
end
|
|
65
97
|
end
|
|
66
98
|
end
|
|
67
99
|
|
|
68
|
-
def new_ots_client(options, command)
|
|
69
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
|
70
|
-
mindee_client = Mindee::Client.new(
|
|
71
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
|
72
|
-
)
|
|
73
|
-
info = DOCUMENTS[command]
|
|
74
|
-
mindee_client.send("config_#{info[:doc_type]}")
|
|
75
|
-
end
|
|
76
|
-
|
|
77
|
-
def new_custom_client(options, doc_type)
|
|
78
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
|
79
|
-
mindee_client = Mindee::Client.new(
|
|
80
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
|
81
|
-
)
|
|
82
|
-
mindee_client.config_custom_doc(
|
|
83
|
-
doc_type,
|
|
84
|
-
options[:user],
|
|
85
|
-
version: options[:version] || '1'
|
|
86
|
-
)
|
|
87
|
-
end
|
|
88
|
-
|
|
89
100
|
global_parser = OptionParser.new do |opt|
|
|
90
101
|
opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
|
|
91
102
|
opt.separator('')
|
|
92
103
|
opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
|
|
93
104
|
opt.separator('')
|
|
94
|
-
opt.on('-
|
|
95
|
-
options[:
|
|
96
|
-
|
|
105
|
+
opt.on('-f', '--full', "Print the full data, including pages") do |v|
|
|
106
|
+
options[:print_full] = true
|
|
107
|
+
end
|
|
97
108
|
end
|
|
98
109
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
begin
|
|
109
|
-
global_parser.order!
|
|
110
|
-
command = ARGV.shift
|
|
111
|
-
subcommands[command].order!
|
|
112
|
-
rescue NoMethodError => e
|
|
110
|
+
global_parser.order!
|
|
111
|
+
command = ARGV.shift
|
|
112
|
+
if command == 'custom'
|
|
113
|
+
custom_subcommand(options).order!
|
|
114
|
+
elsif DOCUMENTS.keys.include? command || ''
|
|
115
|
+
ots_subcommand(command, options).order!
|
|
116
|
+
else
|
|
113
117
|
$stderr.puts global_parser
|
|
114
118
|
exit(1)
|
|
115
119
|
end
|
|
116
120
|
|
|
121
|
+
mindee_client = Mindee::Client.new(api_key: options[:api_key])
|
|
122
|
+
|
|
117
123
|
if command == 'custom'
|
|
118
124
|
if ARGV.length != 2
|
|
119
|
-
$stderr.puts "The 'custom' command requires both
|
|
125
|
+
$stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
|
|
120
126
|
exit(1)
|
|
121
127
|
end
|
|
122
128
|
doc_type = ARGV[0]
|
|
123
129
|
file_path = ARGV[1]
|
|
124
|
-
mindee_client
|
|
130
|
+
mindee_client.add_endpoint(
|
|
131
|
+
options[:account_name], doc_type, version: options[:version] || '1',
|
|
132
|
+
)
|
|
125
133
|
else
|
|
126
134
|
if ARGV.length != 1
|
|
127
135
|
$stderr.puts 'No file specified.'
|
|
128
136
|
exit(1)
|
|
129
137
|
end
|
|
130
|
-
|
|
131
|
-
doc_type = DOCUMENTS[command][:doc_type]
|
|
138
|
+
doc_type = ''
|
|
132
139
|
file_path = ARGV[0]
|
|
133
140
|
end
|
|
134
141
|
|
|
135
|
-
|
|
136
|
-
|
|
137
|
-
|
|
142
|
+
default_cutting = {
|
|
143
|
+
page_indexes: [0, 1, 2, 3, 4],
|
|
144
|
+
operation: :KEEP_ONLY,
|
|
145
|
+
on_min_pages: 0,
|
|
146
|
+
}
|
|
147
|
+
page_options = options[:cut_pages].nil? ? nil : default_cutting
|
|
148
|
+
doc = mindee_client.doc_from_path(file_path)
|
|
149
|
+
result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
|
|
150
|
+
if options[:print_full]
|
|
151
|
+
puts result
|
|
152
|
+
else
|
|
153
|
+
puts result.inference.prediction
|
|
154
|
+
end
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
The Ruby OCR SDK supports [custom-built APIs](https://developers.mindee.com/docs/build-your-first-document-parsing-api) from the API Builder.
|
|
2
|
+
|
|
3
|
+
If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
|
|
4
|
+
[API Builder](https://developers.mindee.com/docs/overview).
|
|
5
|
+
|
|
6
|
+
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr),
|
|
7
|
+
created with the [API Builder](https://developers.mindee.com/docs/overview).
|
|
8
|
+
|
|
9
|
+
> 📘 **Info**
|
|
10
|
+
>
|
|
11
|
+
> We used a data model that will be different from yours.
|
|
12
|
+
> To adapt this example to your own custom API, change the `add_endpoint` call to use your own parameters.
|
|
13
|
+
|
|
14
|
+
```ruby
|
|
15
|
+
require 'mindee'
|
|
16
|
+
|
|
17
|
+
# Init a new client and configure your custom document
|
|
18
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
|
19
|
+
'john',
|
|
20
|
+
'wnine',
|
|
21
|
+
version: '1.1' # optional, if not set, use the latest version of the model
|
|
22
|
+
)
|
|
23
|
+
|
|
24
|
+
# Load a file from disk and parse it
|
|
25
|
+
result = mindee_client.doc_from_path('/path/to/file.ext')
|
|
26
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
|
27
|
+
|
|
28
|
+
# Print a summary of the document prediction in RST format
|
|
29
|
+
puts result
|
|
30
|
+
```
|
|
31
|
+
|
|
32
|
+
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
|
33
|
+
This is probably not needed for development but essential for production use.
|
|
34
|
+
|
|
35
|
+
## Parsing Documents
|
|
36
|
+
Call the `parse` method on the loaded document to send it to the API for parsing; it returns an object containing the results.
|
|
37
|
+
The document type must be specified when calling the parse method.
|
|
38
|
+
|
|
39
|
+
```ruby
|
|
40
|
+
result = mindee_client.doc_from_path('/path/to/custom_file')
|
|
41
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
|
42
|
+
puts result
|
|
43
|
+
```
|
|
44
|
+
|
|
45
|
+
> 📘 **Info**
|
|
46
|
+
>
|
|
47
|
+
> If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
|
|
48
|
+
> you **must** specify your account name when calling the `parse` method:
|
|
49
|
+
|
|
50
|
+
```ruby
|
|
51
|
+
mindee_client = Mindee::Client.new.add_endpoint(
|
|
52
|
+
'john',
|
|
53
|
+
'receipt'
|
|
54
|
+
)
|
|
55
|
+
|
|
56
|
+
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
|
57
|
+
.parse(Mindee::Prediction::CustomV1, account_name: 'john')
|
|
58
|
+
```
|
|
59
|
+
|
|
60
|
+
## Document Fields
|
|
61
|
+
All the fields defined in the API builder when creating your custom document are available.
|
|
62
|
+
|
|
63
|
+
In custom documents, each field will hold an array of all the words in the document which are related to that field.
|
|
64
|
+
Each word is an object that has the text content, geometry information, and confidence score.
|
|
65
|
+
|
|
66
|
+
Value fields can be accessed via the `fields` attribute.
|
|
67
|
+
|
|
68
|
+
Classification fields can be accessed via the `classifications` attribute.
|
|
69
|
+
|
|
70
|
+
> 📘 **Info**
|
|
71
|
+
>
|
|
72
|
+
> Both document level and page level objects work in the same way.
|
|
73
|
+
|
|
74
|
+
### Fields Attribute
|
|
75
|
+
The `fields` attribute is a hashmap with the following structure:
|
|
76
|
+
|
|
77
|
+
* key: the API name of the field, as a `symbol`
|
|
78
|
+
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
|
79
|
+
|
|
80
|
+
Individual field values can be accessed by using the field's API name; in the examples below we'll use the `address` field.
|
|
81
|
+
|
|
82
|
+
```ruby
|
|
83
|
+
# raw data, list of each word object
|
|
84
|
+
pp result.inference.prediction.fields[:address].values
|
|
85
|
+
|
|
86
|
+
# list of all values
|
|
87
|
+
puts result.inference.prediction.fields[:address].contents_list
|
|
88
|
+
|
|
89
|
+
# default string representation
|
|
90
|
+
puts result.inference.prediction.fields[:address].to_s
|
|
91
|
+
|
|
92
|
+
# custom string representation
|
|
93
|
+
puts result.inference.prediction.fields[:address].contents_str(separator: '_')
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
To iterate over all the fields:
|
|
97
|
+
```ruby
|
|
98
|
+
result.inference.prediction.fields.each do |name, info|
|
|
99
|
+
puts name
|
|
100
|
+
puts info.values
|
|
101
|
+
end
|
|
102
|
+
```
|
|
103
|
+
|
|
104
|
+
### Classifications Attribute
|
|
105
|
+
The `classifications` attribute is a hashmap with the following structure:
|
|
106
|
+
|
|
107
|
+
* key: the API name of the field, as a `symbol`
|
|
108
|
+
* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
|
|
109
|
+
|
|
110
|
+
```ruby
|
|
111
|
+
# string representation of the detected classification
|
|
112
|
+
puts result.document.classifications[:doc_type].value
|
|
113
|
+
```
|
|
114
|
+
|
|
115
|
+
To iterate over all the classifications:
|
|
116
|
+
```ruby
|
|
117
|
+
result.document.classifications.each do |name, info|
|
|
118
|
+
puts name
|
|
119
|
+
puts info.value
|
|
120
|
+
end
|
|
121
|
+
```
|
|
122
|
+
|
|
123
|
+
## Questions?
|
|
124
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|