mindee 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +21 -0
- data/README.md +46 -23
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +2 -1
- metadata +57 -24
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 729bb9b6e8643c95c194583c4c7e217f9cbdc68149a72125767a2191291142be
|
4
|
+
data.tar.gz: cc2233ca32e0fadaf445e731417238b0fa272865f949fd0c91ef61916f86dfb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 162501228d7fed6ac0829aefcaae88390d43a91cd7b516a837471964b181fe084f9d200e6db447fb4c3b137c3f93ea801661e75ac151a087702143574b8ee9b6
|
7
|
+
data.tar.gz: 960d67e2a53626d3399628b9190972025df886efe50791dc72e5cb0d327f49c99352c66361ff952912b30288c562d3169d2f67ec49a0f8310ed47b40aeedb11d
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -26,7 +26,7 @@ Metrics/BlockLength:
|
|
26
26
|
- '**/*.gemspec'
|
27
27
|
|
28
28
|
Metrics/MethodLength:
|
29
|
-
Max:
|
29
|
+
Max: 45
|
30
30
|
|
31
31
|
Metrics/ClassLength:
|
32
32
|
Max: 200
|
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
|
|
35
35
|
Max: 7
|
36
36
|
|
37
37
|
Metrics/AbcSize:
|
38
|
-
Max:
|
38
|
+
Max: 60
|
39
39
|
|
40
40
|
Style/RegexpLiteral:
|
41
41
|
EnforcedStyle: percent_r
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,20 +1,41 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v2.0.0 - 2023-01-13
|
4
|
+
### ¡Breaking Changes!
|
5
|
+
* :sparkles: add improved PDF merge system
|
6
|
+
* :boom: it should be up to the user to handle API errors
|
7
|
+
* :wastebasket: remove deprecated APIs
|
8
|
+
* :recycle: refactor CLI tool
|
9
|
+
|
10
|
+
### Additions
|
11
|
+
* :sparkles: add support for Invoice v4.1 and Receipt v4.1
|
12
|
+
* :sparkles: add EU license plates
|
13
|
+
* :sparkles: add shipping containers support
|
14
|
+
* :sparkles: add US bank check support
|
15
|
+
* :sparkles: add all French documents
|
16
|
+
* :memo: Add YARD for generating docs
|
17
|
+
* :white_check_mark: add testing on Ruby 3.2
|
18
|
+
* :sparkles: allow setting the request timeout from env
|
19
|
+
|
3
20
|
## v1.2.0 - 2022-12-26
|
4
21
|
### Changes
|
5
22
|
* :arrow_up: switch to origamindee => adds support for Ruby 3
|
6
23
|
|
24
|
+
|
7
25
|
## v1.1.2 - 2022-12-23
|
8
26
|
### Changes
|
9
27
|
* :recycle: use of `append_page` is better for adding pages to a new PDF
|
10
28
|
|
29
|
+
|
11
30
|
## v1.1.1 - 2022-08-08
|
12
31
|
### Fixes
|
13
32
|
* :bug: Fix for missing attribute accessor
|
14
33
|
|
34
|
+
|
15
35
|
## v1.1.0 - 2022-08-04
|
16
36
|
### Changes
|
17
37
|
* :sparkles: Add support for custom API classification field (#5)
|
18
38
|
|
39
|
+
|
19
40
|
## v1.0.0 - 2022-07-28
|
20
41
|
* :tada: First release!
|
data/README.md
CHANGED
@@ -1,13 +1,10 @@
|
|
1
|
-
[![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT)
|
2
|
-
[![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby)
|
3
|
-
[![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee)
|
4
|
-
[![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
|
1
|
+
[![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby) [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee) [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
|
5
2
|
|
6
3
|
# Mindee API Helper Library for Ruby
|
7
4
|
Quickly and easily connect to Mindee's API services using Ruby.
|
8
5
|
|
9
6
|
## Requirements
|
10
|
-
The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1
|
7
|
+
The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
|
11
8
|
|
12
9
|
## Quick Start
|
13
10
|
Here's the TL;DR of getting started.
|
@@ -24,25 +21,38 @@ And then execute:
|
|
24
21
|
|
25
22
|
$ bundle install
|
26
23
|
|
27
|
-
|
24
|
+
Finally, Ruby away!
|
28
25
|
|
29
|
-
|
26
|
+
### Loading a File and Parsing It
|
30
27
|
|
31
|
-
|
28
|
+
#### Global Documents
|
29
|
+
```ruby
|
30
|
+
require 'mindee'
|
31
|
+
|
32
|
+
# Init a new client
|
33
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
32
34
|
|
33
|
-
|
35
|
+
# Load a file from disk and parse it
|
36
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
37
|
+
.parse(Mindee::Prediction::InvoiceV4)
|
38
|
+
|
39
|
+
# Print a full summary of the parsed data in RST format
|
40
|
+
puts result
|
41
|
+
```
|
42
|
+
|
43
|
+
#### Region-Specific Documents
|
34
44
|
```ruby
|
35
45
|
require 'mindee'
|
36
46
|
|
37
|
-
# Init a new client
|
38
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
47
|
+
# Init a new client
|
48
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
39
49
|
|
40
50
|
# Load a file from disk and parse it
|
41
|
-
|
42
|
-
.parse(
|
51
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
52
|
+
.parse(Mindee::Prediction::EU::LicensePlateV1)
|
43
53
|
|
44
|
-
# Print a
|
45
|
-
puts
|
54
|
+
# Print a full summary of the parsed data in RST format
|
55
|
+
puts result.document
|
46
56
|
```
|
47
57
|
|
48
58
|
### Custom Document (API Builder)
|
@@ -50,27 +60,40 @@ puts api_response.document
|
|
50
60
|
require 'mindee'
|
51
61
|
|
52
62
|
# Init a new client and configure your custom document
|
53
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key').
|
63
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
54
64
|
'john',
|
55
65
|
'wnine'
|
56
66
|
)
|
57
67
|
|
58
68
|
# Load a file from disk and parse it
|
59
|
-
|
60
|
-
.parse('wnine')
|
61
|
-
|
62
|
-
# Print a
|
63
|
-
puts
|
69
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
70
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
71
|
+
|
72
|
+
# Print a full summary of the parsed data in RST format
|
73
|
+
puts result
|
74
|
+
|
75
|
+
# Looping over all prediction values
|
76
|
+
result.inference.prediction.fields.each do |field_name, field_data|
|
77
|
+
puts field_name
|
78
|
+
puts field_data.values
|
79
|
+
puts field_data.to_s
|
80
|
+
end
|
64
81
|
```
|
65
82
|
|
66
83
|
## Further Reading
|
67
84
|
There's more to it than that for those that need more features, or want to
|
68
85
|
customize the experience.
|
69
86
|
|
70
|
-
|
71
|
-
|
87
|
+
- [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
|
88
|
+
- [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
|
89
|
+
- [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
|
90
|
+
- [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
|
91
|
+
- [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
|
72
92
|
|
73
93
|
## License
|
74
94
|
Copyright © Mindee, SA
|
75
95
|
|
76
96
|
Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
97
|
+
|
98
|
+
## Questions?
|
99
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/Rakefile
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'rake'
|
4
|
+
require 'rspec/core/rake_task'
|
5
|
+
require 'yard'
|
4
6
|
|
5
7
|
begin
|
6
8
|
require 'bundler/setup'
|
@@ -11,5 +13,8 @@ end
|
|
11
13
|
|
12
14
|
task default: :spec
|
13
15
|
|
14
|
-
require 'rspec/core/rake_task'
|
15
16
|
RSpec::Core::RakeTask.new(:spec)
|
17
|
+
|
18
|
+
YARD::Rake::YardocTask.new do |task|
|
19
|
+
task.files = ['lib/**/*.rb']
|
20
|
+
end
|
data/bin/mindee.rb
CHANGED
@@ -6,28 +6,52 @@ require 'optparse'
|
|
6
6
|
require 'mindee'
|
7
7
|
|
8
8
|
DOCUMENTS = {
|
9
|
+
"custom" => {
|
10
|
+
help: "Custom document type from API builder",
|
11
|
+
prediction: Mindee::Prediction::CustomV1,
|
12
|
+
},
|
9
13
|
"invoice" => {
|
10
14
|
help: 'Invoice',
|
11
|
-
|
15
|
+
prediction: Mindee::Prediction::InvoiceV4,
|
12
16
|
},
|
13
17
|
"receipt" => {
|
14
18
|
help: "Expense Receipt",
|
15
|
-
|
19
|
+
prediction: Mindee::Prediction::ReceiptV4,
|
16
20
|
},
|
17
21
|
"passport" => {
|
18
22
|
help: "Passport",
|
19
|
-
|
23
|
+
prediction: Mindee::Prediction::PassportV1,
|
20
24
|
},
|
21
|
-
"
|
22
|
-
help: "
|
23
|
-
|
25
|
+
"shipping-container" => {
|
26
|
+
help: "Shipping Container",
|
27
|
+
prediction: Mindee::Prediction::ShippingContainerV1,
|
24
28
|
},
|
25
|
-
"
|
26
|
-
help: "
|
29
|
+
"eu-license-plate" => {
|
30
|
+
help: "EU License Plate",
|
31
|
+
prediction: Mindee::Prediction::EU::LicensePlateV1,
|
32
|
+
},
|
33
|
+
"fr-bank-account-details" => {
|
34
|
+
help: "FR Bank Account Details",
|
35
|
+
prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
|
36
|
+
},
|
37
|
+
"fr-carte-vitale" => {
|
38
|
+
help: "FR Carte Vitale",
|
39
|
+
prediction: Mindee::Prediction::FR::CarteVitaleV1,
|
40
|
+
},
|
41
|
+
"fr-id-card" => {
|
42
|
+
help: "FR ID Card",
|
43
|
+
prediction: Mindee::Prediction::FR::IdCardV1,
|
44
|
+
},
|
45
|
+
"us-bank-check" => {
|
46
|
+
help: "US Bank Check",
|
47
|
+
prediction: Mindee::Prediction::US::BankCheckV1,
|
27
48
|
},
|
28
49
|
}
|
29
50
|
|
30
|
-
options = {
|
51
|
+
options = {
|
52
|
+
api_key: '',
|
53
|
+
print_full: false,
|
54
|
+
}
|
31
55
|
|
32
56
|
def ots_subcommand(command, options)
|
33
57
|
OptionParser.new do |opt|
|
@@ -38,20 +62,20 @@ def ots_subcommand(command, options)
|
|
38
62
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
39
63
|
options[:include_words] = v
|
40
64
|
end
|
41
|
-
opt.on('-
|
42
|
-
options[:
|
65
|
+
opt.on('-c', '--cut-pages', "Cut document pages") do |v|
|
66
|
+
options[:cut_pages] = v
|
43
67
|
end
|
44
68
|
end
|
45
69
|
end
|
46
70
|
|
47
71
|
def custom_subcommand(options)
|
48
72
|
OptionParser.new do |opt|
|
49
|
-
opt.banner = "Usage: custom [options]
|
73
|
+
opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
|
50
74
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
51
75
|
options[:include_words] = v
|
52
76
|
end
|
53
|
-
opt.on('-
|
54
|
-
options[:
|
77
|
+
opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
|
78
|
+
options[:cut_pages] = v
|
55
79
|
end
|
56
80
|
opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
|
57
81
|
options[:api_key] = v
|
@@ -59,79 +83,64 @@ def custom_subcommand(options)
|
|
59
83
|
opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
|
60
84
|
options[:version] = v
|
61
85
|
end
|
62
|
-
opt.on('-
|
63
|
-
options[:
|
86
|
+
opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
|
87
|
+
options[:account_name] = v
|
64
88
|
end
|
65
89
|
end
|
66
90
|
end
|
67
91
|
|
68
|
-
def new_ots_client(options, command)
|
69
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
70
|
-
mindee_client = Mindee::Client.new(
|
71
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
72
|
-
)
|
73
|
-
info = DOCUMENTS[command]
|
74
|
-
mindee_client.send("config_#{info[:doc_type]}")
|
75
|
-
end
|
76
|
-
|
77
|
-
def new_custom_client(options, doc_type)
|
78
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
79
|
-
mindee_client = Mindee::Client.new(
|
80
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
81
|
-
)
|
82
|
-
mindee_client.config_custom_doc(
|
83
|
-
doc_type,
|
84
|
-
options[:user],
|
85
|
-
version: options[:version] || '1'
|
86
|
-
)
|
87
|
-
end
|
88
|
-
|
89
92
|
global_parser = OptionParser.new do |opt|
|
90
93
|
opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
|
91
94
|
opt.separator('')
|
92
95
|
opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
|
93
96
|
opt.separator('')
|
94
|
-
opt.on('-
|
95
|
-
options[:
|
96
|
-
|
97
|
+
opt.on('-f', '--full', "Print the full data, including pages") do |v|
|
98
|
+
options[:print_full] = true
|
99
|
+
end
|
97
100
|
end
|
98
101
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
begin
|
109
|
-
global_parser.order!
|
110
|
-
command = ARGV.shift
|
111
|
-
subcommands[command].order!
|
112
|
-
rescue NoMethodError => e
|
102
|
+
global_parser.order!
|
103
|
+
command = ARGV.shift
|
104
|
+
if command == 'custom'
|
105
|
+
custom_subcommand(options).order!
|
106
|
+
elsif DOCUMENTS.keys.include? command || ''
|
107
|
+
ots_subcommand(command, options).order!
|
108
|
+
else
|
113
109
|
$stderr.puts global_parser
|
114
110
|
exit(1)
|
115
111
|
end
|
116
112
|
|
113
|
+
mindee_client = Mindee::Client.new(api_key: options[:api_key])
|
114
|
+
|
117
115
|
if command == 'custom'
|
118
116
|
if ARGV.length != 2
|
119
|
-
$stderr.puts "The 'custom' command requires both
|
117
|
+
$stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
|
120
118
|
exit(1)
|
121
119
|
end
|
122
120
|
doc_type = ARGV[0]
|
123
121
|
file_path = ARGV[1]
|
124
|
-
mindee_client
|
122
|
+
mindee_client.add_endpoint(
|
123
|
+
options[:account_name], doc_type, version: options[:version] || '1',
|
124
|
+
)
|
125
125
|
else
|
126
126
|
if ARGV.length != 1
|
127
127
|
$stderr.puts 'No file specified.'
|
128
128
|
exit(1)
|
129
129
|
end
|
130
|
-
|
131
|
-
doc_type = DOCUMENTS[command][:doc_type]
|
130
|
+
doc_type = ''
|
132
131
|
file_path = ARGV[0]
|
133
132
|
end
|
134
133
|
|
135
|
-
|
136
|
-
|
137
|
-
|
134
|
+
default_cutting = {
|
135
|
+
page_indexes: [0, 1, 2, 3, 4],
|
136
|
+
operation: :KEEP_ONLY,
|
137
|
+
on_min_pages: 0,
|
138
|
+
}
|
139
|
+
page_options = options[:cut_pages].nil? ? nil : default_cutting
|
140
|
+
doc = mindee_client.doc_from_path(file_path)
|
141
|
+
result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
|
142
|
+
if options[:print_full]
|
143
|
+
puts result
|
144
|
+
else
|
145
|
+
puts result.inference.prediction
|
146
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
The Ruby OCR SDK supports [custom-built APIs](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
|
2
|
+
from the API Builder.
|
3
|
+
|
4
|
+
If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
|
5
|
+
[API Builder](https://developers.mindee.com/docs/overview).
|
6
|
+
|
7
|
+
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
|
8
|
+
created with the [API Builder](https://developers.mindee.com/docs/overview).
|
9
|
+
|
10
|
+
> 📘 **Info**
|
11
|
+
>
|
12
|
+
> We used a data model that may be different from yours. To modify this to your own custom API,
|
13
|
+
> change the `config_custom_doc` call with your own parameters.
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
require 'mindee'
|
17
|
+
|
18
|
+
# Init a new client and configure your custom document
|
19
|
+
mindee_client = Mindee::Client.new(
|
20
|
+
api_key: 'my-api-key', # optional, can be set in environment
|
21
|
+
).config_custom_doc(
|
22
|
+
'wsnine',
|
23
|
+
'john',
|
24
|
+
version: '1.1' # optional, if not set, use the latest version of the model
|
25
|
+
)
|
26
|
+
|
27
|
+
# Load a file from disk and parse it
|
28
|
+
w9_data = mindee_client.doc_from_path('/path/to/file.pdf').parse('wsnine')
|
29
|
+
|
30
|
+
# Print a brief summary of the parsed data
|
31
|
+
puts w9_data.document.to_s
|
32
|
+
```
|
33
|
+
|
34
|
+
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
35
|
+
This is probably not needed for development but essential for production use.
|
36
|
+
|
37
|
+
## Parsing Documents
|
38
|
+
Call the `parse` method on the document loaded by the client to send your custom document to the API; it returns an object containing the parsed result.
|
39
|
+
The document type must be specified when calling the parse method.
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
result = mindee_client.doc_from_path('/path/to/custom_file').parse('wsnine')
|
43
|
+
puts result
|
44
|
+
```
|
45
|
+
|
46
|
+
> 📘 **Info**
|
47
|
+
>
|
48
|
+
> If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
|
49
|
+
> you **must** specify your account name when calling the `parse` method:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
mindee_client = Mindee::Client.new.config_custom_doc(
|
53
|
+
'receipt',
|
54
|
+
'john'
|
55
|
+
)
|
56
|
+
|
57
|
+
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
58
|
+
.parse('receipt', username: 'john')
|
59
|
+
```
|
60
|
+
|
61
|
+
## Document Fields
|
62
|
+
All the fields defined in the API builder when creating your custom document are available.
|
63
|
+
|
64
|
+
In custom documents, each field will hold an array of all the words in the document which are related to that field.
|
65
|
+
Each word is an object that has the text content, geometry information, and confidence score.
|
66
|
+
|
67
|
+
Value fields can be accessed either via the `fields` attribute, or as their own attributes set at run-time.
|
68
|
+
|
69
|
+
Classification fields can be accessed either via the `classifications` attribute, or as their own attributes set at run-time.
|
70
|
+
|
71
|
+
> 📘 **Info**
|
72
|
+
>
|
73
|
+
> Both document level and page level objects work in the same way.
|
74
|
+
|
75
|
+
### Run-time Attributes
|
76
|
+
Individual field values can be accessed simply by using the field's API name. In the examples below, we'll use the `address` field.
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
# raw data, list of each word object
|
80
|
+
puts w9_data.document.address.values
|
81
|
+
|
82
|
+
# list of all values
|
83
|
+
puts w9_data.document.address.contents_list
|
84
|
+
|
85
|
+
# default string representation
|
86
|
+
puts w9_data.document.address.to_s
|
87
|
+
|
88
|
+
# custom string representation
|
89
|
+
puts w9_data.document.address.contents_str(separator: '_')
|
90
|
+
```
|
91
|
+
|
92
|
+
### Fields property
|
93
|
+
In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
|
94
|
+
It's a hashmap with the following structure:
|
95
|
+
* key: the API name of the field, as a `symbol`
|
96
|
+
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
# raw data, list of each word object
|
100
|
+
puts w9_data.document.fields[:address].values
|
101
|
+
```
|
102
|
+
|
103
|
+
This makes it simple to iterate over all the fields:
|
104
|
+
```ruby
|
105
|
+
w9_data.document.fields.each do |name, info|
|
106
|
+
puts name
|
107
|
+
puts info.values
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
111
|
+
### Classifications property
|
112
|
+
In addition to accessing a classification field directly, it's possible to access it through the `classifications` attribute.
|
113
|
+
It's a hashmap with the following structure:
|
114
|
+
* key: the API name of the field, as a `symbol`
|
115
|
+
* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
# string representation of the detected classification
|
119
|
+
puts w9_data.document.classifications[:doc_type].value
|
120
|
+
```
|
121
|
+
|
122
|
+
This makes it simple to iterate over all the classifications:
|
123
|
+
```ruby
|
124
|
+
w9_data.document.classifications.each do |name, info|
|
125
|
+
puts name
|
126
|
+
puts info.value
|
127
|
+
end
|
128
|
+
```
|
129
|
+
|
130
|
+
## Questions?
|
131
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|