mindee 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +0 -7
- data/README.md +52 -21
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +11 -5
- metadata +105 -30
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 729bb9b6e8643c95c194583c4c7e217f9cbdc68149a72125767a2191291142be
|
4
|
+
data.tar.gz: cc2233ca32e0fadaf445e731417238b0fa272865f949fd0c91ef61916f86dfb4
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 162501228d7fed6ac0829aefcaae88390d43a91cd7b516a837471964b181fe084f9d200e6db447fb4c3b137c3f93ea801661e75ac151a087702143574b8ee9b6
|
7
|
+
data.tar.gz: 960d67e2a53626d3399628b9190972025df886efe50791dc72e5cb0d327f49c99352c66361ff952912b30288c562d3169d2f67ec49a0f8310ed47b40aeedb11d
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -26,7 +26,7 @@ Metrics/BlockLength:
|
|
26
26
|
- '**/*.gemspec'
|
27
27
|
|
28
28
|
Metrics/MethodLength:
|
29
|
-
Max:
|
29
|
+
Max: 45
|
30
30
|
|
31
31
|
Metrics/ClassLength:
|
32
32
|
Max: 200
|
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
|
|
35
35
|
Max: 7
|
36
36
|
|
37
37
|
Metrics/AbcSize:
|
38
|
-
Max:
|
38
|
+
Max: 60
|
39
39
|
|
40
40
|
Style/RegexpLiteral:
|
41
41
|
EnforcedStyle: percent_r
|
data/.yardopts
ADDED
data/CHANGELOG.md
CHANGED
@@ -1,16 +1,41 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v2.0.0 - 2023-01-13
|
4
|
+
### ¡Breaking Changes!
|
5
|
+
* :sparkles: add improved PDF merge system
|
6
|
+
* :boom: it should be up to the user to handle API errors
|
7
|
+
* :wastebasket: remove deprecated APIs
|
8
|
+
* :recycle: refactor CLI tool
|
9
|
+
|
10
|
+
### Additions
|
11
|
+
* :sparkles: add support for Invoice v4.1 and Receipt v4.1
|
12
|
+
* :sparkles: add EU license plates
|
13
|
+
* :sparkles: add shipping containers support
|
14
|
+
* :sparkles: add US bank check support
|
15
|
+
* :sparkles: add all French documents
|
16
|
+
* :memo: Add YARD for generating docs
|
17
|
+
* :white_check_mark: add testing on Ruby 3.2
|
18
|
+
* :sparkles: allow setting the request timeout from env
|
19
|
+
|
20
|
+
## v1.2.0 - 2022-12-26
|
21
|
+
### Changes
|
22
|
+
* :arrow_up: switch to origamindee => adds support for Ruby 3
|
23
|
+
|
24
|
+
|
3
25
|
## v1.1.2 - 2022-12-23
|
4
26
|
### Changes
|
5
27
|
* :recycle: use of `append_page` is better for adding pages to a new PDF
|
6
28
|
|
29
|
+
|
7
30
|
## v1.1.1 - 2022-08-08
|
8
31
|
### Fixes
|
9
32
|
* :bug: Fix for missing attribute accessor
|
10
33
|
|
34
|
+
|
11
35
|
## v1.1.0 - 2022-08-04
|
12
36
|
### Changes
|
13
37
|
* :sparkles: Add support for custom API classification field (#5)
|
14
38
|
|
39
|
+
|
15
40
|
## v1.0.0 - 2022-07-28
|
16
41
|
* :tada: First release!
|
data/Gemfile
CHANGED
data/README.md
CHANGED
@@ -1,6 +1,11 @@
|
|
1
|
+
[](https://opensource.org/licenses/MIT) [](https://github.com/mindee/mindee-api-ruby) [](https://rubygems.org/gems/mindee) [](https://rubygems.org/gems/mindee)
|
2
|
+
|
1
3
|
# Mindee API Helper Library for Ruby
|
2
4
|
Quickly and easily connect to Mindee's API services using Ruby.
|
3
5
|
|
6
|
+
## Requirements
|
7
|
+
The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
|
8
|
+
|
4
9
|
## Quick Start
|
5
10
|
Here's the TL;DR of getting started.
|
6
11
|
|
@@ -16,25 +21,38 @@ And then execute:
|
|
16
21
|
|
17
22
|
$ bundle install
|
18
23
|
|
19
|
-
|
24
|
+
Finally, Ruby away!
|
20
25
|
|
21
|
-
|
26
|
+
### Loading a File and Parsing It
|
22
27
|
|
23
|
-
|
28
|
+
#### Global Documents
|
29
|
+
```ruby
|
30
|
+
require 'mindee'
|
31
|
+
|
32
|
+
# Init a new client
|
33
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
24
34
|
|
25
|
-
|
35
|
+
# Load a file from disk and parse it
|
36
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
37
|
+
.parse(Mindee::Prediction::InvoiceV4)
|
38
|
+
|
39
|
+
# Print a full summary of the parsed data in RST format
|
40
|
+
puts result
|
41
|
+
```
|
42
|
+
|
43
|
+
#### Region-Specific Documents
|
26
44
|
```ruby
|
27
45
|
require 'mindee'
|
28
46
|
|
29
|
-
# Init a new client
|
30
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
47
|
+
# Init a new client
|
48
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
31
49
|
|
32
50
|
# Load a file from disk and parse it
|
33
|
-
|
34
|
-
.parse(
|
51
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
52
|
+
.parse(Mindee::Prediction::EU::LicensePlateV1)
|
35
53
|
|
36
|
-
# Print a
|
37
|
-
puts
|
54
|
+
# Print a full summary of the parsed data in RST format
|
55
|
+
puts result.document
|
38
56
|
```
|
39
57
|
|
40
58
|
### Custom Document (API Builder)
|
@@ -42,27 +60,40 @@ puts api_response.document
|
|
42
60
|
require 'mindee'
|
43
61
|
|
44
62
|
# Init a new client and configure your custom document
|
45
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key').
|
46
|
-
'
|
47
|
-
'
|
63
|
+
mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
|
64
|
+
'john',
|
65
|
+
'wnine'
|
48
66
|
)
|
49
67
|
|
50
68
|
# Load a file from disk and parse it
|
51
|
-
|
52
|
-
.parse('
|
53
|
-
|
54
|
-
# Print a
|
55
|
-
puts
|
69
|
+
result = mindee_client.doc_from_path('/path/to/the/file.ext')
|
70
|
+
.parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
|
71
|
+
|
72
|
+
# Print a full summary of the parsed data in RST format
|
73
|
+
puts result
|
74
|
+
|
75
|
+
# Looping over all prediction values
|
76
|
+
result.inference.prediction.fields.each do |field_name, field_data|
|
77
|
+
puts field_name
|
78
|
+
puts field_data.values
|
79
|
+
puts field_data.to_s
|
80
|
+
end
|
56
81
|
```
|
57
82
|
|
58
83
|
## Further Reading
|
59
84
|
There's more to it than that for those that need more features, or want to
|
60
85
|
customize the experience.
|
61
86
|
|
62
|
-
|
63
|
-
|
87
|
+
- [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
|
88
|
+
- [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
|
89
|
+
- [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
|
90
|
+
- [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
|
91
|
+
- [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
|
64
92
|
|
65
93
|
## License
|
66
|
-
Copyright © Mindee
|
94
|
+
Copyright © Mindee, SA
|
67
95
|
|
68
96
|
Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
|
97
|
+
|
98
|
+
## Questions?
|
99
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|
data/Rakefile
CHANGED
@@ -1,6 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'rake'
|
4
|
+
require 'rspec/core/rake_task'
|
5
|
+
require 'yard'
|
4
6
|
|
5
7
|
begin
|
6
8
|
require 'bundler/setup'
|
@@ -11,5 +13,8 @@ end
|
|
11
13
|
|
12
14
|
task default: :spec
|
13
15
|
|
14
|
-
require 'rspec/core/rake_task'
|
15
16
|
RSpec::Core::RakeTask.new(:spec)
|
17
|
+
|
18
|
+
YARD::Rake::YardocTask.new do |task|
|
19
|
+
task.files = ['lib/**/*.rb']
|
20
|
+
end
|
data/bin/mindee.rb
CHANGED
@@ -6,28 +6,52 @@ require 'optparse'
|
|
6
6
|
require 'mindee'
|
7
7
|
|
8
8
|
DOCUMENTS = {
|
9
|
+
"custom" => {
|
10
|
+
help: "Custom document type from API builder",
|
11
|
+
prediction: Mindee::Prediction::CustomV1,
|
12
|
+
},
|
9
13
|
"invoice" => {
|
10
14
|
help: 'Invoice',
|
11
|
-
|
15
|
+
prediction: Mindee::Prediction::InvoiceV4,
|
12
16
|
},
|
13
17
|
"receipt" => {
|
14
18
|
help: "Expense Receipt",
|
15
|
-
|
19
|
+
prediction: Mindee::Prediction::ReceiptV4,
|
16
20
|
},
|
17
21
|
"passport" => {
|
18
22
|
help: "Passport",
|
19
|
-
|
23
|
+
prediction: Mindee::Prediction::PassportV1,
|
20
24
|
},
|
21
|
-
"
|
22
|
-
help: "
|
23
|
-
|
25
|
+
"shipping-container" => {
|
26
|
+
help: "Shipping Container",
|
27
|
+
prediction: Mindee::Prediction::ShippingContainerV1,
|
24
28
|
},
|
25
|
-
"
|
26
|
-
help: "
|
29
|
+
"eu-license-plate" => {
|
30
|
+
help: "EU License Plate",
|
31
|
+
prediction: Mindee::Prediction::EU::LicensePlateV1,
|
32
|
+
},
|
33
|
+
"fr-bank-account-details" => {
|
34
|
+
help: "FR Bank Account Details",
|
35
|
+
prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
|
36
|
+
},
|
37
|
+
"fr-carte-vitale" => {
|
38
|
+
help: "FR Carte Vitale",
|
39
|
+
prediction: Mindee::Prediction::FR::CarteVitaleV1,
|
40
|
+
},
|
41
|
+
"fr-id-card" => {
|
42
|
+
help: "FR ID Card",
|
43
|
+
prediction: Mindee::Prediction::FR::IdCardV1,
|
44
|
+
},
|
45
|
+
"us-bank-check" => {
|
46
|
+
help: "US Bank Check",
|
47
|
+
prediction: Mindee::Prediction::US::BankCheckV1,
|
27
48
|
},
|
28
49
|
}
|
29
50
|
|
30
|
-
options = {
|
51
|
+
options = {
|
52
|
+
api_key: '',
|
53
|
+
print_full: false,
|
54
|
+
}
|
31
55
|
|
32
56
|
def ots_subcommand(command, options)
|
33
57
|
OptionParser.new do |opt|
|
@@ -38,20 +62,20 @@ def ots_subcommand(command, options)
|
|
38
62
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
39
63
|
options[:include_words] = v
|
40
64
|
end
|
41
|
-
opt.on('-
|
42
|
-
options[:
|
65
|
+
opt.on('-c', '--cut-pages', "Cut document pages") do |v|
|
66
|
+
options[:cut_pages] = v
|
43
67
|
end
|
44
68
|
end
|
45
69
|
end
|
46
70
|
|
47
71
|
def custom_subcommand(options)
|
48
72
|
OptionParser.new do |opt|
|
49
|
-
opt.banner = "Usage: custom [options]
|
73
|
+
opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
|
50
74
|
opt.on('-w', '--with-words', 'Include words in response') do |v|
|
51
75
|
options[:include_words] = v
|
52
76
|
end
|
53
|
-
opt.on('-
|
54
|
-
options[:
|
77
|
+
opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
|
78
|
+
options[:cut_pages] = v
|
55
79
|
end
|
56
80
|
opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
|
57
81
|
options[:api_key] = v
|
@@ -59,79 +83,64 @@ def custom_subcommand(options)
|
|
59
83
|
opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
|
60
84
|
options[:version] = v
|
61
85
|
end
|
62
|
-
opt.on('-
|
63
|
-
options[:
|
86
|
+
opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
|
87
|
+
options[:account_name] = v
|
64
88
|
end
|
65
89
|
end
|
66
90
|
end
|
67
91
|
|
68
|
-
def new_ots_client(options, command)
|
69
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
70
|
-
mindee_client = Mindee::Client.new(
|
71
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
72
|
-
)
|
73
|
-
info = DOCUMENTS[command]
|
74
|
-
mindee_client.send("config_#{info[:doc_type]}")
|
75
|
-
end
|
76
|
-
|
77
|
-
def new_custom_client(options, doc_type)
|
78
|
-
raise_on_error = options[:no_raise_errors].nil? ? true : false
|
79
|
-
mindee_client = Mindee::Client.new(
|
80
|
-
api_key: options[:api_key], raise_on_error: raise_on_error
|
81
|
-
)
|
82
|
-
mindee_client.config_custom_doc(
|
83
|
-
doc_type,
|
84
|
-
options[:user],
|
85
|
-
version: options[:version] || '1'
|
86
|
-
)
|
87
|
-
end
|
88
|
-
|
89
92
|
global_parser = OptionParser.new do |opt|
|
90
93
|
opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
|
91
94
|
opt.separator('')
|
92
95
|
opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
|
93
96
|
opt.separator('')
|
94
|
-
opt.on('-
|
95
|
-
options[:
|
96
|
-
|
97
|
+
opt.on('-f', '--full', "Print the full data, including pages") do |v|
|
98
|
+
options[:print_full] = true
|
99
|
+
end
|
97
100
|
end
|
98
101
|
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
begin
|
109
|
-
global_parser.order!
|
110
|
-
command = ARGV.shift
|
111
|
-
subcommands[command].order!
|
112
|
-
rescue NoMethodError => e
|
102
|
+
global_parser.order!
|
103
|
+
command = ARGV.shift
|
104
|
+
if command == 'custom'
|
105
|
+
custom_subcommand(options).order!
|
106
|
+
elsif DOCUMENTS.keys.include? command || ''
|
107
|
+
ots_subcommand(command, options).order!
|
108
|
+
else
|
113
109
|
$stderr.puts global_parser
|
114
110
|
exit(1)
|
115
111
|
end
|
116
112
|
|
113
|
+
mindee_client = Mindee::Client.new(api_key: options[:api_key])
|
114
|
+
|
117
115
|
if command == 'custom'
|
118
116
|
if ARGV.length != 2
|
119
|
-
$stderr.puts "The 'custom' command requires both
|
117
|
+
$stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
|
120
118
|
exit(1)
|
121
119
|
end
|
122
120
|
doc_type = ARGV[0]
|
123
121
|
file_path = ARGV[1]
|
124
|
-
mindee_client
|
122
|
+
mindee_client.add_endpoint(
|
123
|
+
options[:account_name], doc_type, version: options[:version] || '1',
|
124
|
+
)
|
125
125
|
else
|
126
126
|
if ARGV.length != 1
|
127
127
|
$stderr.puts 'No file specified.'
|
128
128
|
exit(1)
|
129
129
|
end
|
130
|
-
|
131
|
-
doc_type = DOCUMENTS[command][:doc_type]
|
130
|
+
doc_type = ''
|
132
131
|
file_path = ARGV[0]
|
133
132
|
end
|
134
133
|
|
135
|
-
|
136
|
-
|
137
|
-
|
134
|
+
default_cutting = {
|
135
|
+
page_indexes: [0, 1, 2, 3, 4],
|
136
|
+
operation: :KEEP_ONLY,
|
137
|
+
on_min_pages: 0,
|
138
|
+
}
|
139
|
+
page_options = options[:cut_pages].nil? ? nil : default_cutting
|
140
|
+
doc = mindee_client.doc_from_path(file_path)
|
141
|
+
result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
|
142
|
+
if options[:print_full]
|
143
|
+
puts result
|
144
|
+
else
|
145
|
+
puts result.inference.prediction
|
146
|
+
end
|
@@ -0,0 +1,131 @@
|
|
1
|
+
The Ruby OCR SDK supports [custom-built APIs](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
|
2
|
+
from the API Builder.
|
3
|
+
|
4
|
+
If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
|
5
|
+
[API Builder](https://developers.mindee.com/docs/overview).
|
6
|
+
|
7
|
+
For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
|
8
|
+
created with the [API Builder](https://developers.mindee.com/docs/overview).
|
9
|
+
|
10
|
+
> 📘 **Info**
|
11
|
+
>
|
12
|
+
> We used a data model that may be different from yours. To modify this to your own custom API,
|
13
|
+
> change the `config_custom_doc` call with your own parameters.
|
14
|
+
|
15
|
+
```ruby
|
16
|
+
require 'mindee'
|
17
|
+
|
18
|
+
# Init a new client and configure your custom document
|
19
|
+
mindee_client = Mindee::Client.new(
|
20
|
+
api_key: 'my-api-key', # optional, can be set in environment
|
21
|
+
).config_custom_doc(
|
22
|
+
'wsnine',
|
23
|
+
'john',
|
24
|
+
version: '1.1' # optional, if not set, use the latest version of the model
|
25
|
+
)
|
26
|
+
|
27
|
+
# Load a file from disk and parse it
|
28
|
+
w9_data = mindee_client.doc_from_path('/path/to/file.pdf').parse('wsnine')
|
29
|
+
|
30
|
+
# Print a brief summary of the parsed data
|
31
|
+
puts w9_data.document.to_s
|
32
|
+
```
|
33
|
+
|
34
|
+
If the `version` argument is set, you'll be required to update it every time a new model is trained.
|
35
|
+
This is probably not needed for development but essential for production use.
|
36
|
+
|
37
|
+
## Parsing Documents
|
38
|
+
Call the `parse` method on the document object returned by `doc_from_path` to send your custom document to the API; it returns the parsed result.
|
39
|
+
The document type must be specified when calling the parse method.
|
40
|
+
|
41
|
+
```ruby
|
42
|
+
result = mindee_client.doc_from_path('/path/to/custom_file').parse('wsnine')
|
43
|
+
puts result
|
44
|
+
```
|
45
|
+
|
46
|
+
> 📘 **Info**
|
47
|
+
>
|
48
|
+
> If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
|
49
|
+
> you **must** specify your account name when calling the `parse` method:
|
50
|
+
|
51
|
+
```ruby
|
52
|
+
mindee_client = Mindee::Client.new.config_custom_doc(
|
53
|
+
'receipt',
|
54
|
+
'john'
|
55
|
+
)
|
56
|
+
|
57
|
+
result = mindee_client.doc_from_path('/path/to/receipt.jpg')
|
58
|
+
.parse('receipt', username: 'john')
|
59
|
+
```
|
60
|
+
|
61
|
+
## Document Fields
|
62
|
+
All the fields defined in the API builder when creating your custom document are available.
|
63
|
+
|
64
|
+
In custom documents, each field will hold an array of all the words in the document which are related to that field.
|
65
|
+
Each word is an object that has the text content, geometry information, and confidence score.
|
66
|
+
|
67
|
+
Value fields can be accessed either via the `fields` attribute, or as their own attributes set at run-time.
|
68
|
+
|
69
|
+
Classification fields can be accessed either via the `classifications` attribute, or as their own attributes set at run-time.
|
70
|
+
|
71
|
+
> 📘 **Info**
|
72
|
+
>
|
73
|
+
> Both document level and page level objects work in the same way.
|
74
|
+
|
75
|
+
### Run-time Attributes
|
76
|
+
Individual field values can be accessed simply by using the field's API name. In the examples below, we'll use the `address` field.
|
77
|
+
|
78
|
+
```ruby
|
79
|
+
# raw data, list of each word object
|
80
|
+
puts w9_data.document.address.values
|
81
|
+
|
82
|
+
# list of all values
|
83
|
+
puts w9_data.document.address.contents_list
|
84
|
+
|
85
|
+
# default string representation
|
86
|
+
puts w9_data.document.address.to_s
|
87
|
+
|
88
|
+
# custom string representation
|
89
|
+
puts w9_data.document.address.contents_str(separator: '_')
|
90
|
+
```
|
91
|
+
|
92
|
+
### Fields property
|
93
|
+
In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
|
94
|
+
It's a hashmap with the following structure:
|
95
|
+
* key: the API name of the field, as a `symbol`
|
96
|
+
* value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
|
97
|
+
|
98
|
+
```ruby
|
99
|
+
# raw data, list of each word object
|
100
|
+
puts w9_data.document.fields[:address].values
|
101
|
+
```
|
102
|
+
|
103
|
+
This makes it simple to iterate over all the fields:
|
104
|
+
```ruby
|
105
|
+
w9_data.document.fields.each do |name, info|
|
106
|
+
puts name
|
107
|
+
puts info.values
|
108
|
+
end
|
109
|
+
```
|
110
|
+
|
111
|
+
### Classifications property
|
112
|
+
In addition to accessing a classification field directly, it's possible to access it through the `classifications` attribute.
|
113
|
+
It's a hashmap with the following structure:
|
114
|
+
* key: the API name of the field, as a `symbol`
|
115
|
+
* value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
# string representation of the detected classification
|
119
|
+
puts w9_data.document.classifications[:doc_type].value
|
120
|
+
```
|
121
|
+
|
122
|
+
This makes it simple to iterate over all the classifications:
|
123
|
+
```ruby
|
124
|
+
w9_data.document.classifications.each do |name, info|
|
125
|
+
puts name
|
126
|
+
puts info.value
|
127
|
+
end
|
128
|
+
```
|
129
|
+
|
130
|
+
## Questions?
|
131
|
+
[Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
|