mindee 1.2.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +21 -0
  6. data/README.md +46 -23
  7. data/Rakefile +6 -1
  8. data/bin/mindee.rb +70 -61
  9. data/docs/ruby-api-builder.md +131 -0
  10. data/docs/ruby-getting-started.md +265 -0
  11. data/docs/ruby-invoice-ocr.md +261 -0
  12. data/docs/ruby-passport-ocr.md +156 -0
  13. data/docs/ruby-receipt-ocr.md +170 -0
  14. data/lib/mindee/client.rb +128 -93
  15. data/lib/mindee/document_config.rb +22 -154
  16. data/lib/mindee/geometry.rb +105 -8
  17. data/lib/mindee/http/endpoint.rb +80 -0
  18. data/lib/mindee/input/pdf_processing.rb +106 -0
  19. data/lib/mindee/input/sources.rb +97 -0
  20. data/lib/mindee/input.rb +3 -0
  21. data/lib/mindee/parsing/document.rb +31 -0
  22. data/lib/mindee/parsing/error.rb +22 -0
  23. data/lib/mindee/parsing/inference.rb +53 -0
  24. data/lib/mindee/parsing/page.rb +46 -0
  25. data/lib/mindee/parsing/prediction/base.rb +30 -0
  26. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  29. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  30. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  32. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  33. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  34. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  35. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  36. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  37. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  38. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  39. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  40. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  41. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  42. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  44. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  45. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  46. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  47. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  48. data/lib/mindee/parsing/prediction.rb +12 -0
  49. data/lib/mindee/parsing.rb +4 -0
  50. data/lib/mindee/version.rb +1 -1
  51. data/mindee.gemspec +2 -1
  52. metadata +57 -24
  53. data/lib/mindee/documents/base.rb +0 -35
  54. data/lib/mindee/documents/custom.rb +0 -65
  55. data/lib/mindee/documents/financial_doc.rb +0 -135
  56. data/lib/mindee/documents/invoice.rb +0 -162
  57. data/lib/mindee/documents/passport.rb +0 -163
  58. data/lib/mindee/documents/receipt.rb +0 -109
  59. data/lib/mindee/documents.rb +0 -7
  60. data/lib/mindee/endpoint.rb +0 -105
  61. data/lib/mindee/fields/orientation.rb +0 -26
  62. data/lib/mindee/fields.rb +0 -11
  63. data/lib/mindee/inputs.rb +0 -153
  64. data/lib/mindee/response.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0b76bde0354208269f645114e453b15ce48569ae3ce01fd194582f67a2ef6a4
4
- data.tar.gz: 96a22bf367165b1c06d93bc90f91ef4c71f16781dd43cb954852ca0343e583df
3
+ metadata.gz: 729bb9b6e8643c95c194583c4c7e217f9cbdc68149a72125767a2191291142be
4
+ data.tar.gz: cc2233ca32e0fadaf445e731417238b0fa272865f949fd0c91ef61916f86dfb4
5
5
  SHA512:
6
- metadata.gz: 00060a15fad08a14dccbde8d29a482ae54af57458e742893cf2ac2b61a66201e7dc098a92cee4c1b6a47877fc2b9db05a391bb50fb448b49aadeeb478ce03047
7
- data.tar.gz: 81192f06c2e143162675eb1da1caf93481be8a483d8b1dd4dd6d0f6e0eb8869a27163404f260700d5d1e936bdc6523828f8279c820a424da0d96df4a77f467da
6
+ metadata.gz: 162501228d7fed6ac0829aefcaae88390d43a91cd7b516a837471964b181fe084f9d200e6db447fb4c3b137c3f93ea801661e75ac151a087702143574b8ee9b6
7
+ data.tar.gz: 960d67e2a53626d3399628b9190972025df886efe50791dc72e5cb0d327f49c99352c66361ff952912b30288c562d3169d2f67ec49a0f8310ed47b40aeedb11d
data/.gitignore CHANGED
@@ -42,7 +42,7 @@ build-iPhoneSimulator/
42
42
  ## Documentation cache and generated files:
43
43
  /.yardoc/
44
44
  /_yardoc/
45
- /doc/
45
+ /docs/_build/
46
46
  /rdoc/
47
47
 
48
48
  ## Environment normalization:
data/.rubocop.yml CHANGED
@@ -26,7 +26,7 @@ Metrics/BlockLength:
26
26
  - '**/*.gemspec'
27
27
 
28
28
  Metrics/MethodLength:
29
- Max: 35
29
+ Max: 45
30
30
 
31
31
  Metrics/ClassLength:
32
32
  Max: 200
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
35
35
  Max: 7
36
36
 
37
37
  Metrics/AbcSize:
38
- Max: 50
38
+ Max: 60
39
39
 
40
40
  Style/RegexpLiteral:
41
41
  EnforcedStyle: percent_r
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ --markup markdown
2
+ --main README.md
3
+ --files docs/ruby-getting-started.md,docs/ruby-invoice-ocr.md,docs/ruby-passport-ocr.md,docs/ruby-receipt-ocr.md
4
+ --output-dir docs/_build
data/CHANGELOG.md CHANGED
@@ -1,20 +1,41 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v2.0.0 - 2023-01-13
4
+ ### ¡Breaking Changes!
5
+ * :sparkles: add improved PDF merge system
6
+ * :boom: it should be up to the user to handle API errors
7
+ * :wastebasket: remove deprecated APIs
8
+ * :recycle: refactor CLI tool
9
+
10
+ ### Additions
11
+ * :sparkles: add support for Invoice v4.1 and Receipt v4.1
12
+ * :sparkles: add EU license plates
13
+ * :sparkles: add shipping containers support
14
+ * :sparkles: add US bank check support
15
+ * :sparkles: add all French documents
16
+ * :memo: Add YARD for generating docs
17
+ * :white_check_mark: add testing on Ruby 3.2
18
+ * :sparkles: allow setting the request timeout from env
19
+
3
20
  ## v1.2.0 - 2022-12-26
4
21
  ### Changes
5
22
  * :arrow_up: switch to origamindee => adds support for Ruby 3
6
23
 
24
+
7
25
  ## v1.1.2 - 2022-12-23
8
26
  ### Changes
9
27
  * :recycle: use of `append_page` is better for adding pages to a new PDF
10
28
 
29
+
11
30
  ## v1.1.1 - 2022-08-08
12
31
  ### Fixes
13
32
  * :bug: Fix for missing attribute accessor
14
33
 
34
+
15
35
  ## v1.1.0 - 2022-08-04
16
36
  ### Changes
17
37
  * :sparkles: Add support for custom API classification field (#5)
18
38
 
39
+
19
40
  ## v1.0.0 - 2022-07-28
20
41
  * :tada: First release!
data/README.md CHANGED
@@ -1,13 +1,10 @@
1
- [![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT)
2
- [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby)
3
- [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee)
4
- [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
1
+ [![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby) [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee) [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
5
2
 
6
3
  # Mindee API Helper Library for Ruby
7
4
  Quickly and easily connect to Mindee's API services using Ruby.
8
5
 
9
6
  ## Requirements
10
- The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1
7
+ The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
11
8
 
12
9
  ## Quick Start
13
10
  Here's the TL;DR of getting started.
@@ -24,25 +21,38 @@ And then execute:
24
21
 
25
22
  $ bundle install
26
23
 
27
- Or install it yourself as:
24
+ Finally, Ruby away!
28
25
 
29
- $ gem install mindee
26
+ ### Loading a File and Parsing It
30
27
 
31
- Finally, Ruby away!
28
+ #### Global Documents
29
+ ```ruby
30
+ require 'mindee'
31
+
32
+ # Init a new client
33
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
32
34
 
33
- ### Off-the-Shelf Document
35
+ # Load a file from disk and parse it
36
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
37
+ .parse(Mindee::Prediction::InvoiceV4)
38
+
39
+ # Print a full summary of the parsed data in RST format
40
+ puts result
41
+ ```
42
+
43
+ #### Region-Specific Documents
34
44
  ```ruby
35
45
  require 'mindee'
36
46
 
37
- # Init a new client and configure the Invoice API
38
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_invoice
47
+ # Init a new client
48
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
39
49
 
40
50
  # Load a file from disk and parse it
41
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
42
- .parse('invoice')
51
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
52
+ .parse(Mindee::Prediction::EU::LicensePlateV1)
43
53
 
44
- # Print a brief summary of the parsed data
45
- puts api_response.document
54
+ # Print a full summary of the parsed data in RST format
55
+ puts result.document
46
56
  ```
47
57
 
48
58
  ### Custom Document (API Builder)
@@ -50,27 +60,40 @@ puts api_response.document
50
60
  require 'mindee'
51
61
 
52
62
  # Init a new client and configure your custom document
53
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_custom_doc(
63
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
54
64
  'john',
55
65
  'wnine'
56
66
  )
57
67
 
58
68
  # Load a file from disk and parse it
59
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
60
- .parse('wnine')
61
-
62
- # Print a brief summary of the parsed data
63
- puts api_response.document
69
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
70
+ .parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
71
+
72
+ # Print a full summary of the parsed data in RST format
73
+ puts result
74
+
75
+ # Looping over all prediction values
76
+ result.inference.prediction.fields.each do |field_name, field_data|
77
+ puts field_name
78
+ puts field_data.values
79
+ puts field_data.to_s
80
+ end
64
81
  ```
65
82
 
66
83
  ## Further Reading
67
84
  There's more to it than that for those that need more features, or want to
68
85
  customize the experience.
69
86
 
70
- All the juicy details are described in the
71
- **[Official Documentation](https://developers.mindee.com/docs/ruby-getting-started)**.
87
+ - [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
88
+ - [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
89
+ - [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
90
+ - [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
91
+ - [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
72
92
 
73
93
  ## License
74
94
  Copyright © Mindee, SA
75
95
 
76
96
  Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
97
+
98
+ ## Questions?
99
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rake'
4
+ require 'rspec/core/rake_task'
5
+ require 'yard'
4
6
 
5
7
  begin
6
8
  require 'bundler/setup'
@@ -11,5 +13,8 @@ end
11
13
 
12
14
  task default: :spec
13
15
 
14
- require 'rspec/core/rake_task'
15
16
  RSpec::Core::RakeTask.new(:spec)
17
+
18
+ YARD::Rake::YardocTask.new do |task|
19
+ task.files = ['lib/**/*.rb']
20
+ end
data/bin/mindee.rb CHANGED
@@ -6,28 +6,52 @@ require 'optparse'
6
6
  require 'mindee'
7
7
 
8
8
  DOCUMENTS = {
9
+ "custom" => {
10
+ help: "Custom document type from API builder",
11
+ prediction: Mindee::Prediction::CustomV1,
12
+ },
9
13
  "invoice" => {
10
14
  help: 'Invoice',
11
- doc_type: Mindee::Client::DOC_TYPE_INVOICE,
15
+ prediction: Mindee::Prediction::InvoiceV4,
12
16
  },
13
17
  "receipt" => {
14
18
  help: "Expense Receipt",
15
- doc_type: Mindee::Client::DOC_TYPE_RECEIPT,
19
+ prediction: Mindee::Prediction::ReceiptV4,
16
20
  },
17
21
  "passport" => {
18
22
  help: "Passport",
19
- doc_type: Mindee::Client::DOC_TYPE_PASSPORT,
23
+ prediction: Mindee::Prediction::PassportV1,
20
24
  },
21
- "financial" => {
22
- help: "Financial Document (receipt or invoice)",
23
- doc_type: Mindee::Client::DOC_TYPE_FINANCIAL,
25
+ "shipping-container" => {
26
+ help: "Shipping Container",
27
+ prediction: Mindee::Prediction::ShippingContainerV1,
24
28
  },
25
- "custom" => {
26
- help: "Custom document type from API builder",
29
+ "eu-license-plate" => {
30
+ help: "EU License Plate",
31
+ prediction: Mindee::Prediction::EU::LicensePlateV1,
32
+ },
33
+ "fr-bank-account-details" => {
34
+ help: "FR Bank Account Details",
35
+ prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
36
+ },
37
+ "fr-carte-vitale" => {
38
+ help: "FR Carte Vitale",
39
+ prediction: Mindee::Prediction::FR::CarteVitaleV1,
40
+ },
41
+ "fr-id-card" => {
42
+ help: "FR ID Card",
43
+ prediction: Mindee::Prediction::FR::IdCardV1,
44
+ },
45
+ "us-bank-check" => {
46
+ help: "US Bank Check",
47
+ prediction: Mindee::Prediction::US::BankCheckV1,
27
48
  },
28
49
  }
29
50
 
30
- options = {}
51
+ options = {
52
+ api_key: '',
53
+ print_full: false,
54
+ }
31
55
 
32
56
  def ots_subcommand(command, options)
33
57
  OptionParser.new do |opt|
@@ -38,20 +62,20 @@ def ots_subcommand(command, options)
38
62
  opt.on('-w', '--with-words', 'Include words in response') do |v|
39
63
  options[:include_words] = v
40
64
  end
41
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
42
- options[:include_words] = v
65
+ opt.on('-c', '--cut-pages', "Cut document pages") do |v|
66
+ options[:cut_pages] = v
43
67
  end
44
68
  end
45
69
  end
46
70
 
47
71
  def custom_subcommand(options)
48
72
  OptionParser.new do |opt|
49
- opt.banner = "Usage: custom [options] DOC_TYPE FILE"
73
+ opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
50
74
  opt.on('-w', '--with-words', 'Include words in response') do |v|
51
75
  options[:include_words] = v
52
76
  end
53
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
54
- options[:include_words] = v
77
+ opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
78
+ options[:cut_pages] = v
55
79
  end
56
80
  opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
57
81
  options[:api_key] = v
@@ -59,79 +83,64 @@ def custom_subcommand(options)
59
83
  opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
60
84
  options[:version] = v
61
85
  end
62
- opt.on('-u USER', '--user USER', 'API account name for the endpoint') do |v|
63
- options[:user] = v
86
+ opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
87
+ options[:account_name] = v
64
88
  end
65
89
  end
66
90
  end
67
91
 
68
- def new_ots_client(options, command)
69
- raise_on_error = options[:no_raise_errors].nil? ? true : false
70
- mindee_client = Mindee::Client.new(
71
- api_key: options[:api_key], raise_on_error: raise_on_error
72
- )
73
- info = DOCUMENTS[command]
74
- mindee_client.send("config_#{info[:doc_type]}")
75
- end
76
-
77
- def new_custom_client(options, doc_type)
78
- raise_on_error = options[:no_raise_errors].nil? ? true : false
79
- mindee_client = Mindee::Client.new(
80
- api_key: options[:api_key], raise_on_error: raise_on_error
81
- )
82
- mindee_client.config_custom_doc(
83
- doc_type,
84
- options[:user],
85
- version: options[:version] || '1'
86
- )
87
- end
88
-
89
92
  global_parser = OptionParser.new do |opt|
90
93
  opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
91
94
  opt.separator('')
92
95
  opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
93
96
  opt.separator('')
94
- opt.on('-E', '--no-raise-errors', "raise errors behavior") do |v|
95
- options[:no_raise_errors] = true
96
- end
97
+ opt.on('-f', '--full', "Print the full data, including pages") do |v|
98
+ options[:print_full] = true
99
+ end
97
100
  end
98
101
 
99
- subcommands = {
100
- 'invoice' => ots_subcommand('invoice', options),
101
- 'receipt' => ots_subcommand('receipt', options),
102
- 'passport' => ots_subcommand('passport', options),
103
- 'financial' => ots_subcommand('financial', options),
104
- 'custom' => custom_subcommand(options),
105
- }
106
-
107
-
108
- begin
109
- global_parser.order!
110
- command = ARGV.shift
111
- subcommands[command].order!
112
- rescue NoMethodError => e
102
+ global_parser.order!
103
+ command = ARGV.shift
104
+ if command == 'custom'
105
+ custom_subcommand(options).order!
106
+ elsif DOCUMENTS.keys.include? command || ''
107
+ ots_subcommand(command, options).order!
108
+ else
113
109
  $stderr.puts global_parser
114
110
  exit(1)
115
111
  end
116
112
 
113
+ mindee_client = Mindee::Client.new(api_key: options[:api_key])
114
+
117
115
  if command == 'custom'
118
116
  if ARGV.length != 2
119
- $stderr.puts "The 'custom' command requires both DOC_TYPE and FILE arguments."
117
+ $stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
120
118
  exit(1)
121
119
  end
122
120
  doc_type = ARGV[0]
123
121
  file_path = ARGV[1]
124
- mindee_client = new_custom_client(options, doc_type)
122
+ mindee_client.add_endpoint(
123
+ options[:account_name], doc_type, version: options[:version] || '1',
124
+ )
125
125
  else
126
126
  if ARGV.length != 1
127
127
  $stderr.puts 'No file specified.'
128
128
  exit(1)
129
129
  end
130
- mindee_client = new_ots_client(options, command)
131
- doc_type = DOCUMENTS[command][:doc_type]
130
+ doc_type = ''
132
131
  file_path = ARGV[0]
133
132
  end
134
133
 
135
- cut_pages = options[:no_cut_pages].nil? ? false : true
136
- doc = mindee_client.doc_from_path(file_path, cut_pages: cut_pages)
137
- puts doc.parse(doc_type).document
134
+ default_cutting = {
135
+ page_indexes: [0, 1, 2, 3, 4],
136
+ operation: :KEEP_ONLY,
137
+ on_min_pages: 0,
138
+ }
139
+ page_options = options[:cut_pages].nil? ? nil : default_cutting
140
+ doc = mindee_client.doc_from_path(file_path)
141
+ result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
142
+ if options[:print_full]
143
+ puts result
144
+ else
145
+ puts result.inference.prediction
146
+ end
@@ -0,0 +1,131 @@
1
+ The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
2
+ from the API Builder.
3
+
4
+ If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
5
+ [API Builder](https://developers.mindee.com/docs/overview).
6
+
7
+ For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
8
+ created with the [API Builder](https://developers.mindee.com/docs/overview).
9
+
10
+ > 📘 **Info**
11
+ >
12
+ > We used a data model that may be different from yours. To modify this to your own custom API,
13
+ > update the `config_custom_doc` call with your own parameters.
14
+
15
+ ```ruby
16
+ require 'mindee'
17
+
18
+ # Init a new client and configure your custom document
19
+ mindee_client = Mindee::Client.new(
20
+ api_key: 'my-api-key', # optional, can be set in environment
21
+ ).config_custom_doc(
22
+ 'wsnine',
23
+ 'john',
24
+ version: '1.1' # optional, if not set, use the latest version of the model
25
+ )
26
+
27
+ # Load a file from disk and parse it
28
+ w9_data = mindee_client.doc_from_path('/path/to/file.pdf').parse('wsnine')
29
+
30
+ # Print a brief summary of the parsed data
31
+ puts w9_data.document.to_s
32
+ ```
33
+
34
+ If the `version` argument is set, you'll be required to update it every time a new model is trained.
35
+ This is probably not needed for development but essential for production use.
36
+
37
+ ## Parsing Documents
38
+ Call the `parse` method on the loaded document to send it to the API; it returns an object containing the parsed result.
39
+ The document type must be specified when calling the parse method.
40
+
41
+ ```ruby
42
+ result = mindee_client.doc_from_path('/path/to/custom_file').parse('wsnine')
43
+ puts result
44
+ ```
45
+
46
+ > 📘 **Info**
47
+ >
48
+ > If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
49
+ > you **must** specify your account name when calling the `parse` method:
50
+
51
+ ```ruby
52
+ mindee_client = Mindee::Client.new.config_custom_doc(
53
+ 'receipt',
54
+ 'john'
55
+ )
56
+
57
+ result = mindee_client.doc_from_path('/path/to/receipt.jpg')
58
+ .parse('receipt', username: 'john')
59
+ ```
60
+
61
+ ## Document Fields
62
+ All the fields defined in the API builder when creating your custom document are available.
63
+
64
+ In custom documents, each field will hold an array of all the words in the document which are related to that field.
65
+ Each word is an object that has the text content, geometry information, and confidence score.
66
+
67
+ Value fields can be accessed either via the `fields` attribute, or as their own attributes set at run-time.
68
+
69
+ Classification fields can be accessed either via the `classifications` attribute, or as their own attributes set at run-time.
70
+
71
+ > 📘 **Info**
72
+ >
73
+ > Both document level and page level objects work in the same way.
74
+
75
+ ### Run-time Attributes
76
+ Individual field values can be accessed simply by using the field's API name. In the examples below, we'll use the `address` field.
77
+
78
+ ```ruby
79
+ # raw data, list of each word object
80
+ puts w9_data.document.address.values
81
+
82
+ # list of all values
83
+ puts w9_data.document.address.contents_list
84
+
85
+ # default string representation
86
+ puts w9_data.document.address.to_s
87
+
88
+ # custom string representation
89
+ puts w9_data.document.address.contents_str(separator: '_')
90
+ ```
91
+
92
+ ### Fields property
93
+ In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
94
+ It's a hashmap with the following structure:
95
+ * key: the API name of the field, as a `symbol`
96
+ * value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
97
+
98
+ ```ruby
99
+ # raw data, list of each word object
100
+ puts w9_data.document.fields[:address].values
101
+ ```
102
+
103
+ This makes it simple to iterate over all the fields:
104
+ ```ruby
105
+ w9_data.document.fields.each do |name, info|
106
+ puts name
107
+ puts info.values
108
+ end
109
+ ```
110
+
111
+ ### Classifications property
112
+ In addition to accessing a classification field directly, it's possible to access it through the `classifications` attribute.
113
+ It's a hashmap with the following structure:
114
+ * key: the API name of the field, as a `symbol`
115
+ * value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
116
+
117
+ ```ruby
118
+ # string representation of the detected classification
119
+ puts w9_data.document.classifications[:doc_type].value
120
+ ```
121
+
122
+ This makes it simple to iterate over all the classifications:
123
+ ```ruby
124
+ w9_data.document.classifications.each do |name, info|
125
+ puts name
126
+ puts info.value
127
+ end
128
+ ```
129
+
130
+ ## Questions?
131
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)