mindee 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +25 -0
  6. data/Gemfile +0 -7
  7. data/README.md +52 -21
  8. data/Rakefile +6 -1
  9. data/bin/mindee.rb +70 -61
  10. data/docs/ruby-api-builder.md +131 -0
  11. data/docs/ruby-getting-started.md +265 -0
  12. data/docs/ruby-invoice-ocr.md +261 -0
  13. data/docs/ruby-passport-ocr.md +156 -0
  14. data/docs/ruby-receipt-ocr.md +170 -0
  15. data/lib/mindee/client.rb +128 -93
  16. data/lib/mindee/document_config.rb +22 -154
  17. data/lib/mindee/geometry.rb +105 -8
  18. data/lib/mindee/http/endpoint.rb +80 -0
  19. data/lib/mindee/input/pdf_processing.rb +106 -0
  20. data/lib/mindee/input/sources.rb +97 -0
  21. data/lib/mindee/input.rb +3 -0
  22. data/lib/mindee/parsing/document.rb +31 -0
  23. data/lib/mindee/parsing/error.rb +22 -0
  24. data/lib/mindee/parsing/inference.rb +53 -0
  25. data/lib/mindee/parsing/page.rb +46 -0
  26. data/lib/mindee/parsing/prediction/base.rb +30 -0
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  29. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  30. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  32. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  33. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  34. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  35. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  36. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  37. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  38. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  39. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  40. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  41. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  42. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  44. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  45. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  46. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  47. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  48. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  49. data/lib/mindee/parsing/prediction.rb +12 -0
  50. data/lib/mindee/parsing.rb +4 -0
  51. data/lib/mindee/version.rb +1 -1
  52. data/mindee.gemspec +11 -5
  53. metadata +105 -30
  54. data/lib/mindee/documents/base.rb +0 -35
  55. data/lib/mindee/documents/custom.rb +0 -65
  56. data/lib/mindee/documents/financial_doc.rb +0 -135
  57. data/lib/mindee/documents/invoice.rb +0 -162
  58. data/lib/mindee/documents/passport.rb +0 -163
  59. data/lib/mindee/documents/receipt.rb +0 -109
  60. data/lib/mindee/documents.rb +0 -7
  61. data/lib/mindee/endpoint.rb +0 -105
  62. data/lib/mindee/fields/orientation.rb +0 -26
  63. data/lib/mindee/fields.rb +0 -11
  64. data/lib/mindee/inputs.rb +0 -153
  65. data/lib/mindee/response.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 26a122535a224410e763068e0f0b18edeb88af4da890887e7ea846744be88dc6
4
- data.tar.gz: ae80594004d03047bed56a285e74798b1afb855a70e58bbf14284bbc9d2ab8c5
3
+ metadata.gz: 729bb9b6e8643c95c194583c4c7e217f9cbdc68149a72125767a2191291142be
4
+ data.tar.gz: cc2233ca32e0fadaf445e731417238b0fa272865f949fd0c91ef61916f86dfb4
5
5
  SHA512:
6
- metadata.gz: 7c1d7cfbdd6d2c28ceaa4259b2876585a4c84b3a7fddf6511f34880ed28606f5ff8e254de9294fe9a28f2db01673aac3c987c34a6df05c1dde3503117e7d849e
7
- data.tar.gz: 8180cd2c300973141dc834c3184f86b9e2307e39362155c4a62b13b878679e4d3fcaedb85653161521ad42d9b5056e7378bba74f11e451f80468976ea193d915
6
+ metadata.gz: 162501228d7fed6ac0829aefcaae88390d43a91cd7b516a837471964b181fe084f9d200e6db447fb4c3b137c3f93ea801661e75ac151a087702143574b8ee9b6
7
+ data.tar.gz: 960d67e2a53626d3399628b9190972025df886efe50791dc72e5cb0d327f49c99352c66361ff952912b30288c562d3169d2f67ec49a0f8310ed47b40aeedb11d
data/.gitignore CHANGED
@@ -42,7 +42,7 @@ build-iPhoneSimulator/
42
42
  ## Documentation cache and generated files:
43
43
  /.yardoc/
44
44
  /_yardoc/
45
- /doc/
45
+ /docs/_build/
46
46
  /rdoc/
47
47
 
48
48
  ## Environment normalization:
data/.rubocop.yml CHANGED
@@ -26,7 +26,7 @@ Metrics/BlockLength:
26
26
  - '**/*.gemspec'
27
27
 
28
28
  Metrics/MethodLength:
29
- Max: 35
29
+ Max: 45
30
30
 
31
31
  Metrics/ClassLength:
32
32
  Max: 200
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
35
35
  Max: 7
36
36
 
37
37
  Metrics/AbcSize:
38
- Max: 50
38
+ Max: 60
39
39
 
40
40
  Style/RegexpLiteral:
41
41
  EnforcedStyle: percent_r
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ --markup markdown
2
+ --main README.md
3
+ --files docs/ruby-getting-started.md,docs/ruby-invoice-ocr.md,docs/ruby-passport-ocr.md,docs/ruby-receipt-ocr.md
4
+ --output-dir docs/_build
data/CHANGELOG.md CHANGED
@@ -1,16 +1,41 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v2.0.0 - 2023-01-13
4
+ ### ¡Breaking Changes!
5
+ * :sparkles: add improved PDF merge system
6
+ * :boom: it should be up to the user to handle API errors
7
+ * :wastebasket: remove deprecated APIs
8
+ * :recycle: refactor CLI tool
9
+
10
+ ### Additions
11
+ * :sparkles: add support for Invoice v4.1 and Receipt v4.1
12
+ * :sparkles: add EU license plates
13
+ * :sparkles: add shipping containers support
14
+ * :sparkles: add US bank check support
15
+ * :sparkles: add all French documents
16
+ * :memo: Add YARD for generating docs
17
+ * :white_check_mark: add testing on Ruby 3.2
18
+ * :sparkles: allow setting the request timeout from env
19
+
20
+ ## v1.2.0 - 2022-12-26
21
+ ### Changes
22
+ * :arrow_up: switch to origamindee => adds support for Ruby 3
23
+
24
+
3
25
  ## v1.1.2 - 2022-12-23
4
26
  ### Changes
5
27
  * :recycle: use of `append_page` is better for adding pages to a new PDF
6
28
 
29
+
7
30
  ## v1.1.1 - 2022-08-08
8
31
  ### Fixes
9
32
  * :bug: Fix for missing attribute accessor
10
33
 
34
+
11
35
  ## v1.1.0 - 2022-08-04
12
36
  ### Changes
13
37
  * :sparkles: Add support for custom API classification field (#5)
14
38
 
39
+
15
40
  ## v1.0.0 - 2022-07-28
16
41
  * :tada: First release!
data/Gemfile CHANGED
@@ -4,10 +4,3 @@ source 'https://rubygems.org'
4
4
 
5
5
  # Specify your gem's dependencies in mindee.gemspec
6
6
  gemspec
7
-
8
- gem 'rake', '~> 12.0'
9
- gem 'rubocop', require: false
10
-
11
- group :test do
12
- gem 'rspec', '~> 3.10'
13
- end
data/README.md CHANGED
@@ -1,6 +1,11 @@
1
+ [![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby) [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee) [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
2
+
1
3
  # Mindee API Helper Library for Ruby
2
4
  Quickly and easily connect to Mindee's API services using Ruby.
3
5
 
6
+ ## Requirements
7
+ The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
8
+
4
9
  ## Quick Start
5
10
  Here's the TL;DR of getting started.
6
11
 
@@ -16,25 +21,38 @@ And then execute:
16
21
 
17
22
  $ bundle install
18
23
 
19
- Or install it yourself as:
24
+ Finally, Ruby away!
20
25
 
21
- $ gem install mindee
26
+ ### Loading a File and Parsing It
22
27
 
23
- Finally, Ruby away!
28
+ #### Global Documents
29
+ ```ruby
30
+ require 'mindee'
31
+
32
+ # Init a new client
33
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
24
34
 
25
- ### Off-the-Shelf Document
35
+ # Load a file from disk and parse it
36
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
37
+ .parse(Mindee::Prediction::InvoiceV4)
38
+
39
+ # Print a full summary of the parsed data in RST format
40
+ puts result
41
+ ```
42
+
43
+ #### Region-Specific Documents
26
44
  ```ruby
27
45
  require 'mindee'
28
46
 
29
- # Init a new client and configure the Invoice API
30
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_invoice
47
+ # Init a new client
48
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
31
49
 
32
50
  # Load a file from disk and parse it
33
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
34
- .parse('invoice')
51
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
52
+ .parse(Mindee::Prediction::EU::LicensePlateV1)
35
53
 
36
- # Print a brief summary of the parsed data
37
- puts api_response.document
54
+ # Print a full summary of the parsed data in RST format
55
+ puts result.document
38
56
  ```
39
57
 
40
58
  ### Custom Document (API Builder)
@@ -42,27 +60,40 @@ puts api_response.document
42
60
  require 'mindee'
43
61
 
44
62
  # Init a new client and configure your custom document
45
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_custom_doc(
46
- 'pokemon-card',
47
- 'pikachu'
63
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
64
+ 'john',
65
+ 'wnine'
48
66
  )
49
67
 
50
68
  # Load a file from disk and parse it
51
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
52
- .parse('pokemon-card')
53
-
54
- # Print a brief summary of the parsed data
55
- puts api_response.document
69
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
70
+ .parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
71
+
72
+ # Print a full summary of the parsed data in RST format
73
+ puts result
74
+
75
+ # Looping over all prediction values
76
+ result.inference.prediction.fields.each do |field_name, field_data|
77
+ puts field_name
78
+ puts field_data.values
79
+ puts field_data.to_s
80
+ end
56
81
  ```
57
82
 
58
83
  ## Further Reading
59
84
  There's more to it than that for those that need more features, or want to
60
85
  customize the experience.
61
86
 
62
- All the juicy details are described in the
63
- **[Official Documentation](https://developers.mindee.com/docs/ruby-getting-started)**.
87
+ - [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
88
+ - [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
89
+ - [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
90
+ - [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
91
+ - [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
64
92
 
65
93
  ## License
66
- Copyright © Mindee
94
+ Copyright © Mindee, SA
67
95
 
68
96
  Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
97
+
98
+ ## Questions?
99
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rake'
4
+ require 'rspec/core/rake_task'
5
+ require 'yard'
4
6
 
5
7
  begin
6
8
  require 'bundler/setup'
@@ -11,5 +13,8 @@ end
11
13
 
12
14
  task default: :spec
13
15
 
14
- require 'rspec/core/rake_task'
15
16
  RSpec::Core::RakeTask.new(:spec)
17
+
18
+ YARD::Rake::YardocTask.new do |task|
19
+ task.files = ['lib/**/*.rb']
20
+ end
data/bin/mindee.rb CHANGED
@@ -6,28 +6,52 @@ require 'optparse'
6
6
  require 'mindee'
7
7
 
8
8
  DOCUMENTS = {
9
+ "custom" => {
10
+ help: "Custom document type from API builder",
11
+ prediction: Mindee::Prediction::CustomV1,
12
+ },
9
13
  "invoice" => {
10
14
  help: 'Invoice',
11
- doc_type: Mindee::Client::DOC_TYPE_INVOICE,
15
+ prediction: Mindee::Prediction::InvoiceV4,
12
16
  },
13
17
  "receipt" => {
14
18
  help: "Expense Receipt",
15
- doc_type: Mindee::Client::DOC_TYPE_RECEIPT,
19
+ prediction: Mindee::Prediction::ReceiptV4,
16
20
  },
17
21
  "passport" => {
18
22
  help: "Passport",
19
- doc_type: Mindee::Client::DOC_TYPE_PASSPORT,
23
+ prediction: Mindee::Prediction::PassportV1,
20
24
  },
21
- "financial" => {
22
- help: "Financial Document (receipt or invoice)",
23
- doc_type: Mindee::Client::DOC_TYPE_FINANCIAL,
25
+ "shipping-container" => {
26
+ help: "Shipping Container",
27
+ prediction: Mindee::Prediction::ShippingContainerV1,
24
28
  },
25
- "custom" => {
26
- help: "Custom document type from API builder",
29
+ "eu-license-plate" => {
30
+ help: "EU License Plate",
31
+ prediction: Mindee::Prediction::EU::LicensePlateV1,
32
+ },
33
+ "fr-bank-account-details" => {
34
+ help: "FR Bank Account Details",
35
+ prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
36
+ },
37
+ "fr-carte-vitale" => {
38
+ help: "FR Carte Vitale",
39
+ prediction: Mindee::Prediction::FR::CarteVitaleV1,
40
+ },
41
+ "fr-id-card" => {
42
+ help: "FR ID Card",
43
+ prediction: Mindee::Prediction::FR::IdCardV1,
44
+ },
45
+ "us-bank-check" => {
46
+ help: "US Bank Check",
47
+ prediction: Mindee::Prediction::US::BankCheckV1,
27
48
  },
28
49
  }
29
50
 
30
- options = {}
51
+ options = {
52
+ api_key: '',
53
+ print_full: false,
54
+ }
31
55
 
32
56
  def ots_subcommand(command, options)
33
57
  OptionParser.new do |opt|
@@ -38,20 +62,20 @@ def ots_subcommand(command, options)
38
62
  opt.on('-w', '--with-words', 'Include words in response') do |v|
39
63
  options[:include_words] = v
40
64
  end
41
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
42
- options[:include_words] = v
65
+ opt.on('-c', '--cut-pages', "Cut document pages") do |v|
66
+ options[:cut_pages] = v
43
67
  end
44
68
  end
45
69
  end
46
70
 
47
71
  def custom_subcommand(options)
48
72
  OptionParser.new do |opt|
49
- opt.banner = "Usage: custom [options] DOC_TYPE FILE"
73
+ opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
50
74
  opt.on('-w', '--with-words', 'Include words in response') do |v|
51
75
  options[:include_words] = v
52
76
  end
53
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
54
- options[:include_words] = v
77
+ opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
78
+ options[:cut_pages] = v
55
79
  end
56
80
  opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
57
81
  options[:api_key] = v
@@ -59,79 +83,64 @@ def custom_subcommand(options)
59
83
  opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
60
84
  options[:version] = v
61
85
  end
62
- opt.on('-u USER', '--user USER', 'API account name for the endpoint') do |v|
63
- options[:user] = v
86
+ opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
87
+ options[:account_name] = v
64
88
  end
65
89
  end
66
90
  end
67
91
 
68
- def new_ots_client(options, command)
69
- raise_on_error = options[:no_raise_errors].nil? ? true : false
70
- mindee_client = Mindee::Client.new(
71
- api_key: options[:api_key], raise_on_error: raise_on_error
72
- )
73
- info = DOCUMENTS[command]
74
- mindee_client.send("config_#{info[:doc_type]}")
75
- end
76
-
77
- def new_custom_client(options, doc_type)
78
- raise_on_error = options[:no_raise_errors].nil? ? true : false
79
- mindee_client = Mindee::Client.new(
80
- api_key: options[:api_key], raise_on_error: raise_on_error
81
- )
82
- mindee_client.config_custom_doc(
83
- doc_type,
84
- options[:user],
85
- version: options[:version] || '1'
86
- )
87
- end
88
-
89
92
  global_parser = OptionParser.new do |opt|
90
93
  opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
91
94
  opt.separator('')
92
95
  opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
93
96
  opt.separator('')
94
- opt.on('-E', '--no-raise-errors', "raise errors behavior") do |v|
95
- options[:no_raise_errors] = true
96
- end
97
+ opt.on('-f', '--full', "Print the full data, including pages") do |v|
98
+ options[:print_full] = true
99
+ end
97
100
  end
98
101
 
99
- subcommands = {
100
- 'invoice' => ots_subcommand('invoice', options),
101
- 'receipt' => ots_subcommand('receipt', options),
102
- 'passport' => ots_subcommand('passport', options),
103
- 'financial' => ots_subcommand('financial', options),
104
- 'custom' => custom_subcommand(options),
105
- }
106
-
107
-
108
- begin
109
- global_parser.order!
110
- command = ARGV.shift
111
- subcommands[command].order!
112
- rescue NoMethodError => e
102
+ global_parser.order!
103
+ command = ARGV.shift
104
+ if command == 'custom'
105
+ custom_subcommand(options).order!
106
+ elsif DOCUMENTS.keys.include? command || ''
107
+ ots_subcommand(command, options).order!
108
+ else
113
109
  $stderr.puts global_parser
114
110
  exit(1)
115
111
  end
116
112
 
113
+ mindee_client = Mindee::Client.new(api_key: options[:api_key])
114
+
117
115
  if command == 'custom'
118
116
  if ARGV.length != 2
119
- $stderr.puts "The 'custom' command requires both DOC_TYPE and FILE arguments."
117
+ $stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
120
118
  exit(1)
121
119
  end
122
120
  doc_type = ARGV[0]
123
121
  file_path = ARGV[1]
124
- mindee_client = new_custom_client(options, doc_type)
122
+ mindee_client.add_endpoint(
123
+ options[:account_name], doc_type, version: options[:version] || '1',
124
+ )
125
125
  else
126
126
  if ARGV.length != 1
127
127
  $stderr.puts 'No file specified.'
128
128
  exit(1)
129
129
  end
130
- mindee_client = new_ots_client(options, command)
131
- doc_type = DOCUMENTS[command][:doc_type]
130
+ doc_type = ''
132
131
  file_path = ARGV[0]
133
132
  end
134
133
 
135
- cut_pages = options[:no_cut_pages].nil? ? false : true
136
- doc = mindee_client.doc_from_path(file_path, cut_pages: cut_pages)
137
- puts doc.parse(doc_type).document
134
+ default_cutting = {
135
+ page_indexes: [0, 1, 2, 3, 4],
136
+ operation: :KEEP_ONLY,
137
+ on_min_pages: 0,
138
+ }
139
+ page_options = options[:cut_pages].nil? ? nil : default_cutting
140
+ doc = mindee_client.doc_from_path(file_path)
141
+ result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
142
+ if options[:print_full]
143
+ puts result
144
+ else
145
+ puts result.inference.prediction
146
+ end
@@ -0,0 +1,131 @@
1
+ The Ruby OCR SDK supports [custom-built APIs](https://developers.mindee.com/docs/build-your-first-document-parsing-api)
2
+ from the API Builder.
3
+
4
+ If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
5
+ [API Builder](https://developers.mindee.com/docs/overview).
6
+
7
+ For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr)
8
+ created with the [API Builder](https://developers.mindee.com/docs/overview).
9
+
10
+ > 📘 **Info**
11
+ >
12
+ > We used a data model that may be different from yours. To modify this to your own custom API,
13
+ > change the `config_custom_doc` call with your own parameters.
14
+
15
+ ```ruby
16
+ require 'mindee'
17
+
18
+ # Init a new client and configure your custom document
19
+ mindee_client = Mindee::Client.new(
20
+ api_key: 'my-api-key', # optional, can be set in environment
21
+ ).config_custom_doc(
22
+ 'wsnine',
23
+ 'john',
24
+ version: '1.1' # optional, if not set, use the latest version of the model
25
+ )
26
+
27
+ # Load a file from disk and parse it
28
+ w9_data = mindee_client.doc_from_path('/path/to/file.pdf').parse('wsnine')
29
+
30
+ # Print a brief summary of the parsed data
31
+ puts w9_data.document.to_s
32
+ ```
33
+
34
+ If the `version` argument is set, you'll be required to update it every time a new model is trained.
35
+ This is probably not needed for development but essential for production use.
36
+
37
+ ## Parsing Documents
38
+ To parse your custom document, load the file with the client (for example with `doc_from_path`), then call the `parse` method on the returned object to send it to the API and get back the parsed result.
39
+ The document type must be specified when calling the parse method.
40
+
41
+ ```ruby
42
+ result = mindee_client.doc_from_path('/path/to/custom_file').parse('wsnine')
43
+ puts result
44
+ ```
45
+
46
+ > 📘 **Info**
47
+ >
48
+ > If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
49
+ > you **must** specify your account name when calling the `parse` method:
50
+
51
+ ```ruby
52
+ mindee_client = Mindee::Client.new.config_custom_doc(
53
+ 'receipt',
54
+ 'john'
55
+ )
56
+
57
+ result = mindee_client.doc_from_path('/path/to/receipt.jpg')
58
+ .parse('receipt', username: 'john')
59
+ ```
60
+
61
+ ## Document Fields
62
+ All the fields defined in the API builder when creating your custom document are available.
63
+
64
+ In custom documents, each field will hold an array of all the words in the document which are related to that field.
65
+ Each word is an object that has the text content, geometry information, and confidence score.
66
+
67
+ Value fields can be accessed either via the `fields` attribute, or as their own attributes set at run-time.
68
+
69
+ Classification fields can be accessed either via the `classifications` attribute, or as their own attributes set at run-time.
70
+
71
+ > 📘 **Info**
72
+ >
73
+ > Both document level and page level objects work in the same way.
74
+
75
+ ### Run-time Attributes
76
+ Individual field values can be accessed simply by using the field's API name. In the examples below, we'll use the `address` field.
77
+
78
+ ```ruby
79
+ # raw data, list of each word object
80
+ puts w9_data.document.address.values
81
+
82
+ # list of all values
83
+ puts w9_data.document.address.contents_list
84
+
85
+ # default string representation
86
+ puts w9_data.document.address.to_s
87
+
88
+ # custom string representation
89
+ puts w9_data.document.address.contents_str(separator: '_')
90
+ ```
91
+
92
+ ### Fields property
93
+ In addition to accessing a value field directly, it's possible to access it through the `fields` attribute.
94
+ It's a hashmap with the following structure:
95
+ * key: the API name of the field, as a `symbol`
96
+ * value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
97
+
98
+ ```ruby
99
+ # raw data, list of each word object
100
+ puts w9_data.document.fields[:address].values
101
+ ```
102
+
103
+ This makes it simple to iterate over all the fields:
104
+ ```ruby
105
+ w9_data.document.fields.each do |name, info|
106
+ puts name
107
+ puts info.values
108
+ end
109
+ ```
110
+
111
+ ### Classifications property
112
+ In addition to accessing a classification field directly, it's possible to access it through the `classifications` attribute.
113
+ It's a hashmap with the following structure:
114
+ * key: the API name of the field, as a `symbol`
115
+ * value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
116
+
117
+ ```ruby
118
+ # string representation of the detected classification
119
+ puts w9_data.document.classifications[:doc_type].value
120
+ ```
121
+
122
+ This makes it simple to iterate over all the classifications:
123
+ ```ruby
124
+ w9_data.document.classifications.each do |name, info|
125
+ puts name
126
+ puts info.value
127
+ end
128
+ ```
129
+
130
+ ## Questions?
131
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)