mindee 1.2.0 → 2.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +26 -0
  6. data/README.md +46 -23
  7. data/Rakefile +6 -1
  8. data/bin/mindee.rb +78 -61
  9. data/docs/ruby-api-builder.md +124 -0
  10. data/docs/ruby-getting-started.md +265 -0
  11. data/docs/ruby-invoice-ocr.md +260 -0
  12. data/docs/ruby-passport-ocr.md +156 -0
  13. data/docs/ruby-receipt-ocr.md +170 -0
  14. data/lib/mindee/client.rb +132 -93
  15. data/lib/mindee/document_config.rb +29 -169
  16. data/lib/mindee/geometry.rb +105 -8
  17. data/lib/mindee/http/endpoint.rb +80 -0
  18. data/lib/mindee/input/pdf_processing.rb +106 -0
  19. data/lib/mindee/input/sources.rb +97 -0
  20. data/lib/mindee/input.rb +3 -0
  21. data/lib/mindee/parsing/document.rb +31 -0
  22. data/lib/mindee/parsing/error.rb +22 -0
  23. data/lib/mindee/parsing/inference.rb +53 -0
  24. data/lib/mindee/parsing/page.rb +46 -0
  25. data/lib/mindee/parsing/prediction/base.rb +30 -0
  26. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  29. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  30. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  32. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  33. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  34. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  35. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  36. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  37. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  38. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  39. data/lib/mindee/parsing/prediction/financial_document/financial_document_v1.rb +245 -0
  40. data/lib/mindee/parsing/prediction/financial_document/invoice_line_item.rb +58 -0
  41. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  42. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  43. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  44. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  45. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  46. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  47. data/lib/mindee/parsing/prediction/proof_of_address/proof_of_address_v1.rb +82 -0
  48. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +87 -0
  49. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  50. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  51. data/lib/mindee/parsing/prediction.rb +14 -0
  52. data/lib/mindee/parsing.rb +4 -0
  53. data/lib/mindee/version.rb +1 -1
  54. data/mindee.gemspec +2 -1
  55. metadata +60 -24
  56. data/lib/mindee/documents/base.rb +0 -35
  57. data/lib/mindee/documents/custom.rb +0 -65
  58. data/lib/mindee/documents/financial_doc.rb +0 -135
  59. data/lib/mindee/documents/invoice.rb +0 -162
  60. data/lib/mindee/documents/passport.rb +0 -163
  61. data/lib/mindee/documents/receipt.rb +0 -109
  62. data/lib/mindee/documents.rb +0 -7
  63. data/lib/mindee/endpoint.rb +0 -105
  64. data/lib/mindee/fields/orientation.rb +0 -26
  65. data/lib/mindee/fields.rb +0 -11
  66. data/lib/mindee/inputs.rb +0 -153
  67. data/lib/mindee/response.rb +0 -27
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c0b76bde0354208269f645114e453b15ce48569ae3ce01fd194582f67a2ef6a4
4
- data.tar.gz: 96a22bf367165b1c06d93bc90f91ef4c71f16781dd43cb954852ca0343e583df
3
+ metadata.gz: 7a4e7590c321e473df6da717d6854caf198ee8dc3d818502d6e588d14497c0da
4
+ data.tar.gz: b9922d76cbc0115dff59489b9c77757decd69fcfd8a6da445e0a38229cd85023
5
5
  SHA512:
6
- metadata.gz: 00060a15fad08a14dccbde8d29a482ae54af57458e742893cf2ac2b61a66201e7dc098a92cee4c1b6a47877fc2b9db05a391bb50fb448b49aadeeb478ce03047
7
- data.tar.gz: 81192f06c2e143162675eb1da1caf93481be8a483d8b1dd4dd6d0f6e0eb8869a27163404f260700d5d1e936bdc6523828f8279c820a424da0d96df4a77f467da
6
+ metadata.gz: 01e9dce9c2ba44dea757f1061c4a368b9a675f75d8ab54dd74c06c7c70589aaa80fd320a241c51f618efec9d6ebae8aa9fa033233731e5c214e607464821deb1
7
+ data.tar.gz: 0467da78b85085e0df6cdae33e6135be4dd1be6c8d28f2c2888127c2340c114b404314f80741f0304aca4d34b19c6462ae8798fcb3936d5f2130860610b4ded3
data/.gitignore CHANGED
@@ -42,7 +42,7 @@ build-iPhoneSimulator/
42
42
  ## Documentation cache and generated files:
43
43
  /.yardoc/
44
44
  /_yardoc/
45
- /doc/
45
+ /docs/_build/
46
46
  /rdoc/
47
47
 
48
48
  ## Environment normalization:
data/.rubocop.yml CHANGED
@@ -26,7 +26,7 @@ Metrics/BlockLength:
26
26
  - '**/*.gemspec'
27
27
 
28
28
  Metrics/MethodLength:
29
- Max: 35
29
+ Max: 45
30
30
 
31
31
  Metrics/ClassLength:
32
32
  Max: 200
@@ -35,7 +35,7 @@ Metrics/ParameterLists:
35
35
  Max: 7
36
36
 
37
37
  Metrics/AbcSize:
38
- Max: 50
38
+ Max: 60
39
39
 
40
40
  Style/RegexpLiteral:
41
41
  EnforcedStyle: percent_r
data/.yardopts ADDED
@@ -0,0 +1,4 @@
1
+ --markup markdown
2
+ --main README.md
3
+ --files docs/ruby-getting-started.md,docs/ruby-invoice-ocr.md,docs/ruby-passport-ocr.md,docs/ruby-receipt-ocr.md
4
+ --output-dir docs/_build
data/CHANGELOG.md CHANGED
@@ -1,20 +1,46 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v2.1.0 - 2023-01-30
4
+ ### Changes
5
+ * :sparkles: Add financial document v1 support (Co-authored-by: Oriol Gual)
6
+ * :sparkles: Add Proof of Address v1 support
7
+
8
+ ## v2.0.0 - 2023-01-13
9
+ ### ¡Breaking Changes!
10
+ * :sparkles: add improved PDF merge system
11
+ * :boom: it should be up to the user to handle API errors
12
+ * :wastebasket: remove deprecated APIs
13
+ * :recycle: refactor CLI tool
14
+
15
+ ### Additions
16
+ * :sparkles: add support for Invoice v4.1 and Receipt v4.1
17
+ * :sparkles: add EU license plates
18
+ * :sparkles: add shipping containers support
19
+ * :sparkles: add US bank check support
20
+ * :sparkles: add all French documents
21
+ * :memo: Add YARD for generating docs
22
+ * :white_check_mark: add testing on Ruby 3.2
23
+ * :sparkles: allow setting the request timeout from env
24
+
3
25
  ## v1.2.0 - 2022-12-26
4
26
  ### Changes
5
27
  * :arrow_up: switch to origamindee => adds support for Ruby 3
6
28
 
29
+
7
30
  ## v1.1.2 - 2022-12-23
8
31
  ### Changes
9
32
  * :recycle: use of `append_page` is better for adding pages to a new PDF
10
33
 
34
+
11
35
  ## v1.1.1 - 2022-08-08
12
36
  ### Fixes
13
37
  * :bug: Fix for missing attribute accessor
14
38
 
39
+
15
40
  ## v1.1.0 - 2022-08-04
16
41
  ### Changes
17
42
  * :sparkles: Add support for custom API classification field (#5)
18
43
 
44
+
19
45
  ## v1.0.0 - 2022-07-28
20
46
  * :tada: First release!
data/README.md CHANGED
@@ -1,13 +1,10 @@
1
- [![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT)
2
- [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby)
3
- [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee)
4
- [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
1
+ [![License: MIT](https://img.shields.io/github/license/mindee/mindee-api-ruby)](https://opensource.org/licenses/MIT) [![GitHub Workflow Status](https://img.shields.io/github/actions/workflow/status/mindee/mindee-api-ruby/test.yml)](https://github.com/mindee/mindee-api-ruby) [![Gem Version](https://img.shields.io/gem/v/mindee)](https://rubygems.org/gems/mindee) [![Downloads](https://img.shields.io/gem/dt/mindee.svg)](https://rubygems.org/gems/mindee)
5
2
 
6
3
  # Mindee API Helper Library for Ruby
7
4
  Quickly and easily connect to Mindee's API services using Ruby.
8
5
 
9
6
  ## Requirements
10
- The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1
7
+ The following Ruby versions are tested and supported: 2.6, 2.7, 3.0, 3.1, 3.2
11
8
 
12
9
  ## Quick Start
13
10
  Here's the TL;DR of getting started.
@@ -24,25 +21,38 @@ And then execute:
24
21
 
25
22
  $ bundle install
26
23
 
27
- Or install it yourself as:
24
+ Finally, Ruby away!
28
25
 
29
- $ gem install mindee
26
+ ### Loading a File and Parsing It
30
27
 
31
- Finally, Ruby away!
28
+ #### Global Documents
29
+ ```ruby
30
+ require 'mindee'
31
+
32
+ # Init a new client
33
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
32
34
 
33
- ### Off-the-Shelf Document
35
+ # Load a file from disk and parse it
36
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
37
+ .parse(Mindee::Prediction::InvoiceV4)
38
+
39
+ # Print a full summary of the parsed data in RST format
40
+ puts result
41
+ ```
42
+
43
+ #### Region-Specific Documents
34
44
  ```ruby
35
45
  require 'mindee'
36
46
 
37
- # Init a new client and configure the Invoice API
38
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_invoice
47
+ # Init a new client
48
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key')
39
49
 
40
50
  # Load a file from disk and parse it
41
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
42
- .parse('invoice')
51
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
52
+ .parse(Mindee::Prediction::EU::LicensePlateV1)
43
53
 
44
- # Print a brief summary of the parsed data
45
- puts api_response.document
54
+ # Print a full summary of the parsed data in RST format
55
+ puts result.document
46
56
  ```
47
57
 
48
58
  ### Custom Document (API Builder)
@@ -50,27 +60,40 @@ puts api_response.document
50
60
  require 'mindee'
51
61
 
52
62
  # Init a new client and configure your custom document
53
- mindee_client = Mindee::Client.new(api_key: 'my-api-key').config_custom_doc(
63
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
54
64
  'john',
55
65
  'wnine'
56
66
  )
57
67
 
58
68
  # Load a file from disk and parse it
59
- api_response = mindee_client.doc_from_path('/path/to/the/file.ext')
60
- .parse('wnine')
61
-
62
- # Print a brief summary of the parsed data
63
- puts api_response.document
69
+ result = mindee_client.doc_from_path('/path/to/the/file.ext')
70
+ .parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
71
+
72
+ # Print a full summary of the parsed data in RST format
73
+ puts result
74
+
75
+ # Looping over all prediction values
76
+ result.inference.prediction.fields.each do |field_name, field_data|
77
+ puts field_name
78
+ puts field_data.values
79
+ puts field_data.to_s
80
+ end
64
81
  ```
65
82
 
66
83
  ## Further Reading
67
84
  There's more to it than that for those that need more features, or want to
68
85
  customize the experience.
69
86
 
70
- All the juicy details are described in the
71
- **[Official Documentation](https://developers.mindee.com/docs/ruby-getting-started)**.
87
+ - [Ruby Overview](https://developers.mindee.com/docs/ruby-getting-started)
88
+ - [Ruby Custom APIs OCR](https://developers.mindee.com/docs/ruby-api-builder)
89
+ - [Ruby invoices OCR](https://developers.mindee.com/docs/ruby-invoice-ocr)
90
+ - [Ruby receipts OCR](https://developers.mindee.com/docs/ruby-receipt-ocr)
91
+ - [Ruby passports OCR](https://developers.mindee.com/docs/ruby-passport-ocr)
72
92
 
73
93
  ## License
74
94
  Copyright © Mindee, SA
75
95
 
76
96
  Available as open source under the terms of the [MIT License](https://opensource.org/licenses/MIT).
97
+
98
+ ## Questions?
99
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)
data/Rakefile CHANGED
@@ -1,6 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'rake'
4
+ require 'rspec/core/rake_task'
5
+ require 'yard'
4
6
 
5
7
  begin
6
8
  require 'bundler/setup'
@@ -11,5 +13,8 @@ end
11
13
 
12
14
  task default: :spec
13
15
 
14
- require 'rspec/core/rake_task'
15
16
  RSpec::Core::RakeTask.new(:spec)
17
+
18
+ YARD::Rake::YardocTask.new do |task|
19
+ task.files = ['lib/**/*.rb']
20
+ end
data/bin/mindee.rb CHANGED
@@ -6,28 +6,60 @@ require 'optparse'
6
6
  require 'mindee'
7
7
 
8
8
  DOCUMENTS = {
9
+ "custom" => {
10
+ help: "Custom document type from API builder",
11
+ prediction: Mindee::Prediction::CustomV1,
12
+ },
13
+ "proof-of-address" => {
14
+ help: 'Proof of Address',
15
+ prediction: Mindee::Prediction::ProofOfAddressV1,
16
+ },
17
+ "financial-document" => {
18
+ help: 'Financial Document',
19
+ prediction: Mindee::Prediction::FinancialDocumentV1,
20
+ },
9
21
  "invoice" => {
10
22
  help: 'Invoice',
11
- doc_type: Mindee::Client::DOC_TYPE_INVOICE,
23
+ prediction: Mindee::Prediction::InvoiceV4,
12
24
  },
13
25
  "receipt" => {
14
26
  help: "Expense Receipt",
15
- doc_type: Mindee::Client::DOC_TYPE_RECEIPT,
27
+ prediction: Mindee::Prediction::ReceiptV4,
16
28
  },
17
29
  "passport" => {
18
30
  help: "Passport",
19
- doc_type: Mindee::Client::DOC_TYPE_PASSPORT,
31
+ prediction: Mindee::Prediction::PassportV1,
20
32
  },
21
- "financial" => {
22
- help: "Financial Document (receipt or invoice)",
23
- doc_type: Mindee::Client::DOC_TYPE_FINANCIAL,
33
+ "shipping-container" => {
34
+ help: "Shipping Container",
35
+ prediction: Mindee::Prediction::ShippingContainerV1,
24
36
  },
25
- "custom" => {
26
- help: "Custom document type from API builder",
37
+ "eu-license-plate" => {
38
+ help: "EU License Plate",
39
+ prediction: Mindee::Prediction::EU::LicensePlateV1,
40
+ },
41
+ "fr-bank-account-details" => {
42
+ help: "FR Bank Account Details",
43
+ prediction: Mindee::Prediction::FR::BankAccountDetailsV1,
44
+ },
45
+ "fr-carte-vitale" => {
46
+ help: "FR Carte Vitale",
47
+ prediction: Mindee::Prediction::FR::CarteVitaleV1,
48
+ },
49
+ "fr-id-card" => {
50
+ help: "FR ID Card",
51
+ prediction: Mindee::Prediction::FR::IdCardV1,
52
+ },
53
+ "us-bank-check" => {
54
+ help: "US Bank Check",
55
+ prediction: Mindee::Prediction::US::BankCheckV1,
27
56
  },
28
57
  }
29
58
 
30
- options = {}
59
+ options = {
60
+ api_key: '',
61
+ print_full: false,
62
+ }
31
63
 
32
64
  def ots_subcommand(command, options)
33
65
  OptionParser.new do |opt|
@@ -38,20 +70,20 @@ def ots_subcommand(command, options)
38
70
  opt.on('-w', '--with-words', 'Include words in response') do |v|
39
71
  options[:include_words] = v
40
72
  end
41
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
42
- options[:include_words] = v
73
+ opt.on('-c', '--cut-pages', "Cut document pages") do |v|
74
+ options[:cut_pages] = v
43
75
  end
44
76
  end
45
77
  end
46
78
 
47
79
  def custom_subcommand(options)
48
80
  OptionParser.new do |opt|
49
- opt.banner = "Usage: custom [options] DOC_TYPE FILE"
81
+ opt.banner = "Usage: custom [options] ENDPOINT_NAME FILE"
50
82
  opt.on('-w', '--with-words', 'Include words in response') do |v|
51
83
  options[:include_words] = v
52
84
  end
53
- opt.on('-C', '--no-cut-pages', "Don't cut document pages") do |v|
54
- options[:include_words] = v
85
+ opt.on('-c', '--cut-pages', "Don't cut document pages") do |v|
86
+ options[:cut_pages] = v
55
87
  end
56
88
  opt.on('-k [KEY]', '--key [KEY]', 'API key for the endpoint') do |v|
57
89
  options[:api_key] = v
@@ -59,79 +91,64 @@ def custom_subcommand(options)
59
91
  opt.on('-v [VERSION]', '--version [VERSION]', 'Model version for the API') do |v|
60
92
  options[:version] = v
61
93
  end
62
- opt.on('-u USER', '--user USER', 'API account name for the endpoint') do |v|
63
- options[:user] = v
94
+ opt.on('-a ACCOUNT_NAME', '--account ACCOUNT_NAME', 'API account name for the endpoint') do |v|
95
+ options[:account_name] = v
64
96
  end
65
97
  end
66
98
  end
67
99
 
68
- def new_ots_client(options, command)
69
- raise_on_error = options[:no_raise_errors].nil? ? true : false
70
- mindee_client = Mindee::Client.new(
71
- api_key: options[:api_key], raise_on_error: raise_on_error
72
- )
73
- info = DOCUMENTS[command]
74
- mindee_client.send("config_#{info[:doc_type]}")
75
- end
76
-
77
- def new_custom_client(options, doc_type)
78
- raise_on_error = options[:no_raise_errors].nil? ? true : false
79
- mindee_client = Mindee::Client.new(
80
- api_key: options[:api_key], raise_on_error: raise_on_error
81
- )
82
- mindee_client.config_custom_doc(
83
- doc_type,
84
- options[:user],
85
- version: options[:version] || '1'
86
- )
87
- end
88
-
89
100
  global_parser = OptionParser.new do |opt|
90
101
  opt.banner = "Usage: #{$PROGRAM_NAME} [options] subcommand [options] FILE"
91
102
  opt.separator('')
92
103
  opt.separator("subcommands: #{DOCUMENTS.keys.join(', ')}")
93
104
  opt.separator('')
94
- opt.on('-E', '--no-raise-errors', "raise errors behavior") do |v|
95
- options[:no_raise_errors] = true
96
- end
105
+ opt.on('-f', '--full', "Print the full data, including pages") do |v|
106
+ options[:print_full] = true
107
+ end
97
108
  end
98
109
 
99
- subcommands = {
100
- 'invoice' => ots_subcommand('invoice', options),
101
- 'receipt' => ots_subcommand('receipt', options),
102
- 'passport' => ots_subcommand('passport', options),
103
- 'financial' => ots_subcommand('financial', options),
104
- 'custom' => custom_subcommand(options),
105
- }
106
-
107
-
108
- begin
109
- global_parser.order!
110
- command = ARGV.shift
111
- subcommands[command].order!
112
- rescue NoMethodError => e
110
+ global_parser.order!
111
+ command = ARGV.shift
112
+ if command == 'custom'
113
+ custom_subcommand(options).order!
114
+ elsif DOCUMENTS.keys.include? command || ''
115
+ ots_subcommand(command, options).order!
116
+ else
113
117
  $stderr.puts global_parser
114
118
  exit(1)
115
119
  end
116
120
 
121
+ mindee_client = Mindee::Client.new(api_key: options[:api_key])
122
+
117
123
  if command == 'custom'
118
124
  if ARGV.length != 2
119
- $stderr.puts "The 'custom' command requires both DOC_TYPE and FILE arguments."
125
+ $stderr.puts "The 'custom' command requires both ENDPOINT_NAME and FILE arguments."
120
126
  exit(1)
121
127
  end
122
128
  doc_type = ARGV[0]
123
129
  file_path = ARGV[1]
124
- mindee_client = new_custom_client(options, doc_type)
130
+ mindee_client.add_endpoint(
131
+ options[:account_name], doc_type, version: options[:version] || '1',
132
+ )
125
133
  else
126
134
  if ARGV.length != 1
127
135
  $stderr.puts 'No file specified.'
128
136
  exit(1)
129
137
  end
130
- mindee_client = new_ots_client(options, command)
131
- doc_type = DOCUMENTS[command][:doc_type]
138
+ doc_type = ''
132
139
  file_path = ARGV[0]
133
140
  end
134
141
 
135
- cut_pages = options[:no_cut_pages].nil? ? false : true
136
- doc = mindee_client.doc_from_path(file_path, cut_pages: cut_pages)
137
- puts doc.parse(doc_type).document
142
+ default_cutting = {
143
+ page_indexes: [0, 1, 2, 3, 4],
144
+ operation: :KEEP_ONLY,
145
+ on_min_pages: 0,
146
+ }
147
+ page_options = options[:cut_pages].nil? ? nil : default_cutting
148
+ doc = mindee_client.doc_from_path(file_path)
149
+ result = doc.parse(DOCUMENTS[command][:prediction], endpoint_name: doc_type, page_options: page_options)
150
+ if options[:print_full]
151
+ puts result
152
+ else
153
+ puts result.inference.prediction
154
+ end
@@ -0,0 +1,124 @@
1
+ The Ruby OCR SDK supports [custom-built API](https://developers.mindee.com/docs/build-your-first-document-parsing-api) from the API Builder.
2
+
3
+ If your document isn't covered by one of Mindee's Off-the-Shelf APIs, you can create your own API using the
4
+ [API Builder](https://developers.mindee.com/docs/overview).
5
+
6
+ For the following examples, we are using our own [W9s custom API](https://developers.mindee.com/docs/w9-forms-ocr),
7
+ created with the [API Builder](https://developers.mindee.com/docs/overview).
8
+
9
+ > 📘 **Info**
10
+ >
11
+ > We used a data model that will be different from yours.
12
+ > To modify this to your own custom API, change the `add_endpoint` call with your own parameters.
13
+
14
+ ```ruby
15
+ require 'mindee'
16
+
17
+ # Init a new client and configure your custom document
18
+ mindee_client = Mindee::Client.new(api_key: 'my-api-key').add_endpoint(
19
+ 'john',
20
+ 'wnine',
21
+ version: '1.1' # optional, if not set, use the latest version of the model
22
+ )
23
+
24
+ # Load a file from disk and parse it
25
+ result = mindee_client.doc_from_path('/path/to/file.ext')
26
+ .parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
27
+
28
+ # Print a summary of the document prediction in RST format
29
+ puts result
30
+ ```
31
+
32
+ If the `version` argument is set, you'll be required to update it every time a new model is trained.
33
+ This is probably not needed for development but essential for production use.
34
+
35
+ ## Parsing Documents
36
+ Load your custom document with the client (for example with `doc_from_path`), then call the `parse` method on the returned object to send it to the API.
37
+ The document type must be specified when calling the parse method.
38
+
39
+ ```ruby
40
+ result = mindee_client.doc_from_path('/path/to/custom_file')
41
+ .parse(Mindee::Prediction::CustomV1, endpoint_name: 'wnine')
42
+ puts result
43
+ ```
44
+
45
+ > 📘 **Info**
46
+ >
47
+ > If your custom document has the same name as an [off-the-shelf API](https://developers.mindee.com/docs/what-is-off-the-shelf-api) document,
48
+ > you **must** specify your account name when calling the `parse` method:
49
+
50
+ ```ruby
51
+ mindee_client = Mindee::Client.new.add_endpoint(
52
+ 'receipt',
53
+ 'john'
54
+ )
55
+
56
+ result = mindee_client.doc_from_path('/path/to/receipt.jpg')
57
+ .parse(Mindee::Prediction::CustomV1, account_name: 'john')
58
+ ```
59
+
60
+ ## Document Fields
61
+ All the fields defined in the API builder when creating your custom document are available.
62
+
63
+ In custom documents, each field will hold an array of all the words in the document which are related to that field.
64
+ Each word is an object that has the text content, geometry information, and confidence score.
65
+
66
+ Value fields can be accessed via the `fields` attribute.
67
+
68
+ Classification fields can be accessed via the `classifications` attribute.
69
+
70
+ > 📘 **Info**
71
+ >
72
+ > Both document level and page level objects work in the same way.
73
+
74
+ ### Fields Attribute
75
+ The `fields` attribute is a hashmap with the following structure:
76
+
77
+ * key: the API name of the field, as a `symbol`
78
+ * value: a `ListField` object which has a `values` attribute, containing a list of all values found for the field.
79
+
80
+ Individual field values can be accessed by using the field's API name. In the examples below, we'll use the `address` field.
81
+
82
+ ```ruby
83
+ # raw data, list of each word object
84
+ pp result.inference.prediction.fields[:address].values
85
+
86
+ # list of all values
87
+ puts result.inference.prediction.fields[:address].contents_list
88
+
89
+ # default string representation
90
+ puts result.inference.prediction.fields[:address].to_s
91
+
92
+ # custom string representation
93
+ puts result.inference.prediction.fields[:address].contents_str(separator: '_')
94
+ ```
95
+
96
+ To iterate over all the fields:
97
+ ```ruby
98
+ result.inference.prediction.fields.each do |name, info|
99
+ puts name
100
+ puts info.values
101
+ end
102
+ ```
103
+
104
+ ### Classifications Attribute
105
+ The `classifications` attribute is a hashmap with the following structure:
106
+
107
+ * key: the API name of the field, as a `symbol`
108
+ * value: a `ClassificationField` object which has a `value` attribute, containing a string representation of the detected classification.
109
+
110
+ ```ruby
111
+ # string representation of the detected classification
112
+ puts result.document.classifications[:doc_type].value
113
+ ```
114
+
115
+ To iterate over all the classifications:
116
+ ```ruby
117
+ result.document.classifications.each do |name, info|
118
+ puts name
119
+ puts info.value
120
+ end
121
+ ```
122
+
123
+ ## Questions?
124
+ [Join our Slack](https://join.slack.com/t/mindee-community/shared_invite/zt-1jv6nawjq-FDgFcF2T5CmMmRpl9LLptw)