mindee 1.1.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (65)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +25 -0
  6. data/Gemfile +0 -7
  7. data/README.md +52 -21
  8. data/Rakefile +6 -1
  9. data/bin/mindee.rb +70 -61
  10. data/docs/ruby-api-builder.md +131 -0
  11. data/docs/ruby-getting-started.md +265 -0
  12. data/docs/ruby-invoice-ocr.md +261 -0
  13. data/docs/ruby-passport-ocr.md +156 -0
  14. data/docs/ruby-receipt-ocr.md +170 -0
  15. data/lib/mindee/client.rb +128 -93
  16. data/lib/mindee/document_config.rb +22 -154
  17. data/lib/mindee/geometry.rb +105 -8
  18. data/lib/mindee/http/endpoint.rb +80 -0
  19. data/lib/mindee/input/pdf_processing.rb +106 -0
  20. data/lib/mindee/input/sources.rb +97 -0
  21. data/lib/mindee/input.rb +3 -0
  22. data/lib/mindee/parsing/document.rb +31 -0
  23. data/lib/mindee/parsing/error.rb +22 -0
  24. data/lib/mindee/parsing/inference.rb +53 -0
  25. data/lib/mindee/parsing/page.rb +46 -0
  26. data/lib/mindee/parsing/prediction/base.rb +30 -0
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  29. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  30. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  32. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  33. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  34. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  35. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  36. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  37. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  38. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  39. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  40. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  41. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  42. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  44. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  45. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  46. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  47. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  48. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  49. data/lib/mindee/parsing/prediction.rb +12 -0
  50. data/lib/mindee/parsing.rb +4 -0
  51. data/lib/mindee/version.rb +1 -1
  52. data/mindee.gemspec +11 -5
  53. metadata +105 -30
  54. data/lib/mindee/documents/base.rb +0 -35
  55. data/lib/mindee/documents/custom.rb +0 -65
  56. data/lib/mindee/documents/financial_doc.rb +0 -135
  57. data/lib/mindee/documents/invoice.rb +0 -162
  58. data/lib/mindee/documents/passport.rb +0 -163
  59. data/lib/mindee/documents/receipt.rb +0 -109
  60. data/lib/mindee/documents.rb +0 -7
  61. data/lib/mindee/endpoint.rb +0 -105
  62. data/lib/mindee/fields/orientation.rb +0 -26
  63. data/lib/mindee/fields.rb +0 -11
  64. data/lib/mindee/inputs.rb +0 -153
  65. data/lib/mindee/response.rb +0 -27
data/lib/mindee/endpoint.rb DELETED
@@ -1,105 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'net/http'
4
- require_relative 'version'
5
-
6
- module Mindee
7
- MINDEE_API_URL = 'https://api.mindee.net/v1'
8
- USER_AGENT = "mindee-api-ruby@v#{Mindee::VERSION} ruby-v#{RUBY_VERSION} #{Mindee::PLATFORM}"
9
-
10
- INVOICE_VERSION = '3'
11
- INVOICE_URL_NAME = 'invoices'
12
-
13
- RECEIPT_VERSION = '3'
14
- RECEIPT_URL_NAME = 'expense_receipts'
15
-
16
- PASSPORT_VERSION = '1'
17
- PASSPORT_URL_NAME = 'passport'
18
-
19
- # Generic API endpoint for a product.
20
- class Endpoint
21
- attr_reader :api_key
22
-
23
- def initialize(owner, url_name, version, key_name: nil, api_key: nil)
24
- @owner = owner
25
- @url_name = url_name
26
- @version = version
27
- @key_name = key_name || url_name
28
- @api_key = api_key || set_api_key_from_env
29
- @url_root = "#{MINDEE_API_URL}/products/#{@owner}/#{@url_name}/v#{@version}"
30
- end
31
-
32
- # @param input_doc [Mindee::InputDocument]
33
- # @param include_words [Boolean]
34
- # @param close_file [Boolean]
35
- # @return [Net::HTTPResponse]
36
- def predict_request(input_doc, include_words: false, close_file: true)
37
- uri = URI("#{@url_root}/predict")
38
- headers = {
39
- 'Authorization' => "Token #{@api_key}",
40
- 'User-Agent' => USER_AGENT,
41
- }
42
- req = Net::HTTP::Post.new(uri, headers)
43
-
44
- params = {
45
- 'document' => input_doc.read_document(close: close_file),
46
- }
47
- params.push ['include_mvision', 'true'] if include_words
48
-
49
- req.set_form(params, 'multipart/form-data')
50
-
51
- Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
52
- http.request(req)
53
- end
54
- end
55
-
56
- def envvar_key_name
57
- key_name = to_envar(@key_name)
58
- key_name = "#{to_envar(@owner)}_#{key_name}" if @owner != 'mindee'
59
- "MINDEE_#{key_name}_API_KEY"
60
- end
61
-
62
- private
63
-
64
- # Create a standard way to get/set environment variable names.
65
- def to_envar(name)
66
- name.sub('-', '_').upcase
67
- end
68
-
69
- # Set the endpoint's API key from an environment variable, if present.
70
- # We look first for the specific key, if not set, we'll use the generic key
71
- def set_api_key_from_env
72
- env_key = ENV.fetch(envvar_key_name, nil)
73
- env_key = ENV.fetch('MINDEE_API_KEY', nil) if env_key.nil?
74
- @api_key = env_key if env_key
75
- end
76
- end
77
-
78
- # Invoice API endpoint
79
- class InvoiceEndpoint < Endpoint
80
- def initialize(api_key)
81
- super('mindee', INVOICE_URL_NAME, INVOICE_VERSION, key_name: 'invoice', api_key: api_key)
82
- end
83
- end
84
-
85
- # Receipt API endpoint
86
- class ReceiptEndpoint < Endpoint
87
- def initialize(api_key)
88
- super('mindee', RECEIPT_URL_NAME, RECEIPT_VERSION, key_name: 'receipt', api_key: api_key)
89
- end
90
- end
91
-
92
- # Passport API endpoint
93
- class PassportEndpoint < Endpoint
94
- def initialize(api_key)
95
- super('mindee', PASSPORT_URL_NAME, PASSPORT_VERSION, api_key: api_key)
96
- end
97
- end
98
-
99
- # Custom (constructed) API endpoint
100
- class CustomEndpoint < Endpoint
101
- def initialize(document_type, account_name, version, api_key)
102
- super(account_name, document_type, version, api_key: api_key)
103
- end
104
- end
105
- end
data/lib/mindee/fields/orientation.rb DELETED
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Represents page orientation.
5
- class Orientation
6
- # @return [Integer]
7
- attr_reader :page_id
8
- # The confidence score, value will be between 0.0 and 1.0
9
- # @return [Float]
10
- attr_reader :confidence
11
- # A prediction among these 3 possible outputs:
12
- # * 0 degrees: the page is already upright
13
- # * 90 degrees: the page must be rotated clockwise to be upright
14
- # * 270 degrees: the page must be rotated counterclockwise to be upright
15
- # @return [Integer]
16
- attr_reader :degrees
17
-
18
- # @param prediction [Hash]
19
- # @param page_id [Integer]
20
- def initialize(prediction, page_id)
21
- @degrees = prediction['degrees']
22
- @confidence = prediction['confidence']
23
- @page_id = page_id
24
- end
25
- end
26
- end
data/lib/mindee/fields.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'fields/amount'
4
- require_relative 'fields/base'
5
- require_relative 'fields/company_registration'
6
- require_relative 'fields/datefield'
7
- require_relative 'fields/locale'
8
- require_relative 'fields/orientation'
9
- require_relative 'fields/payment_details'
10
- require_relative 'fields/tax'
11
- require_relative 'fields/custom_docs'
data/lib/mindee/inputs.rb DELETED
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'stringio'
4
- require 'origami'
5
- require 'marcel'
6
-
7
- # Monkey-patching for Origami
8
- module PDFTools
9
- def to_io_stream(params = {})
10
- options = {
11
- delinearize: true,
12
- recompile: true,
13
- decrypt: false,
14
- }
15
- options.update(params)
16
-
17
- if frozen? # incompatible flags with frozen doc (signed)
18
- options[:recompile] = nil
19
- options[:rebuild_xrefs] = nil
20
- options[:noindent] = nil
21
- options[:obfuscate] = false
22
- end
23
- load_all_objects unless @loaded
24
-
25
- intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?/i}
26
- delinearize! if options[:delinearize] && linearized?
27
- compile(options) if options[:recompile]
28
-
29
- io_stream = StringIO.new(output(options))
30
- io_stream.set_encoding Encoding::BINARY
31
- io_stream
32
- end
33
- end
34
-
35
- Origami::PDF.class_eval { include PDFTools }
36
-
37
- module Mindee
38
- ALLOWED_MIME_TYPES = [
39
- 'application/pdf',
40
- 'image/heic',
41
- 'image/png',
42
- 'image/jpeg',
43
- 'image/tiff',
44
- 'image/webp',
45
- ].freeze
46
-
47
- MAX_DOC_PAGES = 3
48
-
49
- # Base class for loading documents.
50
- class InputDocument
51
- # @return [String]
52
- attr_reader :filename
53
- # @return [String]
54
- attr_reader :filepath
55
- # @return [String]
56
- attr_reader :file_mimetype
57
-
58
- # @param cut_pages [Boolean]
59
- # @param max_pages [Integer]
60
- def initialize(cut_pages, max_pages)
61
- @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
62
-
63
- unless ALLOWED_MIME_TYPES.include? @file_mimetype
64
- raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
65
- end
66
-
67
- merge_pdf_pages(max_pages) if cut_pages && pdf?
68
- end
69
-
70
- def pdf?
71
- @file_mimetype == 'application/pdf'
72
- end
73
-
74
- # @return [Integer]
75
- def page_count
76
- if pdf?
77
- current_pdf = open_pdf
78
- return current_pdf.pages.size
79
- end
80
- 1
81
- end
82
-
83
- # @param close [Boolean]
84
- def read_document(close: true)
85
- @io_stream.seek(0)
86
- data = @io_stream.read
87
- @io_stream.close if close
88
- [data].pack('m')
89
- end
90
-
91
- private
92
-
93
- # @param max_pages [Integer]
94
- def merge_pdf_pages(max_pages)
95
- current_pdf = open_pdf
96
- return if current_pdf.pages.size <= MAX_DOC_PAGES
97
-
98
- new_pdf = Origami::PDF.new
99
-
100
- to_insert = [current_pdf.pages[0], current_pdf.pages[-2], current_pdf.pages[-1]].take(max_pages)
101
- to_insert.each do |page|
102
- new_pdf.append_page(page)
103
- end
104
- @io_stream = new_pdf.to_io_stream
105
- end
106
-
107
- # @return [Origami::PDF]
108
- def open_pdf
109
- pdf_parser = Origami::PDF::LinearParser.new({})
110
- @io_stream.seek(0)
111
- pdf_parser.parse(@io_stream)
112
- end
113
- end
114
-
115
- # Load a document from a path.
116
- class PathDocument < InputDocument
117
- def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
118
- @io_stream = File.open(filepath, 'rb')
119
- @filepath = filepath
120
- @filename = File.basename(filepath)
121
- super(cut_pages, max_pages)
122
- end
123
- end
124
-
125
- # Load a document from a base64 string.
126
- class Base64Document < InputDocument
127
- def initialize(base64_string, filename, cut_pages, max_pages: 3)
128
- @io_stream = StringIO.new(base64_string.unpack1('m*'))
129
- @io_stream.set_encoding Encoding::BINARY
130
- @filename = filename
131
- super(cut_pages, max_pages)
132
- end
133
- end
134
-
135
- # Load a document from raw bytes.
136
- class BytesDocument < InputDocument
137
- def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
138
- @io_stream = StringIO.new(raw_bytes)
139
- @io_stream.set_encoding Encoding::BINARY
140
- @filename = filename
141
- super(cut_pages, max_pages)
142
- end
143
- end
144
-
145
- # Load a document from a file handle.
146
- class FileDocument < InputDocument
147
- def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
148
- @io_stream = file_handle
149
- @filename = filename
150
- super(cut_pages, max_pages)
151
- end
152
- end
153
- end
data/lib/mindee/response.rb DELETED
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Stores all response attributes.
5
- class DocumentResponse
6
- # @return [String]
7
- attr_reader :document_type
8
- # @return [Hash]
9
- attr_reader :http_response
10
- # @return [Mindee::Document]
11
- attr_reader :document
12
- # @return [Array<Mindee::Document>]
13
- attr_reader :pages
14
-
15
- # @param http_response [Hash]
16
- def initialize(http_response, document_type, document, pages)
17
- @http_response = http_response
18
- @document_type = document_type
19
- @document = document
20
- @pages = pages
21
- end
22
-
23
- def to_s
24
- inspect
25
- end
26
- end
27
- end