mindee 1.1.2 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +25 -0
- data/Gemfile +0 -7
- data/README.md +52 -21
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +11 -5
- metadata +105 -30
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
data/lib/mindee/endpoint.rb
DELETED
@@ -1,105 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'net/http'
|
4
|
-
require_relative 'version'
|
5
|
-
|
6
|
-
module Mindee
|
7
|
-
MINDEE_API_URL = 'https://api.mindee.net/v1'
|
8
|
-
USER_AGENT = "mindee-api-ruby@v#{Mindee::VERSION} ruby-v#{RUBY_VERSION} #{Mindee::PLATFORM}"
|
9
|
-
|
10
|
-
INVOICE_VERSION = '3'
|
11
|
-
INVOICE_URL_NAME = 'invoices'
|
12
|
-
|
13
|
-
RECEIPT_VERSION = '3'
|
14
|
-
RECEIPT_URL_NAME = 'expense_receipts'
|
15
|
-
|
16
|
-
PASSPORT_VERSION = '1'
|
17
|
-
PASSPORT_URL_NAME = 'passport'
|
18
|
-
|
19
|
-
# Generic API endpoint for a product.
#
# Builds the product URL from the owner, URL name and version, and resolves
# the API key from the constructor argument or, failing that, from the
# environment.
class Endpoint
  # @return [String, nil] API key sent in the 'Authorization' header
  attr_reader :api_key

  # @param owner [String] account owning the product, e.g. 'mindee'
  # @param url_name [String] product name as it appears in the URL
  # @param version [String] product version, without the leading 'v'
  # @param key_name [String, nil] name used to build the environment variable
  #   name; defaults to +url_name+
  # @param api_key [String, nil] explicit API key; when nil the environment
  #   is consulted
  def initialize(owner, url_name, version, key_name: nil, api_key: nil)
    @owner = owner
    @url_name = url_name
    @version = version
    @key_name = key_name || url_name
    @api_key = api_key || set_api_key_from_env
    @url_root = "#{MINDEE_API_URL}/products/#{@owner}/#{@url_name}/v#{@version}"
  end

  # Send the document to the endpoint's +/predict+ route as a multipart form.
  #
  # @param input_doc [Mindee::InputDocument]
  # @param include_words [Boolean] when true, adds 'include_mvision' => 'true' to the form
  # @param close_file [Boolean] forwarded to +input_doc.read_document+
  # @return [Net::HTTPResponse]
  def predict_request(input_doc, include_words: false, close_file: true)
    uri = URI("#{@url_root}/predict")
    headers = {
      'Authorization' => "Token #{@api_key}",
      'User-Agent' => USER_AGENT,
    }
    req = Net::HTTP::Post.new(uri, headers)

    params = {
      'document' => input_doc.read_document(close: close_file),
    }
    # +params+ is a Hash, which has no #push: add the flag as a regular key.
    params['include_mvision'] = 'true' if include_words

    req.set_form(params, 'multipart/form-data')

    Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      http.request(req)
    end
  end

  # Name of the endpoint-specific environment variable holding the API key.
  # The owner is included in the name unless it is 'mindee'.
  #
  # @return [String]
  def envvar_key_name
    key_name = to_envar(@key_name)
    key_name = "#{to_envar(@owner)}_#{key_name}" if @owner != 'mindee'
    "MINDEE_#{key_name}_API_KEY"
  end

  private

  # Create a standard way to get/set environment variable names.
  # Every hyphen is replaced, not only the first one, so that names such as
  # 'my-doc-type' produce a usable variable name.
  #
  # @param name [String]
  # @return [String]
  def to_envar(name)
    name.tr('-', '_').upcase
  end

  # Read the endpoint's API key from the environment, if present.
  # We look first for the specific key, if not set, we'll use the generic key.
  #
  # @return [String, nil] the key found, or nil when neither variable is set
  def set_api_key_from_env
    ENV.fetch(envvar_key_name, nil) || ENV.fetch('MINDEE_API_KEY', nil)
  end
end
|
77
|
-
|
78
|
-
# Invoice API endpoint
#
# Endpoint preset for the 'mindee' owner, built from INVOICE_URL_NAME and
# INVOICE_VERSION, with 'invoice' as the key name.
class InvoiceEndpoint < Endpoint
  # @param api_key [String, nil] API key forwarded to the parent constructor
  def initialize(api_key)
    super(
      'mindee',
      INVOICE_URL_NAME,
      INVOICE_VERSION,
      key_name: 'invoice',
      api_key: api_key
    )
  end
end
|
84
|
-
|
85
|
-
# Receipt API endpoint
#
# Endpoint preset for the 'mindee' owner, built from RECEIPT_URL_NAME and
# RECEIPT_VERSION, with 'receipt' as the key name.
class ReceiptEndpoint < Endpoint
  # @param api_key [String, nil] API key forwarded to the parent constructor
  def initialize(api_key)
    super(
      'mindee',
      RECEIPT_URL_NAME,
      RECEIPT_VERSION,
      key_name: 'receipt',
      api_key: api_key
    )
  end
end
|
91
|
-
|
92
|
-
# Passport API endpoint
#
# Endpoint preset for the 'mindee' owner, built from PASSPORT_URL_NAME and
# PASSPORT_VERSION. No key name is passed to the parent constructor.
class PassportEndpoint < Endpoint
  # @param api_key [String, nil] API key forwarded to the parent constructor
  def initialize(api_key)
    super(
      'mindee',
      PASSPORT_URL_NAME,
      PASSPORT_VERSION,
      api_key: api_key
    )
  end
end
|
98
|
-
|
99
|
-
# Custom (constructed) API endpoint
#
# Note the argument order: the document type comes first here, but it is
# passed to the parent constructor as the URL name, after the account name.
class CustomEndpoint < Endpoint
  # @param document_type [String] passed to the parent as the URL name
  # @param account_name [String] passed to the parent as the owner
  # @param version [String] product version
  # @param api_key [String, nil] API key forwarded to the parent constructor
  def initialize(document_type, account_name, version, api_key)
    super(
      account_name,
      document_type,
      version,
      api_key: api_key
    )
  end
end
|
105
|
-
end
|
data/lib/mindee/fields/orientation.rb
DELETED
@@ -1,26 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Mindee
|
4
|
-
# Represents page orientation.
class Orientation
  # Page identifier given to the constructor.
  # @return [Integer]
  attr_reader :page_id

  # The confidence score, value will be between 0.0 and 1.0
  # @return [Float]
  attr_reader :confidence

  # A prediction among these 3 possible outputs:
  #   * 0 degrees: the page is already upright
  #   * 90 degrees: the page must be rotated clockwise to be upright
  #   * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer]
  attr_reader :degrees

  # Reads the 'degrees' and 'confidence' entries of the prediction.
  #
  # @param prediction [Hash]
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @page_id = page_id
    @degrees, @confidence = prediction.values_at('degrees', 'confidence')
  end
end
|
26
|
-
end
|
data/lib/mindee/fields.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'fields/amount'
|
4
|
-
require_relative 'fields/base'
|
5
|
-
require_relative 'fields/company_registration'
|
6
|
-
require_relative 'fields/datefield'
|
7
|
-
require_relative 'fields/locale'
|
8
|
-
require_relative 'fields/orientation'
|
9
|
-
require_relative 'fields/payment_details'
|
10
|
-
require_relative 'fields/tax'
|
11
|
-
require_relative 'fields/custom_docs'
|
data/lib/mindee/inputs.rb
DELETED
@@ -1,153 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'stringio'
|
4
|
-
require 'origami'
|
5
|
-
require 'marcel'
|
6
|
-
|
7
|
-
# Monkey-patching for Origami
#
# Adds a way to serialize a PDF into an in-memory binary stream rather than
# to a file. The methods called here (+load_all_objects+, +delinearize!+,
# +compile+, +output+, ...) must be provided by the including class.
module PDFTools
  # Matches a PDF/A-1 intent such as 'PDF/A1', 'pdf-a' or 'pdfa1'.
  # The case-insensitive flag has to sit outside the pattern delimiters.
  PDFA1_INTENT = %r{pdf[/-]?A1?}i.freeze

  # @param params [Hash] options overriding the defaults
  #   (+delinearize: true+, +recompile: true+, +decrypt: false+)
  # @return [StringIO] binary-encoded stream holding the serialized document
  def to_io_stream(params = {})
    options = {
      delinearize: true,
      recompile: true,
      decrypt: false,
    }
    options.update(params)

    if frozen? # incompatible flags with frozen doc (signed)
      options[:recompile] = nil
      options[:rebuild_xrefs] = nil
      options[:noindent] = nil
      options[:obfuscate] = false
    end
    load_all_objects unless @loaded

    intents_as_pdfa1 if PDFA1_INTENT.match?(options[:intent])
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    io_stream = StringIO.new(output(options))
    io_stream.set_encoding Encoding::BINARY
    io_stream
  end
end
|
34
|
-
|
35
|
-
Origami::PDF.class_eval { include PDFTools }
|
36
|
-
|
37
|
-
module Mindee
|
38
|
-
ALLOWED_MIME_TYPES = [
|
39
|
-
'application/pdf',
|
40
|
-
'image/heic',
|
41
|
-
'image/png',
|
42
|
-
'image/jpeg',
|
43
|
-
'image/tiff',
|
44
|
-
'image/webp',
|
45
|
-
].freeze
|
46
|
-
|
47
|
-
MAX_DOC_PAGES = 3
|
48
|
-
|
49
|
-
# Base class for loading documents.
#
# Subclasses set +@io_stream+ and +@filename+ before calling +super+, which
# detects the MIME type, validates it and optionally cuts PDF pages.
class InputDocument
  # @return [String]
  attr_reader :filename
  # @return [String]
  attr_reader :filepath
  # @return [String]
  attr_reader :file_mimetype

  # @param cut_pages [Boolean] whether PDFs with too many pages should be cut
  # @param max_pages [Integer] maximum number of pages kept when cutting
  # @raise [RuntimeError] when the detected MIME type is not allowed
  def initialize(cut_pages, max_pages)
    @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename

    unless ALLOWED_MIME_TYPES.include? @file_mimetype
      raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
    end

    merge_pdf_pages(max_pages) if cut_pages && pdf?
  end

  # @return [Boolean] true when the detected MIME type is 'application/pdf'
  def pdf?
    @file_mimetype == 'application/pdf'
  end

  # Number of pages in the document; anything that is not a PDF counts as one page.
  #
  # @return [Integer]
  def page_count
    return 1 unless pdf?

    open_pdf.pages.size
  end

  # Read the whole stream from the start and encode it as Base64.
  #
  # @param close [Boolean] close the underlying stream once it has been read
  # @return [String] Base64-encoded content
  def read_document(close: true)
    @io_stream.seek(0)
    data = @io_stream.read
    @io_stream.close if close
    [data].pack('m')
  end

  private

  # Replace the stream with a shorter PDF when the document has more pages
  # than allowed. Candidate pages are the first page and the last two pages,
  # of which at most +max_pages+ are kept.
  #
  # @param max_pages [Integer]
  def merge_pdf_pages(max_pages)
    current_pdf = open_pdf
    # Compare against the caller's limit, not the global default, so that a
    # custom +max_pages+ is honoured.
    return if current_pdf.pages.size <= max_pages

    new_pdf = Origami::PDF.new

    candidates = [current_pdf.pages.first, current_pdf.pages[-2], current_pdf.pages.last]
    candidates.take(max_pages).each { |page| new_pdf.append_page(page) }
    @io_stream = new_pdf.to_io_stream
  end

  # Parse the stream, from its start, as a PDF.
  #
  # @return [Origami::PDF]
  def open_pdf
    pdf_parser = Origami::PDF::LinearParser.new({})
    @io_stream.seek(0)
    pdf_parser.parse(@io_stream)
  end
end
|
114
|
-
|
115
|
-
# Load a document from a path.
class PathDocument < InputDocument
  # Opens the file in binary mode and hands it to the base class for validation.
  #
  # @param filepath [String] path of the file to load
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = File.open(filepath, 'rb')
    @filepath = filepath
    @filename = File.basename(filepath)
    super(cut_pages, max_pages)
  rescue StandardError
    # Do not leave the file handle open when validation or parsing fails.
    @io_stream&.close
    raise
  end
end
|
124
|
-
|
125
|
-
# Load a document from a base64 string.
class Base64Document < InputDocument
  # Decodes the string into an in-memory binary stream.
  #
  # @param base64_string [String] Base64-encoded document content
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer] defaults to MAX_DOC_PAGES, like the sibling loaders
  def initialize(base64_string, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = StringIO.new(base64_string.unpack1('m*'))
    @io_stream.set_encoding Encoding::BINARY
    @filename = filename
    super(cut_pages, max_pages)
  end
end
|
134
|
-
|
135
|
-
# Load a document from raw bytes.
class BytesDocument < InputDocument
  # Wraps the bytes in an in-memory binary stream.
  #
  # @param raw_bytes [String] document content
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @filename = filename
    @io_stream = StringIO.new(raw_bytes).tap { |stream| stream.set_encoding(Encoding::BINARY) }
    super(cut_pages, max_pages)
  end
end
|
144
|
-
|
145
|
-
# Load a document from a file handle.
class FileDocument < InputDocument
  # Uses the given handle directly as the document stream.
  #
  # @param file_handle [IO] an already opened handle
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @filename = filename
    @io_stream = file_handle
    super(cut_pages, max_pages)
  end
end
|
153
|
-
end
|
data/lib/mindee/response.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Mindee
|
4
|
-
# Stores all response attributes.
class DocumentResponse
  # Document type given to the constructor.
  # @return [String]
  attr_reader :document_type

  # Response as given to the constructor.
  # @return [Hash]
  attr_reader :http_response

  # Document-level result.
  # @return [Mindee::Document]
  attr_reader :document

  # Page-level results.
  # @return [Array<Mindee::Document>]
  attr_reader :pages

  # @param http_response [Hash]
  # @param document_type [String]
  # @param document [Mindee::Document]
  # @param pages [Array<Mindee::Document>]
  def initialize(http_response, document_type, document, pages)
    @http_response, @document_type = http_response, document_type
    @document, @pages = document, pages
  end

  # @return [String] same output as +inspect+
  def to_s
    inspect
  end
end
|
27
|
-
end
|