mindee 1.2.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +1 -1
- data/.rubocop.yml +2 -2
- data/.yardopts +4 -0
- data/CHANGELOG.md +21 -0
- data/README.md +46 -23
- data/Rakefile +6 -1
- data/bin/mindee.rb +70 -61
- data/docs/ruby-api-builder.md +131 -0
- data/docs/ruby-getting-started.md +265 -0
- data/docs/ruby-invoice-ocr.md +261 -0
- data/docs/ruby-passport-ocr.md +156 -0
- data/docs/ruby-receipt-ocr.md +170 -0
- data/lib/mindee/client.rb +128 -93
- data/lib/mindee/document_config.rb +22 -154
- data/lib/mindee/geometry.rb +105 -8
- data/lib/mindee/http/endpoint.rb +80 -0
- data/lib/mindee/input/pdf_processing.rb +106 -0
- data/lib/mindee/input/sources.rb +97 -0
- data/lib/mindee/input.rb +3 -0
- data/lib/mindee/parsing/document.rb +31 -0
- data/lib/mindee/parsing/error.rb +22 -0
- data/lib/mindee/parsing/inference.rb +53 -0
- data/lib/mindee/parsing/page.rb +46 -0
- data/lib/mindee/parsing/prediction/base.rb +30 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
- data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
- data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
- data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
- data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
- data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
- data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
- data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
- data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
- data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
- data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
- data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
- data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
- data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
- data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
- data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
- data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
- data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
- data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
- data/lib/mindee/parsing/prediction.rb +12 -0
- data/lib/mindee/parsing.rb +4 -0
- data/lib/mindee/version.rb +1 -1
- data/mindee.gemspec +2 -1
- metadata +57 -24
- data/lib/mindee/documents/base.rb +0 -35
- data/lib/mindee/documents/custom.rb +0 -65
- data/lib/mindee/documents/financial_doc.rb +0 -135
- data/lib/mindee/documents/invoice.rb +0 -162
- data/lib/mindee/documents/passport.rb +0 -163
- data/lib/mindee/documents/receipt.rb +0 -109
- data/lib/mindee/documents.rb +0 -7
- data/lib/mindee/endpoint.rb +0 -105
- data/lib/mindee/fields/orientation.rb +0 -26
- data/lib/mindee/fields.rb +0 -11
- data/lib/mindee/inputs.rb +0 -153
- data/lib/mindee/response.rb +0 -27
@@ -1,26 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Mindee
|
4
|
-
# Represents page orientation.
|
5
|
-
class Orientation
|
6
|
-
# @return [Integer]
|
7
|
-
attr_reader :page_id
|
8
|
-
# The confidence score, value will be between 0.0 and 1.0
|
9
|
-
# @return [Float]
|
10
|
-
attr_reader :confidence
|
11
|
-
# A prediction among these 3 possible outputs:
|
12
|
-
# * 0 degrees: the page is already upright
|
13
|
-
# * 90 degrees: the page must be rotated clockwise to be upright
|
14
|
-
# * 270 degrees: the page must be rotated counterclockwise to be upright
|
15
|
-
# @return [Integer]
|
16
|
-
attr_reader :degrees
|
17
|
-
|
18
|
-
# @param prediction [Hash]
|
19
|
-
# @param page_id [Integer]
|
20
|
-
def initialize(prediction, page_id)
|
21
|
-
@degrees = prediction['degrees']
|
22
|
-
@confidence = prediction['confidence']
|
23
|
-
@page_id = page_id
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
data/lib/mindee/fields.rb
DELETED
@@ -1,11 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative 'fields/amount'
|
4
|
-
require_relative 'fields/base'
|
5
|
-
require_relative 'fields/company_registration'
|
6
|
-
require_relative 'fields/datefield'
|
7
|
-
require_relative 'fields/locale'
|
8
|
-
require_relative 'fields/orientation'
|
9
|
-
require_relative 'fields/payment_details'
|
10
|
-
require_relative 'fields/tax'
|
11
|
-
require_relative 'fields/custom_docs'
|
data/lib/mindee/inputs.rb
DELETED
@@ -1,153 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'stringio'
|
4
|
-
require 'origami'
|
5
|
-
require 'marcel'
|
6
|
-
|
7
|
-
# Monkey-patching for Origami
|
8
|
-
module PDFTools
|
9
|
-
def to_io_stream(params = {})
|
10
|
-
options = {
|
11
|
-
delinearize: true,
|
12
|
-
recompile: true,
|
13
|
-
decrypt: false,
|
14
|
-
}
|
15
|
-
options.update(params)
|
16
|
-
|
17
|
-
if frozen? # incompatible flags with frozen doc (signed)
|
18
|
-
options[:recompile] = nil
|
19
|
-
options[:rebuild_xrefs] = nil
|
20
|
-
options[:noindent] = nil
|
21
|
-
options[:obfuscate] = false
|
22
|
-
end
|
23
|
-
load_all_objects unless @loaded
|
24
|
-
|
25
|
-
intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?/i}
|
26
|
-
delinearize! if options[:delinearize] && linearized?
|
27
|
-
compile(options) if options[:recompile]
|
28
|
-
|
29
|
-
io_stream = StringIO.new(output(options))
|
30
|
-
io_stream.set_encoding Encoding::BINARY
|
31
|
-
io_stream
|
32
|
-
end
|
33
|
-
end
|
34
|
-
|
35
|
-
Origami::PDF.class_eval { include PDFTools }
|
36
|
-
|
37
|
-
module Mindee
|
38
|
-
ALLOWED_MIME_TYPES = [
|
39
|
-
'application/pdf',
|
40
|
-
'image/heic',
|
41
|
-
'image/png',
|
42
|
-
'image/jpeg',
|
43
|
-
'image/tiff',
|
44
|
-
'image/webp',
|
45
|
-
].freeze
|
46
|
-
|
47
|
-
MAX_DOC_PAGES = 3
|
48
|
-
|
49
|
-
# Base class for loading documents.
|
50
|
-
class InputDocument
|
51
|
-
# @return [String]
|
52
|
-
attr_reader :filename
|
53
|
-
# @return [String]
|
54
|
-
attr_reader :filepath
|
55
|
-
# @return [String]
|
56
|
-
attr_reader :file_mimetype
|
57
|
-
|
58
|
-
# @param cut_pages [Boolean]
|
59
|
-
# @param max_pages [Integer]
|
60
|
-
def initialize(cut_pages, max_pages)
|
61
|
-
@file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
|
62
|
-
|
63
|
-
unless ALLOWED_MIME_TYPES.include? @file_mimetype
|
64
|
-
raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
|
65
|
-
end
|
66
|
-
|
67
|
-
merge_pdf_pages(max_pages) if cut_pages && pdf?
|
68
|
-
end
|
69
|
-
|
70
|
-
def pdf?
|
71
|
-
@file_mimetype == 'application/pdf'
|
72
|
-
end
|
73
|
-
|
74
|
-
# @return [Integer]
|
75
|
-
def page_count
|
76
|
-
if pdf?
|
77
|
-
current_pdf = open_pdf
|
78
|
-
return current_pdf.pages.size
|
79
|
-
end
|
80
|
-
1
|
81
|
-
end
|
82
|
-
|
83
|
-
# @param close [Boolean]
|
84
|
-
def read_document(close: true)
|
85
|
-
@io_stream.seek(0)
|
86
|
-
data = @io_stream.read
|
87
|
-
@io_stream.close if close
|
88
|
-
[data].pack('m')
|
89
|
-
end
|
90
|
-
|
91
|
-
private
|
92
|
-
|
93
|
-
# @param max_pages [Integer]
|
94
|
-
def merge_pdf_pages(max_pages)
|
95
|
-
current_pdf = open_pdf
|
96
|
-
return if current_pdf.pages.size <= MAX_DOC_PAGES
|
97
|
-
|
98
|
-
new_pdf = Origami::PDF.new
|
99
|
-
|
100
|
-
to_insert = [current_pdf.pages[0], current_pdf.pages[-2], current_pdf.pages[-1]].take(max_pages)
|
101
|
-
to_insert.each do |page|
|
102
|
-
new_pdf.append_page(page)
|
103
|
-
end
|
104
|
-
@io_stream = new_pdf.to_io_stream
|
105
|
-
end
|
106
|
-
|
107
|
-
# @return [Origami::PDF]
|
108
|
-
def open_pdf
|
109
|
-
pdf_parser = Origami::PDF::LinearParser.new({})
|
110
|
-
@io_stream.seek(0)
|
111
|
-
pdf_parser.parse(@io_stream)
|
112
|
-
end
|
113
|
-
end
|
114
|
-
|
115
|
-
# Load a document from a path.
|
116
|
-
class PathDocument < InputDocument
|
117
|
-
def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
|
118
|
-
@io_stream = File.open(filepath, 'rb')
|
119
|
-
@filepath = filepath
|
120
|
-
@filename = File.basename(filepath)
|
121
|
-
super(cut_pages, max_pages)
|
122
|
-
end
|
123
|
-
end
|
124
|
-
|
125
|
-
# Load a document from a base64 string.
|
126
|
-
class Base64Document < InputDocument
|
127
|
-
def initialize(base64_string, filename, cut_pages, max_pages: 3)
|
128
|
-
@io_stream = StringIO.new(base64_string.unpack1('m*'))
|
129
|
-
@io_stream.set_encoding Encoding::BINARY
|
130
|
-
@filename = filename
|
131
|
-
super(cut_pages, max_pages)
|
132
|
-
end
|
133
|
-
end
|
134
|
-
|
135
|
-
# Load a document from raw bytes.
|
136
|
-
class BytesDocument < InputDocument
|
137
|
-
def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
|
138
|
-
@io_stream = StringIO.new(raw_bytes)
|
139
|
-
@io_stream.set_encoding Encoding::BINARY
|
140
|
-
@filename = filename
|
141
|
-
super(cut_pages, max_pages)
|
142
|
-
end
|
143
|
-
end
|
144
|
-
|
145
|
-
# Load a document from a file handle.
|
146
|
-
class FileDocument < InputDocument
|
147
|
-
def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
|
148
|
-
@io_stream = file_handle
|
149
|
-
@filename = filename
|
150
|
-
super(cut_pages, max_pages)
|
151
|
-
end
|
152
|
-
end
|
153
|
-
end
|
data/lib/mindee/response.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
module Mindee
|
4
|
-
# Stores all response attributes.
|
5
|
-
class DocumentResponse
|
6
|
-
# @return [String]
|
7
|
-
attr_reader :document_type
|
8
|
-
# @return [Hash]
|
9
|
-
attr_reader :http_response
|
10
|
-
# @return [Mindee::Document]
|
11
|
-
attr_reader :document
|
12
|
-
# @return [Array<Mindee::Document>]
|
13
|
-
attr_reader :pages
|
14
|
-
|
15
|
-
# @param http_response [Hash]
|
16
|
-
def initialize(http_response, document_type, document, pages)
|
17
|
-
@http_response = http_response
|
18
|
-
@document_type = document_type
|
19
|
-
@document = document
|
20
|
-
@pages = pages
|
21
|
-
end
|
22
|
-
|
23
|
-
def to_s
|
24
|
-
inspect
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|