mindee 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Field in a list.
5
class ListFieldItem
  # Confidence score of the prediction, between 0.0 and 1.0.
  # @return [Float]
  attr_accessor :confidence
  # Bounding box of the polygon, expressed as a polygon; left unset (nil)
  # when the prediction has no polygon or an empty one.
  # @return [Array<Array<Float>>]
  attr_reader :bbox
  # Polygon exactly as found under the 'polygon' key of the prediction.
  # @return [Array<Array<Float>>]
  attr_reader :polygon
  # Raw value found under the 'content' key of the prediction.
  attr_reader :content

  # @param prediction [Hash]
  def initialize(prediction)
    @polygon = prediction['polygon']
    @confidence = prediction['confidence']
    @content = prediction['content']
    # Without any vertices there is nothing to compute a bounding box from.
    return if @polygon.nil? || @polygon.empty?

    @bbox = Geometry.get_bbox_as_polygon(@polygon)
  end

  # String form of the content.
  # @return [String]
  def to_s
    content.to_s
  end
end
28
+
29
+ # Field where actual values are kept in a list (custom docs).
30
class ListField
  # Items built from the 'values' entry of the prediction.
  # @return [Array<Mindee::ListFieldItem>]
  attr_reader :values
  # Page ID given to the constructor, falling back to the prediction's 'page_id'.
  # @return [Integer, nil]
  attr_reader :page_id
  # true if the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # Confidence score of the prediction, between 0.0 and 1.0.
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @reconstructed = reconstructed
    @page_id = page_id || prediction['page_id']
    @confidence = prediction['confidence']
    @values = prediction['values'].map { |item| ListFieldItem.new(item) }
  end

  # Content of every item, in order.
  # @return [Array]
  def contents_list
    values.map { |item| item.content }
  end

  # String form of every item, joined with the separator.
  # @param separator [String]
  # @return [String]
  def contents_str(separator: ' ')
    item_strings = values.map { |item| item.to_s }
    item_strings.join(separator)
  end

  # Same as #contents_str with a single space as separator.
  # @return [String]
  def to_s
    contents_str
  end
end
71
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Represents locale information
5
class Locale
  # Confidence score of the prediction, between 0.0 and 1.0.
  # @return [Float]
  attr_reader :confidence
  # Language code in ISO 639-1 format.
  # @return [String]
  attr_reader :language
  # Country code in ISO 3166-1 alpha-2 format.
  # @return [String, nil]
  attr_reader :country
  # Currency code in ISO 4217 format.
  # @return [String]
  attr_reader :currency
  # Language code, with country code when available.
  # @return [String]
  attr_reader :value

  # Reads the locale attributes from the prediction. When the prediction has
  # no 'value' key, its 'language' entry is used as the value instead.
  # @param prediction [Hash]
  def initialize(prediction)
    @confidence = prediction['confidence']
    @language = prediction['language']
    @country = prediction['country']
    @currency = prediction['currency']
    @value = prediction[prediction.include?('value') ? 'value' : 'language']
  end

  # Value, language, country and currency (those that are set), each
  # followed by ';' and separated by spaces.
  # @return [String]
  def to_s
    shown = [@value, @language, @country, @currency].select { |part| part }
    shown.each_with_object(String.new) { |part, out| out << "#{part}; " }.strip
  end
end
45
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Represents page orientation.
5
class Orientation
  # Page ID passed to the constructor.
  # @return [Integer]
  attr_reader :page_id
  # Confidence score of the prediction, between 0.0 and 1.0.
  # @return [Float]
  attr_reader :confidence
  # A prediction among these 3 possible outputs:
  # * 0 degrees: the page is already upright
  # * 90 degrees: the page must be rotated clockwise to be upright
  # * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer]
  attr_reader :degrees

  # Stores the page ID along with the 'degrees' and 'confidence' entries of
  # the prediction.
  # @param prediction [Hash]
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @page_id = page_id
    @confidence = prediction['confidence']
    @degrees = prediction['degrees']
  end
end
26
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ module Mindee
6
+ # Represents payment details for invoices and receipts
7
class PaymentDetails < Field
  # Value of the 'account_number' entry of the prediction.
  attr_reader :account_number
  # Value of the 'iban' entry of the prediction.
  attr_reader :iban
  # Value of the 'routing_number' entry of the prediction.
  attr_reader :routing_number
  # Value of the 'swift' entry of the prediction.
  attr_reader :swift

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    # Bare super forwards every argument, keyword included, to Field.
    super
    @swift = prediction['swift']
    @routing_number = prediction['routing_number']
    @iban = prediction['iban']
    @account_number = prediction['account_number']
  end

  # Account number, IBAN, routing number and SWIFT (those that are set),
  # each followed by ';' and separated by spaces.
  # @return [String]
  def to_s
    details = [@account_number, @iban, @routing_number, @swift]
    details.each_with_object(String.new) { |detail, out| out << "#{detail}; " if detail }.strip
  end
end
33
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ module Mindee
6
+ # Represents tax information.
7
class TaxField < Field
  # Tax value, rounded to 3 decimals.
  # @return [Float]
  attr_reader :value
  # Tax rate percentage.
  # @return [Float]
  attr_reader :rate
  # Tax code; left unset when the prediction gives the string 'None'.
  # @return [String]
  attr_reader :code

  # @param prediction [Hash]
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super
    raw_rate = prediction['rate']
    raw_code = prediction['code']
    # NOTE(review): @value is presumably assigned by Field#initialize, which is
    # not visible here -- confirm against fields/base.
    @value = @value.round(3) unless @value.nil?
    @rate = raw_rate.to_f unless raw_rate.nil?
    @code = raw_code unless raw_code == 'None'
  end

  # Value, rate (suffixed with '%') and code (those that are set),
  # separated by spaces.
  # @return [String]
  def to_s
    labelled = [[@value, ''], [@rate, '%'], [@code, '']]
    labelled.each_with_object(String.new) do |(part, suffix), out|
      out << "#{part}#{suffix} " if part
    end.strip
  end
end
35
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'fields/amount'
4
+ require_relative 'fields/base'
5
+ require_relative 'fields/company_registration'
6
+ require_relative 'fields/datefield'
7
+ require_relative 'fields/locale'
8
+ require_relative 'fields/orientation'
9
+ require_relative 'fields/payment_details'
10
+ require_relative 'fields/tax'
11
+ require_relative 'fields/list_field'
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Various helper functions for geometry.
5
module Geometry
  class << self
    # Smallest and largest coordinates found among the vertices, where each
    # vertex is indexed as [x, y].
    # @param vertices [Array<Array<Float>>]
    # @return [Array<Float>] x min, y min, x max, y max -- in that order.
    def get_bbox(vertices)
      xs = vertices.map { |point| point[0] }
      ys = vertices.map { |point| point[1] }
      [xs.min, ys.min, xs.max, ys.max]
    end

    # Same bounds as #get_bbox, returned as the four corner points
    # (x min, y min), (x max, y min), (x max, y max), (x min, y max).
    # @param vertices [Array<Array<Float>>]
    # @return [Array<Array<Float>>]
    def get_bbox_as_polygon(vertices)
      x_lo, y_lo, x_hi, y_hi = get_bbox(vertices)
      [
        [x_lo, y_lo],
        [x_hi, y_lo],
        [x_hi, y_hi],
        [x_lo, y_hi],
      ]
    end
  end
end
21
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+ require 'origami'
5
+ require 'marcel'
6
+
7
+ # Monkey-patching for Origami
8
module PDFTools
  # Serializes the document into an in-memory binary stream instead of a file.
  #
  # Relies on methods of the including class: load_all_objects,
  # intents_as_pdfa1, delinearize!, linearized?, compile and output.
  #
  # @param params [Hash] options overriding the defaults
  #   (delinearize: true, recompile: true, decrypt: false).
  # @return [StringIO] stream over the generated output, in binary encoding.
  def to_io_stream(params = {})
    options = {
      delinearize: true,
      recompile: true,
      decrypt: false,
    }.merge(params)

    if frozen? # incompatible flags with frozen doc (signed)
      options.merge!(recompile: nil, rebuild_xrefs: nil, noindent: nil, obfuscate: false)
    end
    load_all_objects unless @loaded

    # The case-insensitivity flag must sit outside the pattern delimiters;
    # inside them, "/i" is matched literally and a PDF/A intent never matches.
    intents_as_pdfa1 if options[:intent].to_s.match?(%r{pdf[/-]?A1?}i)
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    io_stream = StringIO.new(output(options))
    io_stream.set_encoding Encoding::BINARY
    io_stream
  end
end
34
+
35
# Mix PDFTools into Origami's PDF class so that its instances gain the methods defined there.
Origami::PDF.include(PDFTools)
36
+
37
+ module Mindee
38
# MIME types accepted by InputDocument; any other type is rejected.
ALLOWED_MIME_TYPES = [
  'application/pdf',
  'image/heic',
  'image/png',
  'image/jpeg',
  'image/tiff',
  'image/webp',
].freeze

# Default maximum number of pages kept when a PDF is cut.
MAX_DOC_PAGES = 3

# Base class for loading documents.
#
# Subclasses must set @io_stream and @filename before calling this
# constructor, which reads both.
class InputDocument
  # @return [String]
  attr_reader :filename
  # @return [String]
  attr_reader :filepath
  # @return [String]
  attr_reader :file_mimetype

  # Detects the MIME type, rejects unsupported types, and cuts PDF pages
  # when requested.
  # @param cut_pages [Boolean] whether a PDF should be reduced to at most max_pages pages.
  # @param max_pages [Integer]
  # @raise [RuntimeError] when the detected MIME type is not in ALLOWED_MIME_TYPES.
  def initialize(cut_pages, max_pages)
    @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename

    unless ALLOWED_MIME_TYPES.include? @file_mimetype
      raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
    end

    merge_pdf_pages(max_pages) if cut_pages && pdf?
  end

  # @return [Boolean] true when the detected MIME type is application/pdf.
  def pdf?
    @file_mimetype == 'application/pdf'
  end

  # Number of pages of a PDF; any other document counts as a single page.
  # @return [Integer]
  def page_count
    return 1 unless pdf?

    open_pdf.pages.size
  end

  # Reads the whole stream from its start and returns it Base64-encoded.
  # @param close [Boolean] close the stream once read.
  # @return [String]
  def read_document(close: true)
    @io_stream.seek(0)
    data = @io_stream.read
    @io_stream.close if close
    [data].pack('m')
  end

  private

  # Replaces the stream with a new PDF holding the first, the penultimate
  # and the last page of the original, limited to max_pages pages.
  # Nothing is done when the document already fits within max_pages.
  # @param max_pages [Integer]
  def merge_pdf_pages(max_pages)
    pages = open_pdf.pages
    # Compare against the requested limit, not the default constant, so a
    # custom max_pages is honoured.
    return if pages.size <= max_pages

    new_pdf = Origami::PDF.new

    to_insert = [pages[0], pages[-2], pages[-1]].take(max_pages)
    to_insert.each_with_index do |page, idx|
      new_pdf.insert_page(idx, page)
    end
    @io_stream = new_pdf.to_io_stream
  end

  # Parses the stream, from its start, as a PDF.
  # @return [Origami::PDF]
  def open_pdf
    pdf_parser = Origami::PDF::LinearParser.new({})
    @io_stream.seek(0)
    pdf_parser.parse(@io_stream)
  end
end
114
+
115
+ # Load a document from a path.
116
class PathDocument < InputDocument
  # Opens the file in binary mode and hands it to the base constructor.
  # @param filepath [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = File.open(filepath, 'rb')
    @filepath = filepath
    @filename = File.basename(filepath)
    begin
      super(cut_pages, max_pages)
    rescue StandardError
      # The handle was opened here, so release it when loading fails
      # (e.g. the file type is rejected) before passing the error on.
      @io_stream.close unless @io_stream.closed?
      raise
    end
  end
end
124
+
125
+ # Load a document from a base64 string.
126
class Base64Document < InputDocument
  # Decodes the Base64 string into an in-memory binary stream and hands it
  # to the base constructor.
  # @param base64_string [String]
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer] defaults to MAX_DOC_PAGES, like the sibling loaders.
  def initialize(base64_string, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = StringIO.new(base64_string.unpack1('m*'))
    @io_stream.set_encoding Encoding::BINARY
    @filename = filename
    super(cut_pages, max_pages)
  end
end
134
+
135
+ # Load a document from raw bytes.
136
class BytesDocument < InputDocument
  # Wraps the raw bytes in an in-memory binary stream and hands it to the
  # base constructor.
  # @param raw_bytes [String]
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    stream = StringIO.new(raw_bytes)
    stream.set_encoding(Encoding::BINARY)
    @io_stream = stream
    @filename = filename
    super(cut_pages, max_pages)
  end
end
144
+
145
+ # Load a document from a file handle.
146
class FileDocument < InputDocument
  # Uses the given handle directly as the document stream.
  # @param file_handle [IO]
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @filename = filename
    @io_stream = file_handle
    super(cut_pages, max_pages)
  end
end
153
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
+ # Stores all response attributes.
5
class DocumentResponse
  # Document type given to the constructor.
  # @return [String]
  attr_reader :document_type
  # HTTP response given to the constructor.
  # @return [Hash]
  attr_reader :http_response
  # Document given to the constructor.
  # @return [Mindee::Document]
  attr_reader :document
  # Pages given to the constructor.
  # @return [Array<Mindee::Document>]
  attr_reader :pages

  # Stores the four arguments as they are.
  # @param http_response [Hash]
  # @param document_type [String]
  # @param document [Mindee::Document]
  # @param pages [Array<Mindee::Document>]
  def initialize(http_response, document_type, document, pages)
    @pages = pages
    @document = document
    @document_type = document_type
    @http_response = http_response
  end

  # Same output as #inspect.
  # @return [String]
  def to_s
    inspect
  end
end
27
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ # Mindee
4
module Mindee
  VERSION = '1.0.0'

  # Identifies the platform from a host OS string.
  #
  # The patterns are tried in declaration order and the first match wins.
  #
  # @param host [String] host OS identifier; defaults to the running
  #   interpreter's RbConfig 'host_os' value.
  # @return [Symbol, nil] :linux, :windows, :macos, :bsd or :solaris, or nil
  #   when no pattern matches (previously the whole pattern Hash leaked out
  #   as the return value of `each`).
  def self.find_platform(host = RbConfig::CONFIG['host_os'])
    platforms = {
      linux: %r{linux|cygwin},
      windows: %r{mswin|mingw|bccwin|wince|emx|win32},
      macos: %r{mac|darwin},
      bsd: %r{bsd},
      solaris: %r{solaris|sunos},
    }
    found = platforms.find { |_os, pattern| pattern.match?(host) }
    found&.first
  end
  PLATFORM = find_platform.freeze
end
data/lib/mindee.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mindee/client'
4
+
5
module Mindee
  # Error type namespaced under Mindee: a plain StandardError subclass
  # that adds no behavior of its own.
  class Error < StandardError; end
end
data/mindee.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/mindee/version'
4
+
5
Gem::Specification.new do |spec|
  spec.name = 'mindee'
  spec.version = Mindee::VERSION
  spec.authors = ['Mindee']
  spec.email = ['devrel@mindee.co']

  spec.summary = 'Mindee API Helper Library for Ruby'
  # spec.description = %q{TODO: Write a longer description or delete this line.}
  spec.homepage = 'https://github.com/mindee/mindee-api-ruby'
  spec.license = 'MIT'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.6.0')

  spec.metadata['homepage_uri'] = 'https://mindee.com/'
  spec.metadata['source_code_uri'] = 'https://github.com/mindee/mindee-api-ruby'
  spec.metadata['changelog_uri'] = 'https://github.com/mindee/mindee-api-ruby/blob/main/CHANGELOG.md'
  spec.metadata['rubygems_mfa_required'] = 'true'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    # Leave out CI configuration and test directories. The dot is escaped so
    # only ".github/" is excluded, and \A anchors at the start of the path.
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(\.github|spec|features)/}) }
  end
  spec.bindir = 'bin'
  # Every file tracked under bin/ is declared as an executable.
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'marcel', '~> 1.0.2'
  spec.add_runtime_dependency 'mrz', '~> 0.2.0'
  spec.add_runtime_dependency 'origami', '~> 2.1.0'
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mindee
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Mindee
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: marcel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: mrz
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: origami
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.1.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.1.0
55
+ description:
56
+ email:
57
+ - devrel@mindee.co
58
+ executables:
59
+ - console
60
+ - mindee.rb
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - ".gitattributes"
65
+ - ".gitignore"
66
+ - ".rubocop.yml"
67
+ - CHANGELOG.md
68
+ - CODE_OF_CONDUCT.md
69
+ - Gemfile
70
+ - LICENSE
71
+ - README.md
72
+ - Rakefile
73
+ - bin/console
74
+ - bin/mindee.rb
75
+ - lib/mindee.rb
76
+ - lib/mindee/client.rb
77
+ - lib/mindee/document_config.rb
78
+ - lib/mindee/documents.rb
79
+ - lib/mindee/documents/base.rb
80
+ - lib/mindee/documents/custom.rb
81
+ - lib/mindee/documents/financial_doc.rb
82
+ - lib/mindee/documents/invoice.rb
83
+ - lib/mindee/documents/passport.rb
84
+ - lib/mindee/documents/receipt.rb
85
+ - lib/mindee/endpoint.rb
86
+ - lib/mindee/fields.rb
87
+ - lib/mindee/fields/amount.rb
88
+ - lib/mindee/fields/base.rb
89
+ - lib/mindee/fields/company_registration.rb
90
+ - lib/mindee/fields/datefield.rb
91
+ - lib/mindee/fields/list_field.rb
92
+ - lib/mindee/fields/locale.rb
93
+ - lib/mindee/fields/orientation.rb
94
+ - lib/mindee/fields/payment_details.rb
95
+ - lib/mindee/fields/tax.rb
96
+ - lib/mindee/geometry.rb
97
+ - lib/mindee/inputs.rb
98
+ - lib/mindee/response.rb
99
+ - lib/mindee/version.rb
100
+ - mindee.gemspec
101
+ homepage: https://github.com/mindee/mindee-api-ruby
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ homepage_uri: https://mindee.com/
106
+ source_code_uri: https://github.com/mindee/mindee-api-ruby
107
+ changelog_uri: https://github.com/mindee/mindee-api-ruby/blob/main/CHANGELOG.md
108
+ rubygems_mfa_required: 'true'
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: 2.6.0
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubygems_version: 3.1.2
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Mindee API Helper Library for Ruby
128
+ test_files: []