mindee 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# A single item belonging to a list field.
class ListFieldItem
  # Confidence score of the item; documented as lying between 0.0 and 1.0.
  # @return [Float]
  attr_accessor :confidence
  # Bounding box of the polygon, expressed as a 4-point polygon.
  # Left unset (nil) when the prediction carries no polygon.
  # @return [Array<Array<Float>>]
  attr_reader :bbox
  # @return [Array<Array<Float>>]
  attr_reader :polygon
  # Raw content of the item, exactly as found in the prediction.
  attr_reader :content

  # @param prediction [Hash] reads the 'content', 'confidence' and 'polygon' keys
  def initialize(prediction)
    @content, @confidence, @polygon = prediction.values_at('content', 'confidence', 'polygon')
    # Without any vertex there is nothing to compute a bounding box from.
    return if @polygon.nil? || @polygon.empty?

    @bbox = Geometry.get_bbox_as_polygon(@polygon)
  end

  # @return [String] the content converted to a string
  def to_s
    content.to_s
  end
end
28
+
29
# A field whose actual values are stored as a list (used by custom documents).
class ListField
  # @return [Array<Mindee::ListFieldItem>]
  attr_reader :values
  # @return [Integer, nil]
  attr_reader :page_id
  # Whether the field was reconstructed or computed using other fields.
  # @return [Boolean]
  attr_reader :reconstructed
  # Confidence score; documented as lying between 0.0 and 1.0.
  # @return [Float]
  attr_accessor :confidence

  # @param prediction [Hash] reads the 'confidence', 'page_id' and 'values' keys
  # @param page_id [Integer, nil] when given, wins over the prediction's own 'page_id'
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    @values = []
    @confidence = prediction['confidence']
    @page_id = page_id || prediction['page_id']
    @reconstructed = reconstructed

    # Wrap every raw entry of the prediction into a ListFieldItem.
    prediction['values'].each { |raw_item| @values << ListFieldItem.new(raw_item) }
  end

  # Contents of every item, in order.
  # @return [Array]
  def contents_list
    @values.map { |item| item.content }
  end

  # String form of every item, joined together.
  # @param separator [String] placed between two consecutive items
  # @return [String]
  def contents_str(separator: ' ')
    item_strings = @values.map { |item| item.to_s }
    item_strings.join(separator)
  end

  # @return [String] the items joined by a single space
  def to_s
    contents_str(separator: ' ')
  end
end
71
+ end
@@ -0,0 +1,45 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# Locale information extracted from a document.
class Locale
  # Confidence score; documented as lying between 0.0 and 1.0.
  # @return [Float]
  attr_reader :confidence
  # Language code in ISO 639-1 format.
  # @return [String]
  attr_reader :language
  # Country code in ISO 3166-1 alpha-2 format.
  # @return [String, nil]
  attr_reader :country
  # Currency code in ISO 4217 format.
  # @return [String]
  attr_reader :currency
  # Language code, with country code when available.
  # @return [String]
  attr_reader :value

  # @param prediction [Hash] reads 'confidence', 'value', 'language', 'country' and 'currency'
  def initialize(prediction)
    @confidence = prediction['confidence']
    # Fall back on the language when the prediction has no 'value' key at all.
    @value = prediction[prediction.include?('value') ? 'value' : 'language']
    @language = prediction['language']
    @country = prediction['country']
    @currency = prediction['currency']
  end

  # Lists every available part, each followed by a semicolon.
  # @return [String]
  def to_s
    buffer = String.new
    [@value, @language, @country, @currency].each do |part|
      buffer << "#{part}; " if part
    end
    buffer.strip
  end
end
45
+ end
@@ -0,0 +1,26 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# Orientation of a page.
class Orientation
  # @return [Integer]
  attr_reader :page_id
  # Confidence score; documented as lying between 0.0 and 1.0.
  # @return [Float]
  attr_reader :confidence
  # A prediction among these 3 possible outputs:
  #   * 0 degrees: the page is already upright
  #   * 90 degrees: the page must be rotated clockwise to be upright
  #   * 270 degrees: the page must be rotated counterclockwise to be upright
  # @return [Integer]
  attr_reader :degrees

  # @param prediction [Hash] reads the 'degrees' and 'confidence' keys
  # @param page_id [Integer]
  def initialize(prediction, page_id)
    @degrees, @confidence = prediction.values_at('degrees', 'confidence')
    @page_id = page_id
  end
end
26
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ module Mindee
6
# Payment details as found on invoices and receipts.
class PaymentDetails < Field
  attr_reader :account_number, :iban, :routing_number, :swift

  # @param prediction [Hash] reads 'account_number', 'iban', 'routing_number' and 'swift'
  # @param page_id [Integer, nil]
  # @param reconstructed [Boolean]
  def initialize(prediction, page_id, reconstructed: false)
    # Bare `super` forwards every argument, keyword included, to the parent class.
    super
    @account_number, @iban, @routing_number, @swift =
      prediction.values_at('account_number', 'iban', 'routing_number', 'swift')
  end

  # Lists every available detail, each followed by a semicolon.
  # @return [String]
  def to_s
    buffer = String.new
    [@account_number, @iban, @routing_number, @swift].each do |detail|
      buffer << "#{detail}; " if detail
    end
    buffer.strip
  end
end
33
+ end
@@ -0,0 +1,35 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'base'
4
+
5
+ module Mindee
6
# Tax information.
class TaxField < Field
  # Tax value, rounded to 3 decimals.
  # @return [Float]
  attr_reader :value
  # Tax rate percentage.
  # @return [Float]
  attr_reader :rate
  # Tax code.
  # @return [String]
  attr_reader :code

  # @param prediction [Hash] reads the 'rate' and 'code' keys
  # @param page_id [Integer, nil]
  def initialize(prediction, page_id)
    super
    raw_rate = prediction['rate']
    raw_code = prediction['code']

    # @value is not assigned in this class before this point; presumably the parent sets it.
    @value = @value.round(3) unless @value.nil?
    @rate = raw_rate.to_f unless raw_rate.nil?
    # The literal string 'None' is treated as an absent code.
    @code = raw_code unless raw_code == 'None'
  end

  # Value, rate (as a percentage) and code, separated by spaces; missing parts are skipped.
  # @return [String]
  def to_s
    buffer = String.new
    [[@value, ' '], [@rate, '% '], [@code, ' ']].each do |part, suffix|
      buffer << "#{part}#{suffix}" if part
    end
    buffer.strip
  end
end
35
+ end
@@ -0,0 +1,11 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'fields/amount'
4
+ require_relative 'fields/base'
5
+ require_relative 'fields/company_registration'
6
+ require_relative 'fields/datefield'
7
+ require_relative 'fields/locale'
8
+ require_relative 'fields/orientation'
9
+ require_relative 'fields/payment_details'
10
+ require_relative 'fields/tax'
11
+ require_relative 'fields/list_field'
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# Helper functions for geometry.
module Geometry
  # Computes the axis-aligned bounding box of a set of points.
  # @param vertices [Array<Array<Float>>] points given as [x, y] pairs
  # @return [Array<Float>] x_min, y_min, x_max, y_max
  def self.get_bbox(vertices)
    xs = vertices.map { |point| point[0] }
    ys = vertices.map { |point| point[1] }
    [xs.min, ys.min, xs.max, ys.max]
  end

  # Computes the bounding box of a set of points and returns its four corners.
  # @param vertices [Array<Array<Float>>] points given as [x, y] pairs
  # @return [Array<Array<Float>>] corners ordered as
  #   (x_min, y_min), (x_max, y_min), (x_max, y_max), (x_min, y_max)
  def self.get_bbox_as_polygon(vertices)
    left, top, right, bottom = get_bbox(vertices)
    [[left, top], [right, top], [right, bottom], [left, bottom]]
  end
end
21
+ end
@@ -0,0 +1,153 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'stringio'
4
+ require 'origami'
5
+ require 'marcel'
6
+
7
# Monkey-patching for Origami: serializes a PDF to an in-memory stream.
module PDFTools
  # Serializes the document into a binary in-memory stream.
  #
  # @param params [Hash] overrides for the default options
  #   (delinearize: true, recompile: true, decrypt: false)
  # @return [StringIO] binary-encoded stream holding the serialized document
  def to_io_stream(params = {})
    options = {
      delinearize: true,
      recompile: true,
      decrypt: false,
    }
    options.update(params)

    if frozen? # incompatible flags with frozen doc (signed)
      options[:recompile] = nil
      options[:rebuild_xrefs] = nil
      options[:noindent] = nil
      options[:obfuscate] = false
    end
    load_all_objects unless @loaded

    # The `i` flag has to sit outside the %r{} delimiters: placed inside, it is
    # matched as the literal text "/i" and the comparison stays case-sensitive.
    intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?}i
    delinearize! if options[:delinearize] && linearized?
    compile(options) if options[:recompile]

    io_stream = StringIO.new(output(options))
    io_stream.set_encoding Encoding::BINARY
    io_stream
  end
end
34
+
35
+ Origami::PDF.class_eval { include PDFTools }
36
+
37
+ module Mindee
38
+ ALLOWED_MIME_TYPES = [
39
+ 'application/pdf',
40
+ 'image/heic',
41
+ 'image/png',
42
+ 'image/jpeg',
43
+ 'image/tiff',
44
+ 'image/webp',
45
+ ].freeze
46
+
47
+ MAX_DOC_PAGES = 3
48
+
49
# Base class for loading documents.
#
# Subclasses are expected to set @io_stream and @filename before calling
# this class' constructor, which reads both.
class InputDocument
  # @return [String]
  attr_reader :filename
  # @return [String]
  attr_reader :filepath
  # @return [String]
  attr_reader :file_mimetype

  # Detects the MIME type of the stream, rejects unsupported types and
  # optionally reduces the number of pages of a PDF.
  #
  # @param cut_pages [Boolean] whether a PDF with too many pages should be cut down
  # @param max_pages [Integer] maximum number of pages to keep when cutting
  # @raise [RuntimeError] when the MIME type is not in ALLOWED_MIME_TYPES
  def initialize(cut_pages, max_pages)
    @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename

    unless ALLOWED_MIME_TYPES.include? @file_mimetype
      raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
    end

    merge_pdf_pages(max_pages) if cut_pages && pdf?
  end

  # @return [Boolean] true when the detected MIME type is 'application/pdf'
  def pdf?
    @file_mimetype == 'application/pdf'
  end

  # Number of pages of a PDF; any other document counts as a single page.
  # @return [Integer]
  def page_count
    return open_pdf.pages.size if pdf?

    1
  end

  # Reads the whole stream, from its beginning, and encodes it as Base64.
  #
  # @param close [Boolean] whether to close the stream once read
  # @return [String] Base64-encoded content
  def read_document(close: true)
    @io_stream.seek(0)
    data = @io_stream.read
    @io_stream.close if close
    [data].pack('m')
  end

  private

  # Replaces the stream with a shorter PDF made of the first page, the
  # second to last page and the last page, limited to `max_pages` pages.
  # Documents which already fit within `max_pages` are left untouched.
  #
  # @param max_pages [Integer]
  def merge_pdf_pages(max_pages)
    current_pdf = open_pdf
    pages = current_pdf.pages
    # Compare against the caller's limit rather than the MAX_DOC_PAGES default,
    # otherwise any other value of `max_pages` is not honoured.
    return if pages.size <= max_pages

    new_pdf = Origami::PDF.new

    to_insert = [pages[0], pages[-2], pages[-1]].take(max_pages)
    to_insert.each_with_index do |page, idx|
      new_pdf.insert_page(idx, page)
    end
    @io_stream = new_pdf.to_io_stream
  end

  # Parses the current stream, from its beginning, as a PDF.
  # @return [Origami::PDF]
  def open_pdf
    pdf_parser = Origami::PDF::LinearParser.new({})
    @io_stream.seek(0)
    pdf_parser.parse(@io_stream)
  end
end
114
+
115
# Load a document from a path.
class PathDocument < InputDocument
  # Opens the file in binary mode and hands it over to the base class.
  #
  # @param filepath [String] path of the file to load
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = File.open(filepath, 'rb')
    @filepath = filepath
    @filename = File.basename(filepath)
    begin
      super(cut_pages, max_pages)
    rescue StandardError
      # The handle was opened here: release it when the document is rejected,
      # since the caller never gets an object to read (and close) it from.
      @io_stream.close unless @io_stream.closed?
      raise
    end
  end
end
124
+
125
# Load a document from a base64 string.
class Base64Document < InputDocument
  # Decodes the string into a binary in-memory stream and hands it over to the base class.
  #
  # @param base64_string [String] Base64-encoded content of the document
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer] defaults to MAX_DOC_PAGES, like the other document loaders
  def initialize(base64_string, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @io_stream = StringIO.new(base64_string.unpack1('m*'))
    @io_stream.set_encoding Encoding::BINARY
    @filename = filename
    super(cut_pages, max_pages)
  end
end
134
+
135
# Load a document from raw bytes.
class BytesDocument < InputDocument
  # Wraps the bytes into a binary in-memory stream and hands it over to the base class.
  #
  # @param raw_bytes [String]
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @filename = filename
    @io_stream = StringIO.new(raw_bytes)
    @io_stream.set_encoding(Encoding::BINARY)
    super(cut_pages, max_pages)
  end
end
144
+
145
# Load a document from a file handle.
class FileDocument < InputDocument
  # Uses the given handle as-is as the document's stream.
  #
  # @param file_handle [IO] already opened handle
  # @param filename [String]
  # @param cut_pages [Boolean]
  # @param max_pages [Integer]
  def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
    @filename = filename
    @io_stream = file_handle
    super(cut_pages, max_pages)
  end
end
153
+ end
@@ -0,0 +1,27 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Mindee
4
# Holds every attribute of a response.
class DocumentResponse
  # @return [String]
  attr_reader :document_type
  # @return [Hash]
  attr_reader :http_response
  # @return [Mindee::Document]
  attr_reader :document
  # @return [Array<Mindee::Document>]
  attr_reader :pages

  # @param http_response [Hash]
  # @param document_type [String]
  # @param document [Mindee::Document]
  # @param pages [Array<Mindee::Document>]
  def initialize(http_response, document_type, document, pages)
    # Assigned left to right, which keeps the attribute order shown by `inspect`.
    @http_response, @document_type, @document, @pages =
      http_response, document_type, document, pages
  end

  # @return [String] same output as `inspect`
  def to_s
    inspect
  end
end
27
+ end
@@ -0,0 +1,21 @@
1
+ # frozen_string_literal: true
2
+
3
# Mindee
module Mindee
  VERSION = '1.0.0'

  # Detects the host platform from the 'host_os' entry of RbConfig.
  #
  # @return [Symbol, nil] :linux, :windows, :macos, :bsd or :solaris;
  #   nil when the host matches none of the known patterns
  def self.find_platform
    host = RbConfig::CONFIG['host_os']
    platforms = {
      linux: %r{linux|cygwin},
      windows: %r{mswin|mingw|bccwin|wince|emx|win32},
      macos: %r{mac|darwin},
      bsd: %r{bsd},
      solaris: %r{solaris|sunos},
    }
    # `find` gives nil when nothing matches, whereas falling out of an `each`
    # loop would return the whole Hash of patterns as the "platform".
    matched = platforms.find { |_os, regexp| regexp.match?(host) }
    matched&.first
  end
  PLATFORM = find_platform.freeze
end
data/lib/mindee.rb ADDED
@@ -0,0 +1,8 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'mindee/client'
4
+
5
module Mindee
  # Error class of the library, a plain StandardError subclass.
  class Error < StandardError; end
end
data/mindee.gemspec ADDED
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'lib/mindee/version'
4
+
5
# Gem specification of the Mindee API helper library.
Gem::Specification.new do |spec|
  spec.name = 'mindee'
  spec.version = Mindee::VERSION
  spec.authors = ['Mindee']
  spec.email = ['devrel@mindee.co']

  spec.summary = 'Mindee API Helper Library for Ruby'
  # spec.description = %q{TODO: Write a longer description or delete this line.}
  spec.homepage = 'https://github.com/mindee/mindee-api-ruby'
  spec.license = 'MIT'
  spec.required_ruby_version = Gem::Requirement.new('>= 2.6.0')

  spec.metadata['homepage_uri'] = 'https://mindee.com/'
  spec.metadata['source_code_uri'] = 'https://github.com/mindee/mindee-api-ruby'
  spec.metadata['changelog_uri'] = 'https://github.com/mindee/mindee-api-ruby/blob/main/CHANGELOG.md'
  spec.metadata['rubygems_mfa_required'] = 'true'

  # Specify which files should be added to the gem when it is released.
  # The `git ls-files -z` loads the files in the RubyGem that have been added into git.
  spec.files = Dir.chdir(File.expand_path(__dir__)) do
    # The dot of `.github` is escaped so that only that directory is excluded,
    # and \A anchors the pattern at the very start of the path.
    `git ls-files -z`.split("\x0").reject { |f| f.match?(%r{\A(?:\.github|spec|features)/}) }
  end
  spec.bindir = 'bin'
  spec.executables = spec.files.grep(%r{^bin/}) { |f| File.basename(f) }
  spec.require_paths = ['lib']

  spec.add_runtime_dependency 'marcel', '~> 1.0.2'
  spec.add_runtime_dependency 'mrz', '~> 0.2.0'
  spec.add_runtime_dependency 'origami', '~> 2.1.0'
end
metadata ADDED
@@ -0,0 +1,128 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: mindee
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Mindee
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2022-07-26 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: marcel
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 1.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 1.0.2
27
+ - !ruby/object:Gem::Dependency
28
+ name: mrz
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: 0.2.0
34
+ type: :runtime
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: 0.2.0
41
+ - !ruby/object:Gem::Dependency
42
+ name: origami
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 2.1.0
48
+ type: :runtime
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 2.1.0
55
+ description:
56
+ email:
57
+ - devrel@mindee.co
58
+ executables:
59
+ - console
60
+ - mindee.rb
61
+ extensions: []
62
+ extra_rdoc_files: []
63
+ files:
64
+ - ".gitattributes"
65
+ - ".gitignore"
66
+ - ".rubocop.yml"
67
+ - CHANGELOG.md
68
+ - CODE_OF_CONDUCT.md
69
+ - Gemfile
70
+ - LICENSE
71
+ - README.md
72
+ - Rakefile
73
+ - bin/console
74
+ - bin/mindee.rb
75
+ - lib/mindee.rb
76
+ - lib/mindee/client.rb
77
+ - lib/mindee/document_config.rb
78
+ - lib/mindee/documents.rb
79
+ - lib/mindee/documents/base.rb
80
+ - lib/mindee/documents/custom.rb
81
+ - lib/mindee/documents/financial_doc.rb
82
+ - lib/mindee/documents/invoice.rb
83
+ - lib/mindee/documents/passport.rb
84
+ - lib/mindee/documents/receipt.rb
85
+ - lib/mindee/endpoint.rb
86
+ - lib/mindee/fields.rb
87
+ - lib/mindee/fields/amount.rb
88
+ - lib/mindee/fields/base.rb
89
+ - lib/mindee/fields/company_registration.rb
90
+ - lib/mindee/fields/datefield.rb
91
+ - lib/mindee/fields/list_field.rb
92
+ - lib/mindee/fields/locale.rb
93
+ - lib/mindee/fields/orientation.rb
94
+ - lib/mindee/fields/payment_details.rb
95
+ - lib/mindee/fields/tax.rb
96
+ - lib/mindee/geometry.rb
97
+ - lib/mindee/inputs.rb
98
+ - lib/mindee/response.rb
99
+ - lib/mindee/version.rb
100
+ - mindee.gemspec
101
+ homepage: https://github.com/mindee/mindee-api-ruby
102
+ licenses:
103
+ - MIT
104
+ metadata:
105
+ homepage_uri: https://mindee.com/
106
+ source_code_uri: https://github.com/mindee/mindee-api-ruby
107
+ changelog_uri: https://github.com/mindee/mindee-api-ruby/blob/main/CHANGELOG.md
108
+ rubygems_mfa_required: 'true'
109
+ post_install_message:
110
+ rdoc_options: []
111
+ require_paths:
112
+ - lib
113
+ required_ruby_version: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - ">="
116
+ - !ruby/object:Gem::Version
117
+ version: 2.6.0
118
+ required_rubygems_version: !ruby/object:Gem::Requirement
119
+ requirements:
120
+ - - ">="
121
+ - !ruby/object:Gem::Version
122
+ version: '0'
123
+ requirements: []
124
+ rubygems_version: 3.1.2
125
+ signing_key:
126
+ specification_version: 4
127
+ summary: Mindee API Helper Library for Ruby
128
+ test_files: []