mindee 1.2.0 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +21 -0
  6. data/README.md +46 -23
  7. data/Rakefile +6 -1
  8. data/bin/mindee.rb +70 -61
  9. data/docs/ruby-api-builder.md +131 -0
  10. data/docs/ruby-getting-started.md +265 -0
  11. data/docs/ruby-invoice-ocr.md +261 -0
  12. data/docs/ruby-passport-ocr.md +156 -0
  13. data/docs/ruby-receipt-ocr.md +170 -0
  14. data/lib/mindee/client.rb +128 -93
  15. data/lib/mindee/document_config.rb +22 -154
  16. data/lib/mindee/geometry.rb +105 -8
  17. data/lib/mindee/http/endpoint.rb +80 -0
  18. data/lib/mindee/input/pdf_processing.rb +106 -0
  19. data/lib/mindee/input/sources.rb +97 -0
  20. data/lib/mindee/input.rb +3 -0
  21. data/lib/mindee/parsing/document.rb +31 -0
  22. data/lib/mindee/parsing/error.rb +22 -0
  23. data/lib/mindee/parsing/inference.rb +53 -0
  24. data/lib/mindee/parsing/page.rb +46 -0
  25. data/lib/mindee/parsing/prediction/base.rb +30 -0
  26. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  29. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  30. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  32. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  33. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  34. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  35. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  36. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  37. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  38. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  39. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  40. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  41. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  42. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  44. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  45. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  46. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  47. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  48. data/lib/mindee/parsing/prediction.rb +12 -0
  49. data/lib/mindee/parsing.rb +4 -0
  50. data/lib/mindee/version.rb +1 -1
  51. data/mindee.gemspec +2 -1
  52. metadata +57 -24
  53. data/lib/mindee/documents/base.rb +0 -35
  54. data/lib/mindee/documents/custom.rb +0 -65
  55. data/lib/mindee/documents/financial_doc.rb +0 -135
  56. data/lib/mindee/documents/invoice.rb +0 -162
  57. data/lib/mindee/documents/passport.rb +0 -163
  58. data/lib/mindee/documents/receipt.rb +0 -109
  59. data/lib/mindee/documents.rb +0 -7
  60. data/lib/mindee/endpoint.rb +0 -105
  61. data/lib/mindee/fields/orientation.rb +0 -26
  62. data/lib/mindee/fields.rb +0 -11
  63. data/lib/mindee/inputs.rb +0 -153
  64. data/lib/mindee/response.rb +0 -27
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Represents page orientation.
5
- class Orientation
6
- # @return [Integer]
7
- attr_reader :page_id
8
- # The confidence score, value will be between 0.0 and 1.0
9
- # @return [Float]
10
- attr_reader :confidence
11
- # A prediction among these 3 possible outputs:
12
- # * 0 degrees: the page is already upright
13
- # * 90 degrees: the page must be rotated clockwise to be upright
14
- # * 270 degrees: the page must be rotated counterclockwise to be upright
15
- # @return [Integer]
16
- attr_reader :degrees
17
-
18
- # @param prediction [Hash]
19
- # @param page_id [Integer]
20
- def initialize(prediction, page_id)
21
- @degrees = prediction['degrees']
22
- @confidence = prediction['confidence']
23
- @page_id = page_id
24
- end
25
- end
26
- end
data/lib/mindee/fields.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'fields/amount'
4
- require_relative 'fields/base'
5
- require_relative 'fields/company_registration'
6
- require_relative 'fields/datefield'
7
- require_relative 'fields/locale'
8
- require_relative 'fields/orientation'
9
- require_relative 'fields/payment_details'
10
- require_relative 'fields/tax'
11
- require_relative 'fields/custom_docs'
data/lib/mindee/inputs.rb DELETED
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'stringio'
4
- require 'origami'
5
- require 'marcel'
6
-
7
- # Monkey-patching for Origami
8
- module PDFTools
9
- def to_io_stream(params = {})
10
- options = {
11
- delinearize: true,
12
- recompile: true,
13
- decrypt: false,
14
- }
15
- options.update(params)
16
-
17
- if frozen? # incompatible flags with frozen doc (signed)
18
- options[:recompile] = nil
19
- options[:rebuild_xrefs] = nil
20
- options[:noindent] = nil
21
- options[:obfuscate] = false
22
- end
23
- load_all_objects unless @loaded
24
-
25
- intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?/i}
26
- delinearize! if options[:delinearize] && linearized?
27
- compile(options) if options[:recompile]
28
-
29
- io_stream = StringIO.new(output(options))
30
- io_stream.set_encoding Encoding::BINARY
31
- io_stream
32
- end
33
- end
34
-
35
- Origami::PDF.class_eval { include PDFTools }
36
-
37
- module Mindee
38
- ALLOWED_MIME_TYPES = [
39
- 'application/pdf',
40
- 'image/heic',
41
- 'image/png',
42
- 'image/jpeg',
43
- 'image/tiff',
44
- 'image/webp',
45
- ].freeze
46
-
47
- MAX_DOC_PAGES = 3
48
-
49
- # Base class for loading documents.
50
- class InputDocument
51
- # @return [String]
52
- attr_reader :filename
53
- # @return [String]
54
- attr_reader :filepath
55
- # @return [String]
56
- attr_reader :file_mimetype
57
-
58
- # @param cut_pages [Boolean]
59
- # @param max_pages [Integer]
60
- def initialize(cut_pages, max_pages)
61
- @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
62
-
63
- unless ALLOWED_MIME_TYPES.include? @file_mimetype
64
- raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
65
- end
66
-
67
- merge_pdf_pages(max_pages) if cut_pages && pdf?
68
- end
69
-
70
- def pdf?
71
- @file_mimetype == 'application/pdf'
72
- end
73
-
74
- # @return [Integer]
75
- def page_count
76
- if pdf?
77
- current_pdf = open_pdf
78
- return current_pdf.pages.size
79
- end
80
- 1
81
- end
82
-
83
- # @param close [Boolean]
84
- def read_document(close: true)
85
- @io_stream.seek(0)
86
- data = @io_stream.read
87
- @io_stream.close if close
88
- [data].pack('m')
89
- end
90
-
91
- private
92
-
93
- # @param max_pages [Integer]
94
- def merge_pdf_pages(max_pages)
95
- current_pdf = open_pdf
96
- return if current_pdf.pages.size <= MAX_DOC_PAGES
97
-
98
- new_pdf = Origami::PDF.new
99
-
100
- to_insert = [current_pdf.pages[0], current_pdf.pages[-2], current_pdf.pages[-1]].take(max_pages)
101
- to_insert.each do |page|
102
- new_pdf.append_page(page)
103
- end
104
- @io_stream = new_pdf.to_io_stream
105
- end
106
-
107
- # @return [Origami::PDF]
108
- def open_pdf
109
- pdf_parser = Origami::PDF::LinearParser.new({})
110
- @io_stream.seek(0)
111
- pdf_parser.parse(@io_stream)
112
- end
113
- end
114
-
115
- # Load a document from a path.
116
- class PathDocument < InputDocument
117
- def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
118
- @io_stream = File.open(filepath, 'rb')
119
- @filepath = filepath
120
- @filename = File.basename(filepath)
121
- super(cut_pages, max_pages)
122
- end
123
- end
124
-
125
- # Load a document from a base64 string.
126
- class Base64Document < InputDocument
127
- def initialize(base64_string, filename, cut_pages, max_pages: 3)
128
- @io_stream = StringIO.new(base64_string.unpack1('m*'))
129
- @io_stream.set_encoding Encoding::BINARY
130
- @filename = filename
131
- super(cut_pages, max_pages)
132
- end
133
- end
134
-
135
- # Load a document from raw bytes.
136
- class BytesDocument < InputDocument
137
- def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
138
- @io_stream = StringIO.new(raw_bytes)
139
- @io_stream.set_encoding Encoding::BINARY
140
- @filename = filename
141
- super(cut_pages, max_pages)
142
- end
143
- end
144
-
145
- # Load a document from a file handle.
146
- class FileDocument < InputDocument
147
- def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
148
- @io_stream = file_handle
149
- @filename = filename
150
- super(cut_pages, max_pages)
151
- end
152
- end
153
- end
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Stores all response attributes.
5
- class DocumentResponse
6
- # @return [String]
7
- attr_reader :document_type
8
- # @return [Hash]
9
- attr_reader :http_response
10
- # @return [Mindee::Document]
11
- attr_reader :document
12
- # @return [Array<Mindee::Document>]
13
- attr_reader :pages
14
-
15
- # @param http_response [Hash]
16
- def initialize(http_response, document_type, document, pages)
17
- @http_response = http_response
18
- @document_type = document_type
19
- @document = document
20
- @pages = pages
21
- end
22
-
23
- def to_s
24
- inspect
25
- end
26
- end
27
- end