mindee 1.2.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -1
  3. data/.rubocop.yml +2 -2
  4. data/.yardopts +4 -0
  5. data/CHANGELOG.md +21 -0
  6. data/README.md +46 -23
  7. data/Rakefile +6 -1
  8. data/bin/mindee.rb +70 -61
  9. data/docs/ruby-api-builder.md +131 -0
  10. data/docs/ruby-getting-started.md +265 -0
  11. data/docs/ruby-invoice-ocr.md +261 -0
  12. data/docs/ruby-passport-ocr.md +156 -0
  13. data/docs/ruby-receipt-ocr.md +170 -0
  14. data/lib/mindee/client.rb +128 -93
  15. data/lib/mindee/document_config.rb +22 -154
  16. data/lib/mindee/geometry.rb +105 -8
  17. data/lib/mindee/http/endpoint.rb +80 -0
  18. data/lib/mindee/input/pdf_processing.rb +106 -0
  19. data/lib/mindee/input/sources.rb +97 -0
  20. data/lib/mindee/input.rb +3 -0
  21. data/lib/mindee/parsing/document.rb +31 -0
  22. data/lib/mindee/parsing/error.rb +22 -0
  23. data/lib/mindee/parsing/inference.rb +53 -0
  24. data/lib/mindee/parsing/page.rb +46 -0
  25. data/lib/mindee/parsing/prediction/base.rb +30 -0
  26. data/lib/mindee/{fields → parsing/prediction/common_fields}/amount.rb +5 -1
  27. data/lib/mindee/{fields → parsing/prediction/common_fields}/base.rb +16 -5
  28. data/lib/mindee/{fields → parsing/prediction/common_fields}/company_registration.rb +0 -0
  29. data/lib/mindee/{fields/datefield.rb → parsing/prediction/common_fields/date.rb} +0 -0
  30. data/lib/mindee/{fields → parsing/prediction/common_fields}/locale.rb +0 -0
  31. data/lib/mindee/{fields → parsing/prediction/common_fields}/payment_details.rb +0 -0
  32. data/lib/mindee/parsing/prediction/common_fields/position.rb +39 -0
  33. data/lib/mindee/{fields → parsing/prediction/common_fields}/tax.rb +7 -2
  34. data/lib/mindee/parsing/prediction/common_fields/text.rb +12 -0
  35. data/lib/mindee/parsing/prediction/common_fields.rb +11 -0
  36. data/lib/mindee/parsing/prediction/custom/custom_v1.rb +58 -0
  37. data/lib/mindee/{fields/custom_docs.rb → parsing/prediction/custom/fields.rb} +5 -5
  38. data/lib/mindee/parsing/prediction/eu/license_plate/license_plate_v1.rb +34 -0
  39. data/lib/mindee/parsing/prediction/fr/bank_account_details/bank_account_details_v1.rb +40 -0
  40. data/lib/mindee/parsing/prediction/fr/carte_vitale/carte_vitale_v1.rb +49 -0
  41. data/lib/mindee/parsing/prediction/fr/id_card/id_card_v1.rb +84 -0
  42. data/lib/mindee/parsing/prediction/invoice/invoice_line_item.rb +58 -0
  43. data/lib/mindee/parsing/prediction/invoice/invoice_v4.rb +216 -0
  44. data/lib/mindee/parsing/prediction/passport/passport_v1.rb +184 -0
  45. data/lib/mindee/parsing/prediction/receipt/receipt_v4.rb +84 -0
  46. data/lib/mindee/parsing/prediction/shipping_container/shipping_container_v1.rb +38 -0
  47. data/lib/mindee/parsing/prediction/us/bank_check/bank_check_v1.rb +70 -0
  48. data/lib/mindee/parsing/prediction.rb +12 -0
  49. data/lib/mindee/parsing.rb +4 -0
  50. data/lib/mindee/version.rb +1 -1
  51. data/mindee.gemspec +2 -1
  52. metadata +57 -24
  53. data/lib/mindee/documents/base.rb +0 -35
  54. data/lib/mindee/documents/custom.rb +0 -65
  55. data/lib/mindee/documents/financial_doc.rb +0 -135
  56. data/lib/mindee/documents/invoice.rb +0 -162
  57. data/lib/mindee/documents/passport.rb +0 -163
  58. data/lib/mindee/documents/receipt.rb +0 -109
  59. data/lib/mindee/documents.rb +0 -7
  60. data/lib/mindee/endpoint.rb +0 -105
  61. data/lib/mindee/fields/orientation.rb +0 -26
  62. data/lib/mindee/fields.rb +0 -11
  63. data/lib/mindee/inputs.rb +0 -153
  64. data/lib/mindee/response.rb +0 -27
data/lib/mindee/fields/orientation.rb DELETED
@@ -1,26 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Represents page orientation.
5
- class Orientation
6
- # @return [Integer]
7
- attr_reader :page_id
8
- # The confidence score, value will be between 0.0 and 1.0
9
- # @return [Float]
10
- attr_reader :confidence
11
- # A prediction among these 3 possible outputs:
12
- # * 0 degrees: the page is already upright
13
- # * 90 degrees: the page must be rotated clockwise to be upright
14
- # * 270 degrees: the page must be rotated counterclockwise to be upright
15
- # @return [Integer]
16
- attr_reader :degrees
17
-
18
- # @param prediction [Hash]
19
- # @param page_id [Integer]
20
- def initialize(prediction, page_id)
21
- @degrees = prediction['degrees']
22
- @confidence = prediction['confidence']
23
- @page_id = page_id
24
- end
25
- end
26
- end
data/lib/mindee/fields.rb DELETED
@@ -1,11 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'fields/amount'
4
- require_relative 'fields/base'
5
- require_relative 'fields/company_registration'
6
- require_relative 'fields/datefield'
7
- require_relative 'fields/locale'
8
- require_relative 'fields/orientation'
9
- require_relative 'fields/payment_details'
10
- require_relative 'fields/tax'
11
- require_relative 'fields/custom_docs'
data/lib/mindee/inputs.rb DELETED
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'stringio'
4
- require 'origami'
5
- require 'marcel'
6
-
7
- # Monkey-patching for Origami
8
- module PDFTools
9
- def to_io_stream(params = {})
10
- options = {
11
- delinearize: true,
12
- recompile: true,
13
- decrypt: false,
14
- }
15
- options.update(params)
16
-
17
- if frozen? # incompatible flags with frozen doc (signed)
18
- options[:recompile] = nil
19
- options[:rebuild_xrefs] = nil
20
- options[:noindent] = nil
21
- options[:obfuscate] = false
22
- end
23
- load_all_objects unless @loaded
24
-
25
- intents_as_pdfa1 if options[:intent] =~ %r{pdf[/-]?A1?/i}
26
- delinearize! if options[:delinearize] && linearized?
27
- compile(options) if options[:recompile]
28
-
29
- io_stream = StringIO.new(output(options))
30
- io_stream.set_encoding Encoding::BINARY
31
- io_stream
32
- end
33
- end
34
-
35
- Origami::PDF.class_eval { include PDFTools }
36
-
37
- module Mindee
38
- ALLOWED_MIME_TYPES = [
39
- 'application/pdf',
40
- 'image/heic',
41
- 'image/png',
42
- 'image/jpeg',
43
- 'image/tiff',
44
- 'image/webp',
45
- ].freeze
46
-
47
- MAX_DOC_PAGES = 3
48
-
49
- # Base class for loading documents.
50
- class InputDocument
51
- # @return [String]
52
- attr_reader :filename
53
- # @return [String]
54
- attr_reader :filepath
55
- # @return [String]
56
- attr_reader :file_mimetype
57
-
58
- # @param cut_pages [Boolean]
59
- # @param max_pages [Integer]
60
- def initialize(cut_pages, max_pages)
61
- @file_mimetype = Marcel::MimeType.for @io_stream, name: @filename
62
-
63
- unless ALLOWED_MIME_TYPES.include? @file_mimetype
64
- raise "File type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}"
65
- end
66
-
67
- merge_pdf_pages(max_pages) if cut_pages && pdf?
68
- end
69
-
70
- def pdf?
71
- @file_mimetype == 'application/pdf'
72
- end
73
-
74
- # @return [Integer]
75
- def page_count
76
- if pdf?
77
- current_pdf = open_pdf
78
- return current_pdf.pages.size
79
- end
80
- 1
81
- end
82
-
83
- # @param close [Boolean]
84
- def read_document(close: true)
85
- @io_stream.seek(0)
86
- data = @io_stream.read
87
- @io_stream.close if close
88
- [data].pack('m')
89
- end
90
-
91
- private
92
-
93
- # @param max_pages [Integer]
94
- def merge_pdf_pages(max_pages)
95
- current_pdf = open_pdf
96
- return if current_pdf.pages.size <= MAX_DOC_PAGES
97
-
98
- new_pdf = Origami::PDF.new
99
-
100
- to_insert = [current_pdf.pages[0], current_pdf.pages[-2], current_pdf.pages[-1]].take(max_pages)
101
- to_insert.each do |page|
102
- new_pdf.append_page(page)
103
- end
104
- @io_stream = new_pdf.to_io_stream
105
- end
106
-
107
- # @return [Origami::PDF]
108
- def open_pdf
109
- pdf_parser = Origami::PDF::LinearParser.new({})
110
- @io_stream.seek(0)
111
- pdf_parser.parse(@io_stream)
112
- end
113
- end
114
-
115
- # Load a document from a path.
116
- class PathDocument < InputDocument
117
- def initialize(filepath, cut_pages, max_pages: MAX_DOC_PAGES)
118
- @io_stream = File.open(filepath, 'rb')
119
- @filepath = filepath
120
- @filename = File.basename(filepath)
121
- super(cut_pages, max_pages)
122
- end
123
- end
124
-
125
- # Load a document from a base64 string.
126
- class Base64Document < InputDocument
127
- def initialize(base64_string, filename, cut_pages, max_pages: 3)
128
- @io_stream = StringIO.new(base64_string.unpack1('m*'))
129
- @io_stream.set_encoding Encoding::BINARY
130
- @filename = filename
131
- super(cut_pages, max_pages)
132
- end
133
- end
134
-
135
- # Load a document from raw bytes.
136
- class BytesDocument < InputDocument
137
- def initialize(raw_bytes, filename, cut_pages, max_pages: MAX_DOC_PAGES)
138
- @io_stream = StringIO.new(raw_bytes)
139
- @io_stream.set_encoding Encoding::BINARY
140
- @filename = filename
141
- super(cut_pages, max_pages)
142
- end
143
- end
144
-
145
- # Load a document from a file handle.
146
- class FileDocument < InputDocument
147
- def initialize(file_handle, filename, cut_pages, max_pages: MAX_DOC_PAGES)
148
- @io_stream = file_handle
149
- @filename = filename
150
- super(cut_pages, max_pages)
151
- end
152
- end
153
- end
data/lib/mindee/response.rb DELETED
@@ -1,27 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Mindee
4
- # Stores all response attributes.
5
- class DocumentResponse
6
- # @return [String]
7
- attr_reader :document_type
8
- # @return [Hash]
9
- attr_reader :http_response
10
- # @return [Mindee::Document]
11
- attr_reader :document
12
- # @return [Array<Mindee::Document>]
13
- attr_reader :pages
14
-
15
- # @param http_response [Hash]
16
- def initialize(http_response, document_type, document, pages)
17
- @http_response = http_response
18
- @document_type = document_type
19
- @document = document
20
- @pages = pages
21
- end
22
-
23
- def to_s
24
- inspect
25
- end
26
- end
27
- end