mindee 5.0.0.beta1 → 5.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0f0f74c2ffa0b955e76431c51bff64e2683439045ee6059d1a582885c5dcd2f
4
- data.tar.gz: 3663bcce828f5851c59a83bc71776e3c9d77ddc92aa657c8dc2573277a89186b
3
+ metadata.gz: c130d39c720927dbf32f427f50e3b6e64f43573a419adcae4b2c24b88825ceda
4
+ data.tar.gz: 11bea996953560479ccfec44fac65ed77ac83b45be0ceb64458d1e019fc3604d
5
5
  SHA512:
6
- metadata.gz: f2741d09c26053c130af682a29e28ae54854ba6d9ad652559d8b6706ff6d8fd2ea7afcc90ed2ed2cecc27a3926bc3fc61a2e7a671a3f0eaea63886ab9db929e0
7
- data.tar.gz: c3c72bbadb037a126c594c51610a43cb874a1d8cc15d43d3b5241bfaa69f3f1b647751faebf215f8642634ef77c72ea6cbd47af1e450a82c53075d7323cbf513
6
+ metadata.gz: cc9f76a7449ce3a9f9d99ff2cc03c545d5aadff3a088e817b34e2fb280ba1807b9bb8d1edfba3612e5929a7f51e9e4e007fe4f20bce4d4ba69c246aca3137ef3
7
+ data.tar.gz: 134e8286afd4e28a54a3523a9e691bb2ede3a631684d1640023badcdf758801b1b1a6eac4af6eddefe64d28f8659163feb05806024a5c6d33575c5bd002b8f2e
data/CHANGELOG.md CHANGED
@@ -1,5 +1,55 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v5.0.0 - 2026-04-20
4
+ ### ¡Breaking Changes!
5
+ * :boom: :recycle: update V1 & V2 syntaxes to match other SDKs
6
+ * :recycle: move V1 client to V1 module
7
+ * :recycle: move V2 client to V2 module
8
+ * :recycle: move legacy products to 'V1' module
9
+ * :recycle: add parsing and extraction to v1 module
10
+ * :recycle: move V1 HTTP to V1 module
11
+ * :recycle: move V2 HTTP module to V2 namespace
12
+ * :recycle: move data schema to extraction parameters namespace
13
+ * :arrow_up: :boom: drop support for ruby < 3.2
14
+ * :recycle: :boom: change raw_http attribute in responses to be actual json strings
15
+ * :recycle: :boom: make logging configurable and default output to stderr
16
+ * :recycle: :boom: remove useless `PDFExtractor` module
17
+ * :recycle: :boom: change `Errors` module to `Error`
18
+ * :recycle: :boom: change Ocr modules and classes to OCR to keep consistency
19
+ * :recycle: :boom: change `FileOperation` module name to `FileOperations`
20
+ * :recycle: :boom: change `Dependency` module name to `Dependencies`
21
+ * :boom: remove support for the following V1 products:
22
+ * :coffin: US Bank Check V1
23
+ * :coffin: Bill of Lading V1
24
+ * :coffin: Business Card V1
25
+ * :coffin: FR Carte Grise V1
26
+ * :coffin: Delivery Notes V1
27
+ * :coffin: Driver License V1
28
+ * :coffin: FR Energy Bill V1
29
+ * :coffin: Nutrition Facts V1
30
+ ### Changes
31
+ * :sparkles: :arrow_up: add support for mindee-lite gem
32
+ * :sparkles: add support for crop operation
33
+ * :sparkles: add support for split operation
34
+ * :sparkles: add support for model search
35
+ * :sparkles: add support for V2 CLI
36
+ * :wrench: :arrow_up: add better tooling and pre-commit hook
37
+ * :arrow_up: and bump all dependencies
38
+ ### Fixes
39
+ * :wrench: fix many typing issues
40
+ * :bug: fix webhook IDs not sending properly
41
+ * :bug: fix miscellaneous issues leading to saved `ExtractedPDF` instances having invalid names
42
+ * :recycle: fix miscellaneous typing issues relating to `ExtractedPDF` and `ExtractedImage` classes
43
+
44
+
45
+ ## v5.0.0.rc1 - 2026-04-15
46
+ ### ¡Breaking Changes!
47
+ * :recycle: :boom: change `FileOperation` module name to `FileOperations`
48
+ * :recycle: :boom: change `Dependency` module name to `Dependencies`
49
+ ### Fixes
50
+ * :wrench: fix many typing issues
51
+
52
+
3
53
  ## v5.0.0.beta1 - 2026-04-07
4
54
  ### ¡Breaking Changes!
5
55
  * :boom: :recycle: update V1 & V2 syntaxes to match other SDKs
@@ -25,7 +75,6 @@
25
75
  * :coffin: Driver License V1
26
76
  * :coffin: FR Energy Bill V1
27
77
  * :coffin: Nutrition Facts V1
28
-
29
78
  ### Changes
30
79
  * :sparkles: :arrow_up: add support for mindee-lite gem
31
80
  * :sparkles: add support for crop operation
@@ -34,7 +83,6 @@
34
83
  * :sparkles: add support for V2 CLI
35
84
  * :wrench: :arrow_up: add better tooling and pre-commit hook
36
85
  * :arrow_up: and bump all dependencies
37
-
38
86
  ### Fixes
39
87
  * :bug: fix webhook IDs not sending properly
40
88
  * :bug: fix miscellaneous issues leading to saved `ExtractedPDF` instances having invalid names
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Mindee
4
4
  # Centralized check for optional heavy dependencies
5
- module Dependency
5
+ module Dependencies
6
6
  def self.check_all_dependencies
7
7
  require 'origami'
8
8
  require 'mini_magick'
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'mini_magick'
5
5
  require 'origami'
6
6
  require 'stringio'
@@ -37,35 +37,33 @@ module Mindee
37
37
  new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
38
38
  new_stream.seek(0)
39
39
 
40
- extract_images_from_polygons(input_source, new_stream, page_id, polygons)
40
+ extract_images_from_polygons(input_source, page_id, polygons)
41
41
  end
42
42
 
43
43
  # Extracts images from their positions on a file (as polygons).
44
44
  #
45
45
  # @param [Input::Source::LocalInputSource] input_source Local input source.
46
- # @param [StringIO] pdf_stream Buffer of the PDF.
47
46
  # @param [Integer] page_id Page ID.
48
47
  # @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
49
48
  # @return [Array<Image::ExtractedImage>] Extracted Images.
50
- def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
49
+ def self.extract_images_from_polygons(input_source, page_id, polygons)
51
50
  extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage]
52
51
 
52
+ input_source.io_stream.rewind
53
+ pdf_stream = StringIO.new(input_source.io_stream.read.to_s)
54
+ input_source.io_stream.rewind
53
55
  polygons.each_with_index do |polygon, element_id|
54
56
  polygon = ImageUtils.normalize_polygon(polygon)
55
57
  page_content = ImageUtils.read_page_content(pdf_stream)
58
+ points = [
59
+ polygon.top_left,
60
+ polygon.bottom_right,
61
+ polygon.top_right,
62
+ polygon.bottom_left,
63
+ ]
56
64
 
57
- min_max_x = Geometry.get_min_max_x([
58
- polygon.top_left,
59
- polygon.bottom_right,
60
- polygon.top_right,
61
- polygon.bottom_left,
62
- ])
63
- min_max_y = Geometry.get_min_max_y([
64
- polygon.top_left,
65
- polygon.bottom_right,
66
- polygon.top_right,
67
- polygon.bottom_left,
68
- ])
65
+ min_max_x = Geometry.get_min_max_x(points)
66
+ min_max_y = Geometry.get_min_max_y(points)
69
67
  file_extension = ImageUtils.determine_file_extension(input_source)
70
68
  cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y)
71
69
  if file_extension == 'pdf'
@@ -4,9 +4,9 @@ require 'stringio'
4
4
  require 'marcel'
5
5
  require 'fileutils'
6
6
 
7
- require_relative '../../dependency'
8
- require_relative '../../pdf' if Mindee::Dependency.all_deps_available?
9
- require_relative '../../image' if Mindee::Dependency.all_deps_available?
7
+ require_relative '../../dependencies'
8
+ require_relative '../../pdf' if Mindee::Dependencies.all_deps_available?
9
+ require_relative '../../image' if Mindee::Dependencies.all_deps_available?
10
10
 
11
11
  module Mindee
12
12
  module Input
@@ -143,8 +143,8 @@ module Mindee
143
143
  # Defaults to one for images.
144
144
  # @return [Integer]
145
145
  def page_count
146
- unless Mindee::Dependency.all_deps_available?
147
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
146
+ unless Mindee::Dependencies.all_deps_available?
147
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
148
148
  end
149
149
  return 1 unless pdf?
150
150
 
@@ -163,8 +163,8 @@ module Mindee
163
163
  # @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
164
164
  # not. Needs force_source_text to work.
165
165
  def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
166
- unless Mindee::Dependency.all_deps_available?
167
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
166
+ unless Mindee::Dependencies.all_deps_available?
167
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
168
168
  end
169
169
 
170
170
  buffer = if pdf?
@@ -189,8 +189,8 @@ module Mindee
189
189
  # Checks whether the file has source text if it is a pdf. `false` otherwise
190
190
  # @return [bool] `true` if the file is a PDF and has source text.
191
191
  def source_text?
192
- unless Mindee::Dependency.all_deps_available?
193
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
192
+ unless Mindee::Dependencies.all_deps_available?
193
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
194
194
  end
195
195
 
196
196
  Mindee::PDF::PDFTools.source_text?(@io_stream)
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'pdf-reader'
5
5
 
6
6
  # Shorthand for pdf-reader's PDF namespace, to avoid mixups with the local Origami fork.
@@ -7,8 +7,8 @@ module Mindee
7
7
  class PDFExtractor
8
8
  # @param local_input [Mindee::Input::Source::LocalInputSource]
9
9
  def initialize(local_input)
10
- unless Mindee::Dependency.all_deps_available?
11
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
10
+ unless Mindee::Dependencies.all_deps_available?
11
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
12
12
  end
13
13
 
14
14
  @filename = local_input.filename
@@ -82,14 +82,14 @@ module Mindee
82
82
  page_indexes_as_array = page_indexes # @type var page_indexes : Array[Array[Integer]]
83
83
  return extract_sub_documents(page_indexes_as_array)
84
84
  end
85
- p_ids = page_indexes # @type var page_indexes: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
85
+ p_ids = page_indexes # @type var page_indexes: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
86
86
  return extract_sub_documents(p_ids.map(&:page_indexes)) unless strict
87
87
 
88
88
  correct_page_indexes = [] # @type var correct_page_indexes: Array[Array[Integer]]
89
89
  current_list = [] # @type var current_list: Array[Integer]
90
90
  previous_confidence = nil
91
91
  p_ids.each_with_index do |p_i, i|
92
- page_index = p_i # @type var page_index: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
92
+ page_index = p_i # @type var page_index: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
93
93
  confidence = page_index.confidence.to_f
94
94
  page_list = page_index.page_indexes
95
95
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'origami'
5
5
  require_relative 'pdf_tools'
6
6
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'origami'
5
5
 
6
6
  module Mindee
@@ -19,7 +19,7 @@ module Mindee
19
19
  # @param page_id [Integer, nil]
20
20
  def initialize(prediction, page_id)
21
21
  super
22
- @invoice_page_groups = Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups.new(
22
+ @invoice_page_groups = InvoiceSplitterV1InvoicePageGroups.new(
23
23
  prediction['invoice_page_groups'], page_id
24
24
  )
25
25
  end
data/lib/mindee/v1.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'v1/client'
4
- require_relative 'v1/extraction' if Mindee::Dependency.all_deps_available?
4
+ require_relative 'v1/extraction' if Mindee::Dependencies.all_deps_available?
5
5
  require_relative 'v1/http'
6
6
  require_relative 'v1/parsing'
7
7
  require_relative 'v1/product'
@@ -17,11 +17,11 @@ module Mindee
17
17
  ).first
18
18
  end
19
19
 
20
- # Extracts individual receipts from multi-receipts documents.
20
+ # Extracts multiple crop zones from an image.
21
21
  #
22
22
  # @param input_source [LocalInputSource] Local input source to extract crop zones from.
23
23
  # @param crops [Array<CropItem>] List of crops.
24
- # @return [CropFiles] Individual extracted receipts as an array of ExtractedImage.
24
+ # @return [CropFiles] Individual extracted zones as an array of ExtractedImage.
25
25
  # @raise [MindeeError] if the crops array is empty.
26
26
  def self.extract_crops(input_source, crops)
27
27
  if crops.nil? || crops.empty?
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'file_operations/crop'
4
+ require_relative 'file_operations/crop_files'
5
+ require_relative 'file_operations/split'
6
+ require_relative 'file_operations/split_files'
@@ -26,7 +26,7 @@ module Mindee
26
26
  @inference.to_s
27
27
  end
28
28
 
29
- # Extracts the crops from the input source.
29
+ # Splits the input PDF.
30
30
  # @param input_source [Mindee::Input::Source::LocalInputSource] Path to the file or a File object.
31
31
  # @return [FileOperations::SplitFiles]
32
32
  def extract_from_file(input_source)
data/lib/mindee/v2.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  require_relative 'v2/client'
4
4
  require_relative 'v2/http'
5
- require_relative 'v2/file_operation' if Mindee::Dependency.all_deps_available?
5
+ require_relative 'v2/file_operations' if Mindee::Dependencies.all_deps_available?
6
6
  require_relative 'v2/parsing'
7
7
  require_relative 'v2/product'
@@ -3,7 +3,7 @@
3
3
  # Mindee
4
4
  module Mindee
5
5
  # Current version.
6
- VERSION = '5.0.0.beta1'
6
+ VERSION = '5.0.0'
7
7
 
8
8
  # Finds and returns the current platform.
9
9
  # @return [Symbol, Hash[String | Symbol, Regexp], Nil?]
data/lib/mindee.rb CHANGED
@@ -7,7 +7,7 @@ require 'mindee/v2'
7
7
 
8
8
  module Mindee
9
9
  # Dependency management
10
- module Dependency
10
+ module Dependencies
11
11
  end
12
12
 
13
13
  # Mindee internal error module.
@@ -1,5 +1,5 @@
1
1
  module Mindee
2
- module Dependency
2
+ module Dependencies
3
3
  MINDEE_DEPENDENCIES_LOAD_ERROR: String
4
4
 
5
5
  self.@all_deps_available: bool
@@ -5,7 +5,7 @@ module Mindee
5
5
  def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF
6
6
  def self.to_blob: () -> String
7
7
  def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8
- def self.extract_images_from_polygons: (Input::Source::LocalInputSource, StringIO | File, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8
+ def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
9
9
  def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage
10
10
  def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File)
11
11
  end
@@ -5,7 +5,7 @@ module Mindee
5
5
  class BaseParameters
6
6
  attr_reader self.slug: String
7
7
 
8
- def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance
8
+ def self.from_hash: (params: Hash[String | Symbol, untyped]) -> BaseParameters
9
9
  def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped]
10
10
 
11
11
  def slug: -> String
@@ -13,7 +13,7 @@ module Mindee
13
13
 
14
14
  def extract_sub_documents: (Array[Array[Integer]]) -> Array[ExtractedPDF]
15
15
 
16
- def extract_invoices: (Array[V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
16
+ def extract_invoices: (Array[Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
17
17
  end
18
18
  end
19
19
  end
@@ -6,7 +6,7 @@ module Mindee
6
6
  module InvoiceSplitter
7
7
  class InvoiceSplitterV1Document < Parsing::Common::Prediction
8
8
  def initialize: (Hash[String | Symbol, untyped], Integer?) -> void
9
- def invoice_page_groups: -> (Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups)
9
+ def invoice_page_groups: -> InvoiceSplitterV1InvoicePageGroups
10
10
  def invoice_page_groups_separator: (String) -> String
11
11
  def invoice_page_groups_to_s: -> String
12
12
  def to_s: -> String
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mindee
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0.beta1
4
+ version: 5.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mindee, SA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-08 00:00:00.000000000 Z
11
+ date: 2026-04-20 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -231,7 +231,7 @@ files:
231
231
  - examples/auto_invoice_splitter_extraction.rb
232
232
  - examples/auto_multi_receipts_detector_extraction.rb
233
233
  - lib/mindee.rb
234
- - lib/mindee/dependency.rb
234
+ - lib/mindee/dependencies.rb
235
235
  - lib/mindee/error.rb
236
236
  - lib/mindee/error/mindee_error.rb
237
237
  - lib/mindee/error/mindee_http_error.rb
@@ -396,11 +396,11 @@ files:
396
396
  - lib/mindee/v1/product/universal/universal_prediction.rb
397
397
  - lib/mindee/v2.rb
398
398
  - lib/mindee/v2/client.rb
399
- - lib/mindee/v2/file_operation.rb
400
- - lib/mindee/v2/file_operation/crop.rb
401
- - lib/mindee/v2/file_operation/crop_files.rb
402
- - lib/mindee/v2/file_operation/split.rb
403
- - lib/mindee/v2/file_operation/split_files.rb
399
+ - lib/mindee/v2/file_operations.rb
400
+ - lib/mindee/v2/file_operations/crop.rb
401
+ - lib/mindee/v2/file_operations/crop_files.rb
402
+ - lib/mindee/v2/file_operations/split.rb
403
+ - lib/mindee/v2/file_operations/split_files.rb
404
404
  - lib/mindee/v2/http.rb
405
405
  - lib/mindee/v2/http/.rubocop.yml
406
406
  - lib/mindee/v2/http/api_v2_settings.rb
@@ -477,7 +477,7 @@ files:
477
477
  - sig/custom/net_http.rbs
478
478
  - sig/custom/origami.rbs
479
479
  - sig/mindee.rbs
480
- - sig/mindee/dependency.rbs
480
+ - sig/mindee/dependencies.rbs
481
481
  - sig/mindee/error/mindee_error.rbs
482
482
  - sig/mindee/error/mindee_http_error.rbs
483
483
  - sig/mindee/error/mindee_http_error_v2.rbs
@@ -706,9 +706,9 @@ required_ruby_version: !ruby/object:Gem::Requirement
706
706
  version: '3.2'
707
707
  required_rubygems_version: !ruby/object:Gem::Requirement
708
708
  requirements:
709
- - - ">"
709
+ - - ">="
710
710
  - !ruby/object:Gem::Version
711
- version: 1.3.1
711
+ version: '0'
712
712
  requirements: []
713
713
  rubygems_version: 3.4.19
714
714
  signing_key:
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'file_operation/crop'
4
- require_relative 'file_operation/crop_files'
5
- require_relative 'file_operation/split'
6
- require_relative 'file_operation/split_files'