mindee 5.0.0.beta1 → 5.0.0.rc1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: a0f0f74c2ffa0b955e76431c51bff64e2683439045ee6059d1a582885c5dcd2f
4
- data.tar.gz: 3663bcce828f5851c59a83bc71776e3c9d77ddc92aa657c8dc2573277a89186b
3
+ metadata.gz: 1b7650df6a4e2a2e94e5d147d40ce8c181959e8d2b654239a19f8a8db403aafd
4
+ data.tar.gz: dafe1f6fbd11fea9a0454a0e4cfb5f1a8ba3a26bac06e2d4ac8e977eb7d64bc8
5
5
  SHA512:
6
- metadata.gz: f2741d09c26053c130af682a29e28ae54854ba6d9ad652559d8b6706ff6d8fd2ea7afcc90ed2ed2cecc27a3926bc3fc61a2e7a671a3f0eaea63886ab9db929e0
7
- data.tar.gz: c3c72bbadb037a126c594c51610a43cb874a1d8cc15d43d3b5241bfaa69f3f1b647751faebf215f8642634ef77c72ea6cbd47af1e450a82c53075d7323cbf513
6
+ metadata.gz: f07d1f149857e63753efa589c6c7c1551ad2b7b6814919864957434e9cbe601fbc7b26bfc7e9a75d7ebd97748dbb3b75fbec09ba686c56b1144af4ead1d90b84
7
+ data.tar.gz: 72a41a55fdf34a461115bc4d6d898319002842abb4061b6be7eeceb3322a59cbf55c0d2acb304c70485eaae956c2a9209bcea51929a5ea70b3e225b458c93244
data/CHANGELOG.md CHANGED
@@ -1,5 +1,13 @@
1
1
  # Mindee Ruby API Library Changelog
2
2
 
3
+ ## v5.0.0.rc1 - 2026-04-15
4
+ ### ¡Breaking Changes!
5
+ * :recycle: :boom: change `FileOperation` module name to `FileOperations`
6
+ * :recycle: :boom: change `Dependency` module name to `Dependencies`
7
+ ### Fixes
8
+ * :wrench: fix many typing issues
9
+
10
+
3
11
  ## v5.0.0.beta1 - 2026-04-07
4
12
  ### ¡Breaking Changes!
5
13
  * :boom: :recycle: update V1 & V2 syntaxes to match other SDKs
@@ -25,7 +33,6 @@
25
33
  * :coffin: Driver License V1
26
34
  * :coffin: FR Energy Bill V1
27
35
  * :coffin: Nutrition Facts V1
28
-
29
36
  ### Changes
30
37
  * :sparkles: :arrow_up: add support for mindee-lite gem
31
38
  * :sparkles: add support for crop operation
@@ -34,7 +41,6 @@
34
41
  * :sparkles: add support for V2 CLI
35
42
  * :wrench: :arrow_up: add better tooling and pre-commit hook
36
43
  * :arrow_up: bump all dependencies
37
-
38
44
  ### Fixes
39
45
  * :bug: fix webhook IDs not sending properly
40
46
  * :bug: fix miscellaneous issues leading to saved `ExtractedPDF` instances having invalid names
@@ -2,7 +2,7 @@
2
2
 
3
3
  module Mindee
4
4
  # Centralized check for optional heavy dependencies
5
- module Dependency
5
+ module Dependencies
6
6
  def self.check_all_dependencies
7
7
  require 'origami'
8
8
  require 'mini_magick'
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'mini_magick'
5
5
  require 'origami'
6
6
  require 'stringio'
@@ -37,35 +37,33 @@ module Mindee
37
37
  new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
38
38
  new_stream.seek(0)
39
39
 
40
- extract_images_from_polygons(input_source, new_stream, page_id, polygons)
40
+ extract_images_from_polygons(input_source, page_id, polygons)
41
41
  end
42
42
 
43
43
  # Extracts images from their positions on a file (as polygons).
44
44
  #
45
45
  # @param [Input::Source::LocalInputSource] input_source Local input source.
46
- # @param [StringIO] pdf_stream Buffer of the PDF.
47
46
  # @param [Integer] page_id Page ID.
48
47
  # @param [Array<Array<Geometry::Point>, Geometry::Polygon, Geometry::Quadrilateral>] polygons
49
48
  # @return [Array<Image::ExtractedImage>] Extracted Images.
50
- def self.extract_images_from_polygons(input_source, pdf_stream, page_id, polygons)
49
+ def self.extract_images_from_polygons(input_source, page_id, polygons)
51
50
  extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage]
52
51
 
52
+ input_source.io_stream.rewind
53
+ pdf_stream = StringIO.new(input_source.io_stream.read.to_s)
54
+ input_source.io_stream.rewind
53
55
  polygons.each_with_index do |polygon, element_id|
54
56
  polygon = ImageUtils.normalize_polygon(polygon)
55
57
  page_content = ImageUtils.read_page_content(pdf_stream)
58
+ points = [
59
+ polygon.top_left,
60
+ polygon.bottom_right,
61
+ polygon.top_right,
62
+ polygon.bottom_left,
63
+ ]
56
64
 
57
- min_max_x = Geometry.get_min_max_x([
58
- polygon.top_left,
59
- polygon.bottom_right,
60
- polygon.top_right,
61
- polygon.bottom_left,
62
- ])
63
- min_max_y = Geometry.get_min_max_y([
64
- polygon.top_left,
65
- polygon.bottom_right,
66
- polygon.top_right,
67
- polygon.bottom_left,
68
- ])
65
+ min_max_x = Geometry.get_min_max_x(points)
66
+ min_max_y = Geometry.get_min_max_y(points)
69
67
  file_extension = ImageUtils.determine_file_extension(input_source)
70
68
  cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y)
71
69
  if file_extension == 'pdf'
@@ -4,9 +4,9 @@ require 'stringio'
4
4
  require 'marcel'
5
5
  require 'fileutils'
6
6
 
7
- require_relative '../../dependency'
8
- require_relative '../../pdf' if Mindee::Dependency.all_deps_available?
9
- require_relative '../../image' if Mindee::Dependency.all_deps_available?
7
+ require_relative '../../dependencies'
8
+ require_relative '../../pdf' if Mindee::Dependencies.all_deps_available?
9
+ require_relative '../../image' if Mindee::Dependencies.all_deps_available?
10
10
 
11
11
  module Mindee
12
12
  module Input
@@ -143,8 +143,8 @@ module Mindee
143
143
  # Defaults to one for images.
144
144
  # @return [Integer]
145
145
  def page_count
146
- unless Mindee::Dependency.all_deps_available?
147
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
146
+ unless Mindee::Dependencies.all_deps_available?
147
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
148
148
  end
149
149
  return 1 unless pdf?
150
150
 
@@ -163,8 +163,8 @@ module Mindee
163
163
  # @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
164
164
  # not. Needs force_source_text to work.
165
165
  def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
166
- unless Mindee::Dependency.all_deps_available?
167
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
166
+ unless Mindee::Dependencies.all_deps_available?
167
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
168
168
  end
169
169
 
170
170
  buffer = if pdf?
@@ -189,8 +189,8 @@ module Mindee
189
189
  # Checks whether the file has source text if it is a PDF; returns `false` otherwise.
190
190
  # @return [bool] `true` if the file is a PDF and has source text.
191
191
  def source_text?
192
- unless Mindee::Dependency.all_deps_available?
193
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
192
+ unless Mindee::Dependencies.all_deps_available?
193
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
194
194
  end
195
195
 
196
196
  Mindee::PDF::PDFTools.source_text?(@io_stream)
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'pdf-reader'
5
5
 
6
6
  # Shorthand for pdf-reader's PDF namespace, to avoid mixups with the local Origami fork.
@@ -7,8 +7,8 @@ module Mindee
7
7
  class PDFExtractor
8
8
  # @param local_input [Mindee::Input::Source::LocalInputSource]
9
9
  def initialize(local_input)
10
- unless Mindee::Dependency.all_deps_available?
11
- raise NotImplementedError, Mindee::Dependency::MINDEE_DEPENDENCIES_LOAD_ERROR
10
+ unless Mindee::Dependencies.all_deps_available?
11
+ raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
12
12
  end
13
13
 
14
14
  @filename = local_input.filename
@@ -82,14 +82,14 @@ module Mindee
82
82
  page_indexes_as_array = page_indexes # @type var page_indexes : Array[Array[Integer]]
83
83
  return extract_sub_documents(page_indexes_as_array)
84
84
  end
85
- p_ids = page_indexes # @type var page_indexes: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
85
+ p_ids = page_indexes # @type var page_indexes: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
86
86
  return extract_sub_documents(p_ids.map(&:page_indexes)) unless strict
87
87
 
88
88
  correct_page_indexes = [] # @type var correct_page_indexes: Array[Array[Integer]]
89
89
  current_list = [] # @type var current_list: Array[Integer]
90
90
  previous_confidence = nil
91
91
  p_ids.each_with_index do |p_i, i|
92
- page_index = p_i # @type var page_index: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
92
+ page_index = p_i # @type var page_index: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
93
93
  confidence = page_index.confidence.to_f
94
94
  page_list = page_index.page_indexes
95
95
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'origami'
5
5
  require_relative 'pdf_tools'
6
6
 
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- Mindee::Dependency.require_all_deps!
3
+ Mindee::Dependencies.require_all_deps!
4
4
  require 'origami'
5
5
 
6
6
  module Mindee
@@ -19,7 +19,7 @@ module Mindee
19
19
  # @param page_id [Integer, nil]
20
20
  def initialize(prediction, page_id)
21
21
  super
22
- @invoice_page_groups = Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups.new(
22
+ @invoice_page_groups = InvoiceSplitterV1InvoicePageGroups.new(
23
23
  prediction['invoice_page_groups'], page_id
24
24
  )
25
25
  end
data/lib/mindee/v1.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require_relative 'v1/client'
4
- require_relative 'v1/extraction' if Mindee::Dependency.all_deps_available?
4
+ require_relative 'v1/extraction' if Mindee::Dependencies.all_deps_available?
5
5
  require_relative 'v1/http'
6
6
  require_relative 'v1/parsing'
7
7
  require_relative 'v1/product'
@@ -17,11 +17,11 @@ module Mindee
17
17
  ).first
18
18
  end
19
19
 
20
- # Extracts individual receipts from multi-receipts documents.
20
+ # Extracts multiple crop zones from an image.
21
21
  #
22
22
  # @param input_source [LocalInputSource] Local Input Source to extract crop zones from.
23
23
  # @param crops [Array<CropItem>] List of crops.
24
- # @return [CropFiles] Individual extracted receipts as an array of ExtractedImage.
24
+ # @return [CropFiles] Individual extracted zones as an array of ExtractedImage.
25
25
  # @raise [MindeeError] if the crops array is empty.
26
26
  def self.extract_crops(input_source, crops)
27
27
  if crops.nil? || crops.empty?
@@ -0,0 +1,6 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'file_operations/crop'
4
+ require_relative 'file_operations/crop_files'
5
+ require_relative 'file_operations/split'
6
+ require_relative 'file_operations/split_files'
@@ -26,7 +26,7 @@ module Mindee
26
26
  @inference.to_s
27
27
  end
28
28
 
29
- # Extracts the crops from the input source.
29
+ # Splits the input PDF.
30
30
  # @param input_source [Mindee::Input::Source::LocalInputSource] Local input source of the PDF to split.
31
31
  # @return [FileOperations::SplitFiles]
32
32
  def extract_from_file(input_source)
data/lib/mindee/v2.rb CHANGED
@@ -2,6 +2,6 @@
2
2
 
3
3
  require_relative 'v2/client'
4
4
  require_relative 'v2/http'
5
- require_relative 'v2/file_operation' if Mindee::Dependency.all_deps_available?
5
+ require_relative 'v2/file_operations' if Mindee::Dependencies.all_deps_available?
6
6
  require_relative 'v2/parsing'
7
7
  require_relative 'v2/product'
@@ -3,7 +3,7 @@
3
3
  # Mindee
4
4
  module Mindee
5
5
  # Current version.
6
- VERSION = '5.0.0.beta1'
6
+ VERSION = '5.0.0.rc1'
7
7
 
8
8
  # Finds and returns the current platform.
9
9
  # @return [Symbol, Hash[String | Symbol, Regexp], Nil?]
data/lib/mindee.rb CHANGED
@@ -7,7 +7,7 @@ require 'mindee/v2'
7
7
 
8
8
  module Mindee
9
9
  # Dependency management
10
- module Dependency
10
+ module Dependencies
11
11
  end
12
12
 
13
13
  # Mindee internal error module.
@@ -1,5 +1,5 @@
1
1
  module Mindee
2
- module Dependency
2
+ module Dependencies
3
3
  MINDEE_DEPENDENCIES_LOAD_ERROR: String
4
4
 
5
5
  self.@all_deps_available: bool
@@ -5,7 +5,7 @@ module Mindee
5
5
  def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF
6
6
  def self.to_blob: () -> String
7
7
  def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8
- def self.extract_images_from_polygons: (Input::Source::LocalInputSource, StringIO | File, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
8
+ def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
9
9
  def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage
10
10
  def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File)
11
11
  end
@@ -5,7 +5,7 @@ module Mindee
5
5
  class BaseParameters
6
6
  attr_reader self.slug: String
7
7
 
8
- def self.from_hash: (params: Hash[String | Symbol, untyped]) -> instance
8
+ def self.from_hash: (params: Hash[String | Symbol, untyped]) -> BaseParameters
9
9
  def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped]
10
10
 
11
11
  def slug: -> String
@@ -13,7 +13,7 @@ module Mindee
13
13
 
14
14
  def extract_sub_documents: (Array[Array[Integer]]) -> Array[ExtractedPDF]
15
15
 
16
- def extract_invoices: (Array[V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
16
+ def extract_invoices: (Array[Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
17
17
  end
18
18
  end
19
19
  end
@@ -6,7 +6,7 @@ module Mindee
6
6
  module InvoiceSplitter
7
7
  class InvoiceSplitterV1Document < Parsing::Common::Prediction
8
8
  def initialize: (Hash[String | Symbol, untyped], Integer?) -> void
9
- def invoice_page_groups: -> (Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups)
9
+ def invoice_page_groups: -> InvoiceSplitterV1InvoicePageGroups
10
10
  def invoice_page_groups_separator: (String) -> String
11
11
  def invoice_page_groups_to_s: -> String
12
12
  def to_s: -> String
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: mindee
3
3
  version: !ruby/object:Gem::Version
4
- version: 5.0.0.beta1
4
+ version: 5.0.0.rc1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Mindee, SA
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2026-04-08 00:00:00.000000000 Z
11
+ date: 2026-04-15 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: base64
@@ -231,7 +231,7 @@ files:
231
231
  - examples/auto_invoice_splitter_extraction.rb
232
232
  - examples/auto_multi_receipts_detector_extraction.rb
233
233
  - lib/mindee.rb
234
- - lib/mindee/dependency.rb
234
+ - lib/mindee/dependencies.rb
235
235
  - lib/mindee/error.rb
236
236
  - lib/mindee/error/mindee_error.rb
237
237
  - lib/mindee/error/mindee_http_error.rb
@@ -396,11 +396,11 @@ files:
396
396
  - lib/mindee/v1/product/universal/universal_prediction.rb
397
397
  - lib/mindee/v2.rb
398
398
  - lib/mindee/v2/client.rb
399
- - lib/mindee/v2/file_operation.rb
400
- - lib/mindee/v2/file_operation/crop.rb
401
- - lib/mindee/v2/file_operation/crop_files.rb
402
- - lib/mindee/v2/file_operation/split.rb
403
- - lib/mindee/v2/file_operation/split_files.rb
399
+ - lib/mindee/v2/file_operations.rb
400
+ - lib/mindee/v2/file_operations/crop.rb
401
+ - lib/mindee/v2/file_operations/crop_files.rb
402
+ - lib/mindee/v2/file_operations/split.rb
403
+ - lib/mindee/v2/file_operations/split_files.rb
404
404
  - lib/mindee/v2/http.rb
405
405
  - lib/mindee/v2/http/.rubocop.yml
406
406
  - lib/mindee/v2/http/api_v2_settings.rb
@@ -477,7 +477,7 @@ files:
477
477
  - sig/custom/net_http.rbs
478
478
  - sig/custom/origami.rbs
479
479
  - sig/mindee.rbs
480
- - sig/mindee/dependency.rbs
480
+ - sig/mindee/dependencies.rbs
481
481
  - sig/mindee/error/mindee_error.rbs
482
482
  - sig/mindee/error/mindee_http_error.rbs
483
483
  - sig/mindee/error/mindee_http_error_v2.rbs
@@ -1,6 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'file_operation/crop'
4
- require_relative 'file_operation/crop_files'
5
- require_relative 'file_operation/split'
6
- require_relative 'file_operation/split_files'