mindee 5.0.0.beta1 → 5.0.0.rc1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -2
- data/lib/mindee/{dependency.rb → dependencies.rb} +1 -1
- data/lib/mindee/image/image_extractor.rb +14 -16
- data/lib/mindee/input/sources/local_input_source.rb +9 -9
- data/lib/mindee/pdf/pdf_compressor.rb +1 -1
- data/lib/mindee/pdf/pdf_extractor.rb +4 -4
- data/lib/mindee/pdf/pdf_processor.rb +1 -1
- data/lib/mindee/pdf/pdf_tools.rb +1 -1
- data/lib/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rb +1 -1
- data/lib/mindee/v1.rb +1 -1
- data/lib/mindee/v2/{file_operation → file_operations}/crop.rb +2 -2
- data/lib/mindee/v2/file_operations.rb +6 -0
- data/lib/mindee/v2/product/split/split_response.rb +1 -1
- data/lib/mindee/v2.rb +1 -1
- data/lib/mindee/version.rb +1 -1
- data/lib/mindee.rb +1 -1
- data/sig/mindee/{dependency.rbs → dependencies.rbs} +1 -1
- data/sig/mindee/image/image_extractor.rbs +1 -1
- data/sig/mindee/input/base_parameters.rbs +1 -1
- data/sig/mindee/pdf/pdf_extractor.rbs +1 -1
- data/sig/mindee/v1/product/invoice_splitter/invoice_splitter_v1_document.rbs +1 -1
- metadata +9 -9
- data/lib/mindee/v2/file_operation.rb +0 -6
- /data/lib/mindee/v2/{file_operation → file_operations}/crop_files.rb +0 -0
- /data/lib/mindee/v2/{file_operation → file_operations}/split.rb +0 -0
- /data/lib/mindee/v2/{file_operation → file_operations}/split_files.rb +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 1b7650df6a4e2a2e94e5d147d40ce8c181959e8d2b654239a19f8a8db403aafd
|
|
4
|
+
data.tar.gz: dafe1f6fbd11fea9a0454a0e4cfb5f1a8ba3a26bac06e2d4ac8e977eb7d64bc8
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: f07d1f149857e63753efa589c6c7c1551ad2b7b6814919864957434e9cbe601fbc7b26bfc7e9a75d7ebd97748dbb3b75fbec09ba686c56b1144af4ead1d90b84
|
|
7
|
+
data.tar.gz: 72a41a55fdf34a461115bc4d6d898319002842abb4061b6be7eeceb3322a59cbf55c0d2acb304c70485eaae956c2a9209bcea51929a5ea70b3e225b458c93244
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,13 @@
|
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
|
2
2
|
|
|
3
|
+
## v5.0.0.rc1 - 2026-04-15
|
|
4
|
+
### ¡Breaking Changes!
|
|
5
|
+
* :recycle: :boom: change `FileOperation` module name to `FileOperations`
|
|
6
|
+
* :recycle: :boom: change `Dependency` module name to `Dependencies`
|
|
7
|
+
### Fixes
|
|
8
|
+
* :wrench: fix many typing issues
|
|
9
|
+
|
|
10
|
+
|
|
3
11
|
## v5.0.0.beta1 - 2026-04-07
|
|
4
12
|
### ¡Breaking Changes!
|
|
5
13
|
* :boom: :recycle: update V1 & V2 syntaxes to match other SDKs
|
|
@@ -25,7 +33,6 @@
|
|
|
25
33
|
* :coffin: Driver License V1
|
|
26
34
|
* :coffin: FR Energy Bill V1
|
|
27
35
|
* :coffin: Nutrition Facts V1
|
|
28
|
-
|
|
29
36
|
### Changes
|
|
30
37
|
* :sparkles: :arrow_up: add support for mindee-lite gem
|
|
31
38
|
* :sparkles: add support for crop operation
|
|
@@ -34,7 +41,6 @@
|
|
|
34
41
|
* :sparkles: add support for V2 CLI
|
|
35
42
|
* :wrench: :arrow_up: add better tooling and pre-commit hook
|
|
36
43
|
* :arrow_up: bump all dependencies
|
|
37
|
-
|
|
38
44
|
### Fixes
|
|
39
45
|
* :bug: fix webhook IDs not sending properly
|
|
40
46
|
* :bug: fix miscellaneous issues leading to saved `ExtractedPDF` instances having invalid names
|
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
|
-
Mindee::
|
|
3
|
+
Mindee::Dependencies.require_all_deps!
|
|
4
4
|
require 'mini_magick'
|
|
5
5
|
require 'origami'
|
|
6
6
|
require 'stringio'
|
|
@@ -37,35 +37,33 @@ module Mindee
|
|
|
37
37
|
new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
|
|
38
38
|
new_stream.seek(0)
|
|
39
39
|
|
|
40
|
-
extract_images_from_polygons(input_source,
|
|
40
|
+
extract_images_from_polygons(input_source, page_id, polygons)
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
# Extracts images from their positions on a file (as polygons).
|
|
44
44
|
#
|
|
45
45
|
# @param [Input::Source::LocalInputSource] input_source Local input source.
|
|
46
|
-
# @param [StringIO] pdf_stream Buffer of the PDF.
|
|
47
46
|
# @param [Integer] page_id Page ID.
|
|
48
47
|
# @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
|
|
49
48
|
# @return [Array<Image::ExtractedImage>] Extracted Images.
|
|
50
|
-
def self.extract_images_from_polygons(input_source,
|
|
49
|
+
def self.extract_images_from_polygons(input_source, page_id, polygons)
|
|
51
50
|
extracted_elements = [] # @type var extracted_elements: Array[Image::ExtractedImage]
|
|
52
51
|
|
|
52
|
+
input_source.io_stream.rewind
|
|
53
|
+
pdf_stream = StringIO.new(input_source.io_stream.read.to_s)
|
|
54
|
+
input_source.io_stream.rewind
|
|
53
55
|
polygons.each_with_index do |polygon, element_id|
|
|
54
56
|
polygon = ImageUtils.normalize_polygon(polygon)
|
|
55
57
|
page_content = ImageUtils.read_page_content(pdf_stream)
|
|
58
|
+
points = [
|
|
59
|
+
polygon.top_left,
|
|
60
|
+
polygon.bottom_right,
|
|
61
|
+
polygon.top_right,
|
|
62
|
+
polygon.bottom_left,
|
|
63
|
+
]
|
|
56
64
|
|
|
57
|
-
min_max_x = Geometry.get_min_max_x(
|
|
58
|
-
|
|
59
|
-
polygon.bottom_right,
|
|
60
|
-
polygon.top_right,
|
|
61
|
-
polygon.bottom_left,
|
|
62
|
-
])
|
|
63
|
-
min_max_y = Geometry.get_min_max_y([
|
|
64
|
-
polygon.top_left,
|
|
65
|
-
polygon.bottom_right,
|
|
66
|
-
polygon.top_right,
|
|
67
|
-
polygon.bottom_left,
|
|
68
|
-
])
|
|
65
|
+
min_max_x = Geometry.get_min_max_x(points)
|
|
66
|
+
min_max_y = Geometry.get_min_max_y(points)
|
|
69
67
|
file_extension = ImageUtils.determine_file_extension(input_source)
|
|
70
68
|
cropped_image = ImageUtils.crop_image(page_content, min_max_x, min_max_y)
|
|
71
69
|
if file_extension == 'pdf'
|
|
@@ -4,9 +4,9 @@ require 'stringio'
|
|
|
4
4
|
require 'marcel'
|
|
5
5
|
require 'fileutils'
|
|
6
6
|
|
|
7
|
-
require_relative '../../
|
|
8
|
-
require_relative '../../pdf' if Mindee::
|
|
9
|
-
require_relative '../../image' if Mindee::
|
|
7
|
+
require_relative '../../dependencies'
|
|
8
|
+
require_relative '../../pdf' if Mindee::Dependencies.all_deps_available?
|
|
9
|
+
require_relative '../../image' if Mindee::Dependencies.all_deps_available?
|
|
10
10
|
|
|
11
11
|
module Mindee
|
|
12
12
|
module Input
|
|
@@ -143,8 +143,8 @@ module Mindee
|
|
|
143
143
|
# Defaults to one for images.
|
|
144
144
|
# @return [Integer]
|
|
145
145
|
def page_count
|
|
146
|
-
unless Mindee::
|
|
147
|
-
raise NotImplementedError, Mindee::
|
|
146
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
147
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
148
148
|
end
|
|
149
149
|
return 1 unless pdf?
|
|
150
150
|
|
|
@@ -163,8 +163,8 @@ module Mindee
|
|
|
163
163
|
# @param [bool] disable_source_text If the PDF has source text, whether to re-apply it to the original or
|
|
164
164
|
# not. Needs force_source_text to work.
|
|
165
165
|
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
|
|
166
|
-
unless Mindee::
|
|
167
|
-
raise NotImplementedError, Mindee::
|
|
166
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
167
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
168
168
|
end
|
|
169
169
|
|
|
170
170
|
buffer = if pdf?
|
|
@@ -189,8 +189,8 @@ module Mindee
|
|
|
189
189
|
# Checks whether the file has source text if it is a pdf. `false` otherwise
|
|
190
190
|
# @return [bool] `true` if the file is a PDF and has source text.
|
|
191
191
|
def source_text?
|
|
192
|
-
unless Mindee::
|
|
193
|
-
raise NotImplementedError, Mindee::
|
|
192
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
193
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
194
194
|
end
|
|
195
195
|
|
|
196
196
|
Mindee::PDF::PDFTools.source_text?(@io_stream)
|
|
@@ -7,8 +7,8 @@ module Mindee
|
|
|
7
7
|
class PDFExtractor
|
|
8
8
|
# @param local_input [Mindee::Input::Source::LocalInputSource]
|
|
9
9
|
def initialize(local_input)
|
|
10
|
-
unless Mindee::
|
|
11
|
-
raise NotImplementedError, Mindee::
|
|
10
|
+
unless Mindee::Dependencies.all_deps_available?
|
|
11
|
+
raise NotImplementedError, Mindee::Dependencies::MINDEE_DEPENDENCIES_LOAD_ERROR
|
|
12
12
|
end
|
|
13
13
|
|
|
14
14
|
@filename = local_input.filename
|
|
@@ -82,14 +82,14 @@ module Mindee
|
|
|
82
82
|
page_indexes_as_array = page_indexes # @type var page_indexes : Array[Array[Integer]]
|
|
83
83
|
return extract_sub_documents(page_indexes_as_array)
|
|
84
84
|
end
|
|
85
|
-
p_ids = page_indexes # @type var page_indexes: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
|
|
85
|
+
p_ids = page_indexes # @type var page_indexes: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroups
|
|
86
86
|
return extract_sub_documents(p_ids.map(&:page_indexes)) unless strict
|
|
87
87
|
|
|
88
88
|
correct_page_indexes = [] # @type var correct_page_indexes: Array[Array[Integer]]
|
|
89
89
|
current_list = [] # @type var current_list: Array[Integer]
|
|
90
90
|
previous_confidence = nil
|
|
91
91
|
p_ids.each_with_index do |p_i, i|
|
|
92
|
-
page_index = p_i # @type var page_index: Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
|
|
92
|
+
page_index = p_i # @type var page_index: Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup
|
|
93
93
|
confidence = page_index.confidence.to_f
|
|
94
94
|
page_list = page_index.page_indexes
|
|
95
95
|
|
data/lib/mindee/pdf/pdf_tools.rb
CHANGED
|
@@ -19,7 +19,7 @@ module Mindee
|
|
|
19
19
|
# @param page_id [Integer, nil]
|
|
20
20
|
def initialize(prediction, page_id)
|
|
21
21
|
super
|
|
22
|
-
@invoice_page_groups =
|
|
22
|
+
@invoice_page_groups = InvoiceSplitterV1InvoicePageGroups.new(
|
|
23
23
|
prediction['invoice_page_groups'], page_id
|
|
24
24
|
)
|
|
25
25
|
end
|
data/lib/mindee/v1.rb
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require_relative 'v1/client'
|
|
4
|
-
require_relative 'v1/extraction' if Mindee::
|
|
4
|
+
require_relative 'v1/extraction' if Mindee::Dependencies.all_deps_available?
|
|
5
5
|
require_relative 'v1/http'
|
|
6
6
|
require_relative 'v1/parsing'
|
|
7
7
|
require_relative 'v1/product'
|
|
@@ -17,11 +17,11 @@ module Mindee
|
|
|
17
17
|
).first
|
|
18
18
|
end
|
|
19
19
|
|
|
20
|
-
# Extracts
|
|
20
|
+
# Extracts multiple crop zones from an image.
|
|
21
21
|
#
|
|
22
22
|
# @param input_source [LocalInputSource] Local Input Source to extract crop zones from.
|
|
23
23
|
# @param crops [Array<CropItem>] List of crops.
|
|
24
|
-
# @return [CropFiles] Individual extracted
|
|
24
|
+
# @return [CropFiles] Individual extracted zones as an array of ExtractedImage.
|
|
25
25
|
# @raise [MindeeError] if the crops array is empty.
|
|
26
26
|
def self.extract_crops(input_source, crops)
|
|
27
27
|
if crops.nil? || crops.empty?
|
|
@@ -26,7 +26,7 @@ module Mindee
|
|
|
26
26
|
@inference.to_s
|
|
27
27
|
end
|
|
28
28
|
|
|
29
|
-
#
|
|
29
|
+
# Splits the input PDF.
|
|
30
30
|
# @param input_source [Mindee::Input::Source::LocalInputSource] Path to the file or a File object.
|
|
31
31
|
# @return [FileOperations::SplitFiles]
|
|
32
32
|
def extract_from_file(input_source)
|
data/lib/mindee/v2.rb
CHANGED
|
@@ -2,6 +2,6 @@
|
|
|
2
2
|
|
|
3
3
|
require_relative 'v2/client'
|
|
4
4
|
require_relative 'v2/http'
|
|
5
|
-
require_relative 'v2/
|
|
5
|
+
require_relative 'v2/file_operations' if Mindee::Dependencies.all_deps_available?
|
|
6
6
|
require_relative 'v2/parsing'
|
|
7
7
|
require_relative 'v2/product'
|
data/lib/mindee/version.rb
CHANGED
data/lib/mindee.rb
CHANGED
|
@@ -5,7 +5,7 @@ module Mindee
|
|
|
5
5
|
def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF
|
|
6
6
|
def self.to_blob: () -> String
|
|
7
7
|
def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
|
|
8
|
-
def self.extract_images_from_polygons: (Input::Source::LocalInputSource,
|
|
8
|
+
def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> Array[ExtractedImage]
|
|
9
9
|
def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage
|
|
10
10
|
def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File)
|
|
11
11
|
end
|
|
@@ -5,7 +5,7 @@ module Mindee
|
|
|
5
5
|
class BaseParameters
|
|
6
6
|
attr_reader self.slug: String
|
|
7
7
|
|
|
8
|
-
def self.from_hash: (params: Hash[String | Symbol, untyped]) ->
|
|
8
|
+
def self.from_hash: (params: Hash[String | Symbol, untyped]) -> BaseParameters
|
|
9
9
|
def self.load_from_hash: (params: Hash[String | Symbol, untyped]) -> Hash[String | Symbol, untyped]
|
|
10
10
|
|
|
11
11
|
def slug: -> String
|
|
@@ -13,7 +13,7 @@ module Mindee
|
|
|
13
13
|
|
|
14
14
|
def extract_sub_documents: (Array[Array[Integer]]) -> Array[ExtractedPDF]
|
|
15
15
|
|
|
16
|
-
def extract_invoices: (Array[V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
|
|
16
|
+
def extract_invoices: (Array[Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> Array[ExtractedPDF]
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
end
|
|
@@ -6,7 +6,7 @@ module Mindee
|
|
|
6
6
|
module InvoiceSplitter
|
|
7
7
|
class InvoiceSplitterV1Document < Parsing::Common::Prediction
|
|
8
8
|
def initialize: (Hash[String | Symbol, untyped], Integer?) -> void
|
|
9
|
-
def invoice_page_groups: ->
|
|
9
|
+
def invoice_page_groups: -> InvoiceSplitterV1InvoicePageGroups
|
|
10
10
|
def invoice_page_groups_separator: (String) -> String
|
|
11
11
|
def invoice_page_groups_to_s: -> String
|
|
12
12
|
def to_s: -> String
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mindee
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.0.0.
|
|
4
|
+
version: 5.0.0.rc1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mindee, SA
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-15 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -231,7 +231,7 @@ files:
|
|
|
231
231
|
- examples/auto_invoice_splitter_extraction.rb
|
|
232
232
|
- examples/auto_multi_receipts_detector_extraction.rb
|
|
233
233
|
- lib/mindee.rb
|
|
234
|
-
- lib/mindee/
|
|
234
|
+
- lib/mindee/dependencies.rb
|
|
235
235
|
- lib/mindee/error.rb
|
|
236
236
|
- lib/mindee/error/mindee_error.rb
|
|
237
237
|
- lib/mindee/error/mindee_http_error.rb
|
|
@@ -396,11 +396,11 @@ files:
|
|
|
396
396
|
- lib/mindee/v1/product/universal/universal_prediction.rb
|
|
397
397
|
- lib/mindee/v2.rb
|
|
398
398
|
- lib/mindee/v2/client.rb
|
|
399
|
-
- lib/mindee/v2/
|
|
400
|
-
- lib/mindee/v2/
|
|
401
|
-
- lib/mindee/v2/
|
|
402
|
-
- lib/mindee/v2/
|
|
403
|
-
- lib/mindee/v2/
|
|
399
|
+
- lib/mindee/v2/file_operations.rb
|
|
400
|
+
- lib/mindee/v2/file_operations/crop.rb
|
|
401
|
+
- lib/mindee/v2/file_operations/crop_files.rb
|
|
402
|
+
- lib/mindee/v2/file_operations/split.rb
|
|
403
|
+
- lib/mindee/v2/file_operations/split_files.rb
|
|
404
404
|
- lib/mindee/v2/http.rb
|
|
405
405
|
- lib/mindee/v2/http/.rubocop.yml
|
|
406
406
|
- lib/mindee/v2/http/api_v2_settings.rb
|
|
@@ -477,7 +477,7 @@ files:
|
|
|
477
477
|
- sig/custom/net_http.rbs
|
|
478
478
|
- sig/custom/origami.rbs
|
|
479
479
|
- sig/mindee.rbs
|
|
480
|
-
- sig/mindee/
|
|
480
|
+
- sig/mindee/dependencies.rbs
|
|
481
481
|
- sig/mindee/error/mindee_error.rbs
|
|
482
482
|
- sig/mindee/error/mindee_http_error.rbs
|
|
483
483
|
- sig/mindee/error/mindee_http_error_v2.rbs
|
|
File without changes
|
|
File without changes
|
|
File without changes
|