mindee 4.7.0.pre.rc1 → 4.7.0.pre.rc2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/docs/code_samples/default_v2.txt +3 -3
- data/lib/mindee/errors/mindee_input_error.rb +1 -1
- data/lib/mindee/image/image_extractor.rb +3 -6
- data/lib/mindee/input/local_response.rb +2 -2
- data/lib/mindee/input/sources/local_input_source.rb +31 -19
- data/lib/mindee/parsing/universal/universal_object_field.rb +1 -1
- data/lib/mindee/parsing/v2/field/field_confidence.rb +5 -5
- data/lib/mindee/parsing/v2/field/inference_fields.rb +1 -1
- data/lib/mindee/parsing/v2/field/list_field.rb +2 -2
- data/lib/mindee/parsing/v2/field/object_field.rb +1 -1
- data/lib/mindee/parsing/v2/field/simple_field.rb +0 -1
- data/lib/mindee/pdf/pdf_tools.rb +2 -2
- data/lib/mindee/version.rb +1 -1
- data/sig/custom/mini_magick.rbs +1 -1
- data/sig/mindee/input/local_response.rbs +1 -1
- data/sig/mindee/input/sources/local_input_source.rbs +5 -1
- data/sig/mindee/parsing/v2/field/list_field.rbs +0 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 95c963d2c0530278f42fa3543609db9f34156d74e031c08c929be8043794425e
|
4
|
+
data.tar.gz: 92b4cfd60be3f710de9bdab714d4291b152d6d253373dcfae20428a940ad5ec9
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 64be40a13e4662730c5749d5348860abf30e0c44002e4501d19a99a8c51c4afb2a83f9d901b1b3b4bbc43d255d8db3a27410b4ff5d274028092b7ec1368d830d
|
7
|
+
data.tar.gz: 5e9201f8c7d9beee93f601e0fe12758c20f55175c7c0476544bada2d1f8a7828962c848c75bb882169dea9c677649f53b3e0eba824cbdc559810f196fb3c0a75
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v4.7.0-rc2 - 2025-08-20
|
4
|
+
### Changes
|
5
|
+
* :sparkles: add missing accessors for PDF fixing options in `LocalInputSource`
|
6
|
+
### Fixes
|
7
|
+
* :recycle: update existing PDF fixing syntax
|
8
|
+
* :memo: fix typos & documentation
|
9
|
+
|
10
|
+
|
3
11
|
## v4.7.0-rc1 - 2025-08-13
|
4
12
|
### Changes
|
5
13
|
* :sparkles: add support for client V2 & associated features
|
@@ -8,10 +8,10 @@ model_id = 'MY_MODEL_ID'
|
|
8
8
|
mindee_client = Mindee::ClientV2.new(api_key: api_key)
|
9
9
|
|
10
10
|
# Set inference parameters
|
11
|
-
|
11
|
+
inference_params = Mindee::Input::InferenceParameters.new(
|
12
12
|
# ID of the model, required.
|
13
13
|
model_id,
|
14
|
-
# If set to `
|
14
|
+
# If set to `true`, will enable Retrieval-Augmented Generation.
|
15
15
|
rag: false,
|
16
16
|
)
|
17
17
|
|
@@ -21,7 +21,7 @@ input_source = Mindee::Input::Source::PathInputSource.new(input_path)
|
|
21
21
|
# Send for processing
|
22
22
|
response = mindee_client.enqueue_and_get_inference(
|
23
23
|
input_source,
|
24
|
-
|
24
|
+
inference_params # Note: this parameter can also be provided as a Hash.
|
25
25
|
)
|
26
26
|
|
27
27
|
# Print a brief summary of the parsed data
|
@@ -16,7 +16,7 @@ module Mindee
|
|
16
16
|
# @param mime_type [String]
|
17
17
|
def initialize(mime_type)
|
18
18
|
@invalid_mimetype = mime_type
|
19
|
-
super("'#{@invalid_mimetype}' mime type not allowed, must be one of" \
|
19
|
+
super("'#{@invalid_mimetype}' mime type not allowed, must be one of " \
|
20
20
|
"#{Mindee::Input::Source::ALLOWED_MIME_TYPES.join(', ')}")
|
21
21
|
end
|
22
22
|
end
|
@@ -18,12 +18,10 @@ module Mindee
|
|
18
18
|
# @return [Origami::PDF] A PdfDocument handle.
|
19
19
|
def self.attach_image_as_new_file(input_buffer, format: 'jpg')
|
20
20
|
magick_image = MiniMagick::Image.read(input_buffer)
|
21
|
-
# NOTE:
|
22
|
-
# converted.
|
21
|
+
# NOTE: We force format consolidation to a single format to avoid frames being interpreted as the final output.
|
23
22
|
magick_image.format(format)
|
24
23
|
original_density = magick_image.resolution
|
25
|
-
scale_factor = original_density[0].to_f / 4.166666 #
|
26
|
-
# the pdf otherwise the resulting image shrinks.
|
24
|
+
scale_factor = original_density[0].to_f / 4.166666 # Convert from default 300 DPI to 72.
|
27
25
|
magick_image.format('pdf', 0, { density: scale_factor.to_s })
|
28
26
|
Origami::PDF.read(StringIO.new(magick_image.to_blob))
|
29
27
|
end
|
@@ -32,8 +30,7 @@ module Mindee
|
|
32
30
|
#
|
33
31
|
# @param [Input::Source::LocalInputSource] input_source
|
34
32
|
# @param [Integer] page_id ID of the Page to extract from.
|
35
|
-
# @param [Array<Array<Geometry::Point>>, Array<Geometry::Quadrilateral>] polygons List of coordinates.
|
36
|
-
# to extract.
|
33
|
+
# @param [Array<Array<Geometry::Point>>, Array<Geometry::Quadrilateral>] polygons List of coordinates to extract.
|
37
34
|
# @return [Array<Image::ExtractedImage>] Extracted Images.
|
38
35
|
def self.extract_multiple_images_from_source(input_source, page_id, polygons)
|
39
36
|
new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
|
@@ -69,10 +69,10 @@ module Mindee
|
|
69
69
|
end
|
70
70
|
|
71
71
|
# Deserializes a loaded response
|
72
|
-
# @param response_class [Parsing::V2::
|
72
|
+
# @param response_class [Parsing::V2::CommonResponse] class to return.
|
73
73
|
# @return [Parsing::V2::JobResponse, Parsing::V2::InferenceResponse]
|
74
74
|
def deserialize_response(response_class)
|
75
|
-
response_class.new(as_hash)
|
75
|
+
response_class.new(as_hash) # : Parsing::V2::JobResponse | Parsing::V2::InferenceResponse
|
76
76
|
rescue StandardError
|
77
77
|
raise Errors::MindeeInputError, 'Invalid response provided.'
|
78
78
|
end
|
@@ -47,8 +47,7 @@ module Mindee
|
|
47
47
|
end
|
48
48
|
|
49
49
|
if filename.end_with?('.pdf') && repair_pdf
|
50
|
-
|
51
|
-
@file_mimetype = Marcel::MimeType.for @io_stream
|
50
|
+
fix_pdf!
|
52
51
|
|
53
52
|
logger.debug("Loaded new input #{@filename} from #{self.class}")
|
54
53
|
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
@@ -57,27 +56,40 @@ module Mindee
|
|
57
56
|
raise Errors::MindeeMimeTypeError, @file_mimetype.to_s
|
58
57
|
end
|
59
58
|
|
60
|
-
#
|
61
|
-
|
62
|
-
|
63
|
-
# @param stream [StringIO, File]
|
64
|
-
def rescue_broken_pdf(stream)
|
65
|
-
stream.gets('%PDF-')
|
66
|
-
raise Errors::MindeePDFError if stream.eof? || stream.pos > 500
|
67
|
-
|
68
|
-
stream.pos = stream.pos - 5
|
69
|
-
data = stream.read
|
70
|
-
@io_stream.close
|
71
|
-
|
72
|
-
@io_stream = StringIO.new
|
73
|
-
@io_stream << data
|
59
|
+
# @deprecated See {#fix_pdf!} or {.fix_pdf} instead.
|
60
|
+
def rescue_broken_pdf(_)
|
61
|
+
fix_pdf!
|
74
62
|
end
|
75
63
|
|
76
|
-
# Shorthand for
|
64
|
+
# Shorthand for PDF mimetype validation.
|
77
65
|
def pdf?
|
78
66
|
@file_mimetype.to_s == 'application/pdf'
|
79
67
|
end
|
80
68
|
|
69
|
+
# Attempts to fix the PDF data in the file.
|
70
|
+
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
|
71
|
+
# @return [void]
|
72
|
+
# @raise [Mindee::Errors::MindeePDFError]
|
73
|
+
def fix_pdf!(maximum_offset: 500)
|
74
|
+
@io_stream = LocalInputSource.fix_pdf(@io_stream, maximum_offset: maximum_offset)
|
75
|
+
@io_stream.rewind
|
76
|
+
@file_mimetype = Marcel::MimeType.for @io_stream
|
77
|
+
end
|
78
|
+
|
79
|
+
# Attempts to fix the PDF data in the given stream.
|
80
|
+
# @param stream [StringIO] The stream to fix.
|
81
|
+
# @param maximum_offset [Integer] Maximum offset to look for the PDF header.
|
82
|
+
# @return [StringIO] The fixed stream.
|
83
|
+
# @raise [Mindee::Errors::MindeePDFError]
|
84
|
+
def self.fix_pdf(stream, maximum_offset: 500)
|
85
|
+
out_stream = StringIO.new
|
86
|
+
stream.gets('%PDF-')
|
87
|
+
raise Errors::MindeePDFError if stream.eof? || stream.pos > maximum_offset
|
88
|
+
|
89
|
+
stream.pos = stream.pos - 5
|
90
|
+
out_stream << stream.read
|
91
|
+
end
|
92
|
+
|
81
93
|
# Cuts a PDF file according to provided options.
|
82
94
|
# @param options [PageOptions, nil] Page cutting/merge options:
|
83
95
|
#
|
@@ -166,8 +178,8 @@ module Mindee
|
|
166
178
|
@io_stream.rewind
|
167
179
|
end
|
168
180
|
|
169
|
-
# Checks whether the file has source text if it is a pdf.
|
170
|
-
# @return [bool]
|
181
|
+
# Checks whether the file has source text if it is a PDF; returns `false` otherwise.
|
182
|
+
# @return [bool] `true` if the file is a PDF and has source text.
|
171
183
|
def source_text?
|
172
184
|
Mindee::PDF::PDFTools.source_text?(@io_stream)
|
173
185
|
end
|
@@ -78,7 +78,7 @@ module Mindee
|
|
78
78
|
#
|
79
79
|
# @param method_name [Symbol] The name of the method being checked.
|
80
80
|
# @param include_private [bool] Whether to include private methods in the check.
|
81
|
-
# @return [bool]
|
81
|
+
# @return [bool] `true` if the method can be responded to, false otherwise.
|
82
82
|
def respond_to_missing?(method_name, include_private = false)
|
83
83
|
@all_values.key?(method_name.to_s) || super
|
84
84
|
end
|
@@ -40,25 +40,25 @@ module Mindee
|
|
40
40
|
end
|
41
41
|
|
42
42
|
# Check if this is a certain confidence level.
|
43
|
-
# @return [Boolean]
|
43
|
+
# @return [Boolean] `true` if confidence is certain.
|
44
44
|
def certain?
|
45
45
|
@value == CERTAIN
|
46
46
|
end
|
47
47
|
|
48
48
|
# Check if this is a high confidence level.
|
49
|
-
# @return [Boolean]
|
49
|
+
# @return [Boolean] `true` if confidence is high.
|
50
50
|
def high?
|
51
51
|
@value == HIGH
|
52
52
|
end
|
53
53
|
|
54
54
|
# Check if this is a medium confidence level.
|
55
|
-
# @return [Boolean]
|
55
|
+
# @return [Boolean] `true` if confidence is medium.
|
56
56
|
def medium?
|
57
57
|
@value == MEDIUM
|
58
58
|
end
|
59
59
|
|
60
60
|
# Check if this is a low confidence level.
|
61
|
-
# @return [Boolean]
|
61
|
+
# @return [Boolean] `true` if confidence is low.
|
62
62
|
def low?
|
63
63
|
@value == LOW
|
64
64
|
end
|
@@ -71,7 +71,7 @@ module Mindee
|
|
71
71
|
|
72
72
|
# Compare two FieldConfidence instances.
|
73
73
|
# @param other [FieldConfidence] The other confidence to compare.
|
74
|
-
# @return [Boolean]
|
74
|
+
# @return [Boolean] `true` if they have the same value.
|
75
75
|
def ==(other)
|
76
76
|
other.is_a?(FieldConfidence) && @value == other.value
|
77
77
|
end
|
@@ -43,7 +43,7 @@ module Mindee
|
|
43
43
|
|
44
44
|
# Check if method_missing should handle the method.
|
45
45
|
# @param method_name [Symbol] The method name.
|
46
|
-
# @return [Boolean]
|
46
|
+
# @return [Boolean] `true` if the method should be handled.
|
47
47
|
def respond_to_missing?(method_name, include_private = false)
|
48
48
|
key?(method_name.to_s) || super
|
49
49
|
end
|
@@ -49,7 +49,7 @@ module Mindee
|
|
49
49
|
end
|
50
50
|
|
51
51
|
# Check if the list is empty.
|
52
|
-
# @return [Boolean]
|
52
|
+
# @return [Boolean] `true` if the list has no items.
|
53
53
|
def empty?
|
54
54
|
@items.empty?
|
55
55
|
end
|
@@ -74,7 +74,7 @@ module Mindee
|
|
74
74
|
end
|
75
75
|
|
76
76
|
# Iterator for Enumerator inheritance.
|
77
|
-
# NOTE: Untyped due to incomplete support in
|
77
|
+
# NOTE: Untyped due to incomplete support in current supported version of RBS.
|
78
78
|
def each(&block)
|
79
79
|
return to_enum(:each) unless block_given?
|
80
80
|
|
@@ -91,7 +91,7 @@ module Mindee
|
|
91
91
|
|
92
92
|
# Check if method_missing should handle the method.
|
93
93
|
# @param method_name [Symbol] The method name.
|
94
|
-
# @return [Boolean]
|
94
|
+
# @return [Boolean] `true` if the method should be handled.
|
95
95
|
def respond_to_missing?(method_name, include_private = false)
|
96
96
|
@fields.respond_to?(method_name) || super
|
97
97
|
end
|
@@ -26,7 +26,6 @@ module Mindee
|
|
26
26
|
if @value.is_a?(TrueClass) || @value.is_a?(FalseClass)
|
27
27
|
@value ? 'True' : 'False'
|
28
28
|
elsif @value.is_a?(Integer) || @value.is_a?(Float)
|
29
|
-
# NOTE: explicitly typing because steep is very, very dumb
|
30
29
|
num = @value # @type var num: Integer | Float
|
31
30
|
format_numeric_value(num)
|
32
31
|
else
|
data/lib/mindee/pdf/pdf_tools.rb
CHANGED
@@ -44,7 +44,7 @@ module Mindee
|
|
44
44
|
# Checks a PDF's stream content for text operators.
|
45
45
|
# See https://opensource.adobe.com/dc-acrobat-sdk-docs/pdfstandards/PDF32000_2008.pdf page 243-251.
|
46
46
|
# @param [StringIO] stream Stream object from a PDF's page.
|
47
|
-
# @return [bool]
|
47
|
+
# @return [bool] `true` if a text operator is found in the stream.
|
48
48
|
def self.stream_has_text?(stream)
|
49
49
|
data = stream.data
|
50
50
|
return false if data.nil? || data.empty?
|
@@ -55,7 +55,7 @@ module Mindee
|
|
55
55
|
|
56
56
|
# Checks whether the file has source_text. Returns false if the file isn't a PDF.
|
57
57
|
# @param [StringIO] pdf_data A binary-encoded stream representing the PDF file.
|
58
|
-
# @return [bool]
|
58
|
+
# @return [bool] `true` if the pdf has source text, false otherwise.
|
59
59
|
def self.source_text?(pdf_data)
|
60
60
|
begin
|
61
61
|
pdf_data.rewind
|
data/lib/mindee/version.rb
CHANGED
data/sig/custom/mini_magick.rbs
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# Stub for the mini_magick library.
|
2
|
-
#
|
2
|
+
# NOTE: though typing annotations for the MiniMagick library now exist, it seems that they aren't strict enough
|
3
3
|
# to match the rules we have on the repo, hence the existence of this file and the overrides present below.
|
4
4
|
module MiniMagick
|
5
5
|
class Image
|
@@ -8,7 +8,7 @@ module Mindee
|
|
8
8
|
def self.process_secret_key: (String) -> String
|
9
9
|
def get_hmac_signature: (String) -> String
|
10
10
|
def valid_hmac_signature?: (String, String) -> bool
|
11
|
-
def deserialize_response: (singleton(Parsing::V2::
|
11
|
+
def deserialize_response: (singleton(Parsing::V2::CommonResponse))-> (Parsing::V2::JobResponse | Parsing::V2::InferenceResponse)
|
12
12
|
end
|
13
13
|
end
|
14
14
|
end
|
@@ -8,10 +8,14 @@ module Mindee
|
|
8
8
|
attr_reader filename: String
|
9
9
|
attr_reader io_stream: StringIO | File
|
10
10
|
def initialize: (StringIO | File, String, ?repair_pdf: bool) -> void
|
11
|
+
|
12
|
+
def fix_pdf!: (?maximum_offset: Integer) -> void
|
13
|
+
def self.fix_pdf: (StringIO | File, ?maximum_offset: Integer) -> StringIO
|
14
|
+
|
11
15
|
def logger: () -> Logger
|
12
16
|
|
13
17
|
|
14
|
-
def rescue_broken_pdf: (
|
18
|
+
def rescue_broken_pdf: (untyped) -> void
|
15
19
|
def pdf?: -> bool
|
16
20
|
def apply_page_options: (PageOptions) -> StringIO?
|
17
21
|
def process_pdf: (PageOptions) -> StringIO?
|
@@ -12,7 +12,6 @@ module Mindee
|
|
12
12
|
def empty?: -> bool
|
13
13
|
def size: -> Integer
|
14
14
|
def length: -> Integer
|
15
|
-
# NOTE: Steep is incapable of handling typing of `each` when multiple types are used.
|
16
15
|
def each: () { (untyped) -> untyped } -> untyped
|
17
16
|
def []: (Integer) -> (BaseField)
|
18
17
|
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 4.7.0.pre.rc1
|
4
|
+
version: 4.7.0.pre.rc2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2025-08-
|
11
|
+
date: 2025-08-20 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: base64
|