mindee 5.2.2 → 5.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +9 -0
- data/SKILL.md +172 -0
- data/bin/v2/parser.rb +8 -4
- data/lib/mindee/image/extracted_image.rb +1 -8
- data/lib/mindee/image/extracted_images.rb +20 -0
- data/lib/mindee/image/image_extractor.rb +26 -8
- data/lib/mindee/image/image_utils.rb +0 -17
- data/lib/mindee/image.rb +1 -0
- data/lib/mindee/pdf/extracted_pdf.rb +1 -1
- data/lib/mindee/pdf/extracted_pdfs.rb +20 -0
- data/lib/mindee/pdf/pdf_extractor.rb +10 -10
- data/lib/mindee/pdf.rb +1 -0
- data/lib/mindee/v2/file_operations/crop.rb +7 -6
- data/lib/mindee/v2/file_operations/split.rb +2 -2
- data/lib/mindee/v2/file_operations.rb +0 -2
- data/lib/mindee/v2/parsing/failed_inference_response.rb +34 -0
- data/lib/mindee/v2/parsing.rb +1 -0
- data/lib/mindee/v2/product/crop/crop_item.rb +1 -1
- data/lib/mindee/v2/product/crop/crop_response.rb +0 -8
- data/lib/mindee/v2/product/crop/crop_result.rb +8 -0
- data/lib/mindee/v2/product/split/split_range.rb +1 -1
- data/lib/mindee/v2/product/split/split_response.rb +0 -8
- data/lib/mindee/v2/product/split/split_result.rb +7 -0
- data/lib/mindee/version.rb +1 -1
- data/sig/mindee/image/extracted_image.rbs +0 -1
- data/sig/mindee/image/extracted_images.rbs +8 -0
- data/sig/mindee/image/image_extractor.rbs +3 -2
- data/sig/mindee/image/image_utils.rbs +0 -1
- data/sig/mindee/pdf/extracted_pdfs.rbs +8 -0
- data/sig/mindee/pdf/pdf_extractor.rbs +2 -2
- data/sig/mindee/v2/file_operation/crop.rbs +1 -1
- data/sig/mindee/v2/file_operation/split.rbs +1 -1
- data/sig/mindee/v2/parsing/failed_inference_response.rbs +15 -0
- data/sig/mindee/v2/product/crop/crop_item.rbs +1 -1
- data/sig/mindee/v2/product/crop/crop_response.rbs +0 -2
- data/sig/mindee/v2/product/crop/crop_result.rbs +1 -0
- data/sig/mindee/v2/product/split/split_range.rbs +1 -1
- data/sig/mindee/v2/product/split/split_response.rbs +0 -2
- data/sig/mindee/v2/product/split/split_result.rbs +3 -0
- metadata +9 -6
- data/lib/mindee/v2/file_operations/crop_files.rb +0 -25
- data/lib/mindee/v2/file_operations/split_files.rb +0 -25
- data/sig/mindee/v2/file_operation/crop_files.rbs +0 -9
- data/sig/mindee/v2/file_operation/split_files.rbs +0 -9
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: d8eb9e96fa7c4b8a41479c87152f858112a72c52f18c23ad247c220615d4df15
|
|
4
|
+
data.tar.gz: 44da90b0f6e814f13cd418502998e3e17610e76857199744c8f2e98b217d72c0
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: 543a63c5085472676f044b20721f03844ad3f425f4fef936c147240b0c286da6f81d8ceffb9ebb90b6c70340426e0529350406b0acfb685176cedb85968ddc06
|
|
7
|
+
data.tar.gz: 591a11727b309a1ac91840ae3dc260f1bcd5186c840b82a71bdc815a8dd3741500191801491ce25edcc44178077d5dd02d437b1c388f9164f1b80c49114a9ea4
|
data/CHANGELOG.md
CHANGED
|
@@ -1,5 +1,14 @@
|
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
|
2
2
|
|
|
3
|
+
## v5.3.0 - 2026-07-03
|
|
4
|
+
### Changes:
|
|
5
|
+
* :memo: add SKILL file
|
|
6
|
+
* :recycle: use generic array types for images, PDFs
|
|
7
|
+
* :sparkles: allow passing webhooks to the CLI
|
|
8
|
+
* :sparkles: add webhook error response
|
|
9
|
+
### Fixes:
|
|
10
|
+
* :bug: fix for extraction method naming
|
|
11
|
+
|
|
3
12
|
## v5.2.2 - 2026-06-18
|
|
4
13
|
### Changes
|
|
5
14
|
* :recycle: refactor CLI to conform with other client libraries
|
data/SKILL.md
ADDED
|
@@ -0,0 +1,172 @@
|
|
|
1
|
+
# Mindee Ruby SDK
|
|
2
|
+
|
|
3
|
+
Use this skill for Mindee V2 integrations with the official Ruby SDK.
|
|
4
|
+
|
|
5
|
+
## Scope
|
|
6
|
+
|
|
7
|
+
- Use the official `mindee` Ruby gem.
|
|
8
|
+
- Focus on SDK-based integration patterns only.
|
|
9
|
+
- Do not suggest direct HTTP calls, cURL, or non-SDK integrations.
|
|
10
|
+
- Do not use undocumented SDK internals.
|
|
11
|
+
|
|
12
|
+
## Primary documentation
|
|
13
|
+
|
|
14
|
+
### SDK overview
|
|
15
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk.md
|
|
16
|
+
|
|
17
|
+
### Client setup
|
|
18
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/configure-the-client.md
|
|
19
|
+
|
|
20
|
+
### Model parameters
|
|
21
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/basic-model-configuration.md
|
|
22
|
+
|
|
23
|
+
### Load local files
|
|
24
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/load-and-adjust-a-file.md
|
|
25
|
+
|
|
26
|
+
### Load remote URLs
|
|
27
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/load-an-url.md
|
|
28
|
+
|
|
29
|
+
### Send files and URLs
|
|
30
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/send-a-file-or-url.md
|
|
31
|
+
|
|
32
|
+
### Process responses
|
|
33
|
+
- https://docs.mindee.com/integrations/client-libraries-sdk/process-the-response.md
|
|
34
|
+
|
|
35
|
+
### Handle errors
|
|
36
|
+
- https://docs.mindee.com/integrations/problem-database.md
|
|
37
|
+
|
|
38
|
+
## Handling responses by model type
|
|
39
|
+
|
|
40
|
+
### Extraction
|
|
41
|
+
- Use: https://docs.mindee.com/extraction-models/sdk-integration/extraction-result.md
|
|
42
|
+
- Use this page for accessing dynamic fields from `response.inference.result.fields`.
|
|
43
|
+
- Use this page for examples of `SimpleField`, `ObjectField`, `ListField`, confidence, and locations.
|
|
44
|
+
|
|
45
|
+
### Split
|
|
46
|
+
- Use: https://docs.mindee.com/split-models/sdk-integration/split-result.md
|
|
47
|
+
- Use this page for iterating over `response.inference.result.splits`.
|
|
48
|
+
- Use this page for `document_type`, `page_range`, and optional chained extraction results.
|
|
49
|
+
|
|
50
|
+
### Crop
|
|
51
|
+
- Use: https://docs.mindee.com/crop-models/sdk-integration/crop-result.md
|
|
52
|
+
- Use this page for iterating over `response.inference.result.crops`.
|
|
53
|
+
- Use this page for `object_type`, crop location, polygon data, and optional chained extraction results.
|
|
54
|
+
|
|
55
|
+
### Classification
|
|
56
|
+
- Use: https://docs.mindee.com/classification-models/sdk-integration/classification-result.md
|
|
57
|
+
- Use this page for accessing `response.inference.result.classification`.
|
|
58
|
+
- Use this page for `document_type` and optional chained extraction results.
|
|
59
|
+
|
|
60
|
+
### OCR
|
|
61
|
+
- Use: https://docs.mindee.com/raw-text-ocr-models/sdk-integration/ocr-result.md
|
|
62
|
+
- Use this page for iterating over `response.inference.result.pages`.
|
|
63
|
+
- Use this page for page text (`content`), words, and word polygon data.
|
|
64
|
+
|
|
65
|
+
## Default workflow
|
|
66
|
+
|
|
67
|
+
When answering questions, follow this order:
|
|
68
|
+
|
|
69
|
+
1. Initialize the SDK client.
|
|
70
|
+
2. Configure `model_id` and other inference parameters.
|
|
71
|
+
3. Load the input source.
|
|
72
|
+
4. Optionally adjust the file before upload.
|
|
73
|
+
5. Send with polling or webhooks.
|
|
74
|
+
6. Process the response.
|
|
75
|
+
7. Handle errors and retries.
|
|
76
|
+
|
|
77
|
+
## Answering rules
|
|
78
|
+
|
|
79
|
+
- Base answers on the documentation above.
|
|
80
|
+
- Prefer documented SDK methods and patterns.
|
|
81
|
+
- Use environment variables for API keys in production.
|
|
82
|
+
- Reuse a client instance when possible.
|
|
83
|
+
- Prefer polling for simple examples.
|
|
84
|
+
- Prefer webhooks for production or high-volume workflows.
|
|
85
|
+
- If a feature is not documented, say it is not officially supported.
|
|
86
|
+
- If a user asks for code, keep examples minimal and working.
|
|
87
|
+
|
|
88
|
+
## Code sample rules
|
|
89
|
+
|
|
90
|
+
- Use Ruby examples only.
|
|
91
|
+
- Use the official `mindee` gem.
|
|
92
|
+
- Show requires explicitly.
|
|
93
|
+
- Include the exact documented class and method names.
|
|
94
|
+
- Use placeholders like `MY_API_KEY`, `MY_MODEL_ID`, and `/path/to/file.pdf`.
|
|
95
|
+
- Keep samples focused on one task.
|
|
96
|
+
|
|
97
|
+
## Preferred example topics
|
|
98
|
+
|
|
99
|
+
### Client initialization
|
|
100
|
+
Use:
|
|
101
|
+
- `Mindee::V2::Client.new(api_key: 'MY_API_KEY')`
|
|
102
|
+
- `Mindee::V2::Client.new` with `MINDEE_V2_API_KEY` env var
|
|
103
|
+
|
|
104
|
+
### Input loading
|
|
105
|
+
Use:
|
|
106
|
+
- `Mindee::Input::Source::PathInputSource`
|
|
107
|
+
- `Mindee::Input::Source::BytesInputSource`
|
|
108
|
+
- `Mindee::Input::Source::Base64InputSource`
|
|
109
|
+
- `Mindee::Input::Source::FileInputSource`
|
|
110
|
+
- `Mindee::Input::Source::URLInputSource`
|
|
111
|
+
|
|
112
|
+
### Sending documents
|
|
113
|
+
Use:
|
|
114
|
+
- `client.enqueue_and_get_result(...)` for polling
|
|
115
|
+
- `client.enqueue(...)` for webhooks
|
|
116
|
+
|
|
117
|
+
### Response handling
|
|
118
|
+
Use:
|
|
119
|
+
- `response.inference`
|
|
120
|
+
- `response.raw_http`
|
|
121
|
+
- `Mindee::Input::LocalResponse` for webhook payloads
|
|
122
|
+
- HMAC signature validation via `local_response.valid_hmac_signature?(secret_key, signature)` when relevant
|
|
123
|
+
|
|
124
|
+
### File preparation
|
|
125
|
+
Use:
|
|
126
|
+
- `input_source.page_count` — get number of pages
|
|
127
|
+
- `input_source.compress!(...)` — compress before upload
|
|
128
|
+
- `input_source.apply_page_options(options)` — trim or remove pages
|
|
129
|
+
|
|
130
|
+
## Avoid
|
|
131
|
+
|
|
132
|
+
- Direct REST examples
|
|
133
|
+
- cURL examples
|
|
134
|
+
- Manual authentication header construction
|
|
135
|
+
- Bearer token examples for API keys
|
|
136
|
+
- Non-Ruby examples
|
|
137
|
+
- V1 examples unless the user explicitly asks for V1
|
|
138
|
+
|
|
139
|
+
## If the user is unclear
|
|
140
|
+
|
|
141
|
+
Ask for only what is needed:
|
|
142
|
+
|
|
143
|
+
- input type: local file or URL
|
|
144
|
+
- delivery pattern: polling or webhook
|
|
145
|
+
- model ID
|
|
146
|
+
- runtime context: Rails, Sinatra, background job, or script
|
|
147
|
+
|
|
148
|
+
## Output style
|
|
149
|
+
|
|
150
|
+
- Be concise.
|
|
151
|
+
- Answer with runnable examples when code is requested.
|
|
152
|
+
- Link to the most relevant doc section.
|
|
153
|
+
- Do not overwhelm the user with every option.
|
|
154
|
+
- Start with the documented default path.
|
|
155
|
+
|
|
156
|
+
---
|
|
157
|
+
|
|
158
|
+
# Agent Instructions: Querying The Documentation
|
|
159
|
+
|
|
160
|
+
If you need additional information that is not directly available in this page, you can query the documentation dynamically by asking a question.
|
|
161
|
+
|
|
162
|
+
Perform an HTTP GET request on the documentation URL with the `ask` query parameter.
|
|
163
|
+
Include `ruby+sdk+-+` at the beginning of the question to get answers specific to this library:
|
|
164
|
+
|
|
165
|
+
```
|
|
166
|
+
GET https://docs.mindee.com/integrations.md?ask=ruby+sdk+-+<question>
|
|
167
|
+
```
|
|
168
|
+
|
|
169
|
+
The question should be specific, self-contained, and written in natural language.
|
|
170
|
+
The response will contain a direct answer to the question and relevant excerpts and sources from the documentation.
|
|
171
|
+
|
|
172
|
+
Use this mechanism when the answer is not explicitly present in the current page, you need clarification or additional context, or you want to retrieve related documentation sections.
|
data/bin/v2/parser.rb
CHANGED
|
@@ -141,7 +141,7 @@ module MindeeCLI
|
|
|
141
141
|
# Initialize common options for search and product commands.
|
|
142
142
|
# @param options_parser [OptionParser]
|
|
143
143
|
def init_common_options(options_parser)
|
|
144
|
-
options_parser.on('-k [KEY]', '--key [KEY]', 'API key
|
|
144
|
+
options_parser.on('-k [KEY]', '--key [KEY]', 'Mindee V2 API key.') { |v| @options[:api_key] = v }
|
|
145
145
|
options_parser.on('-o FORMAT', '--output-format FORMAT', ['raw', 'full', 'summary'],
|
|
146
146
|
'Format of the output (raw, full, summary). Default: summary') do |format|
|
|
147
147
|
@options[:output_format] = format
|
|
@@ -179,6 +179,10 @@ module MindeeCLI
|
|
|
179
179
|
options_parser.on('-a ALIAS', '--alias ALIAS', 'Add a file alias to the response') do |v|
|
|
180
180
|
@options[:alias] = v
|
|
181
181
|
end
|
|
182
|
+
options_parser.on('-w WEBHOOK_ID', '--webhook-id WEBHOOK_ID',
|
|
183
|
+
'Specify a webhook by ID. May be used multiple times.') do |v|
|
|
184
|
+
(@options[:webhook_ids] ||= []) << v
|
|
185
|
+
end
|
|
182
186
|
init_common_options(options_parser)
|
|
183
187
|
options_parser.on('-F', '--fix-pdf', 'Attempt to repair PDF before enqueueing') do
|
|
184
188
|
@options[:repair_pdf] = true
|
|
@@ -190,10 +194,10 @@ module MindeeCLI
|
|
|
190
194
|
end
|
|
191
195
|
|
|
192
196
|
# @return [Hash]
|
|
193
|
-
def setup_product_params
|
|
197
|
+
def setup_product_params(product_command)
|
|
194
198
|
params = { model_id: @options[:model_id] }
|
|
195
199
|
@options.each_pair do |key, value|
|
|
196
|
-
params[key] = value if V2_PRODUCTS[
|
|
200
|
+
params[key] = value if V2_PRODUCTS[product_command].include?(key) || %i[alias webhook_ids].include?(key)
|
|
197
201
|
end
|
|
198
202
|
params
|
|
199
203
|
end
|
|
@@ -205,7 +209,7 @@ module MindeeCLI
|
|
|
205
209
|
mindee_client = Mindee::V2::Client.new(api_key: options[:api_key])
|
|
206
210
|
response_class = V2_PRODUCTS[product_command][:response_class]
|
|
207
211
|
input_source = setup_input_source(options)
|
|
208
|
-
params = setup_product_params
|
|
212
|
+
params = setup_product_params(product_command)
|
|
209
213
|
|
|
210
214
|
mindee_client.enqueue_and_get_result(
|
|
211
215
|
response_class,
|
|
@@ -73,17 +73,10 @@ module Mindee
|
|
|
73
73
|
# Return the file as a Mindee-compatible BufferInput source.
|
|
74
74
|
#
|
|
75
75
|
# @return [Mindee::Input::Source::BytesInputSource] A bytes input source built from the buffer.
|
|
76
|
-
def
|
|
76
|
+
def as_input_source
|
|
77
77
|
@buffer.rewind
|
|
78
78
|
Mindee::Input::Source::BytesInputSource.new(@buffer.read || '', @filename)
|
|
79
79
|
end
|
|
80
|
-
|
|
81
|
-
# Return the file as a Mindee-compatible BufferInput source.
|
|
82
|
-
#
|
|
83
|
-
# @return [FileInputSource] A BufferInput source.
|
|
84
|
-
def as_input_source
|
|
85
|
-
as_source
|
|
86
|
-
end
|
|
87
80
|
end
|
|
88
81
|
end
|
|
89
82
|
end
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'extracted_image'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
# Image Extraction Module.
|
|
7
|
+
module Image
|
|
8
|
+
# List of extracted images.
|
|
9
|
+
class ExtractedImages < Array
|
|
10
|
+
# Save all extracted images to disk.
|
|
11
|
+
#
|
|
12
|
+
# @param output_path [String, Pathname] Directory path to save the extracted images to.
|
|
13
|
+
def save_all_to_disk(output_path)
|
|
14
|
+
each do |image|
|
|
15
|
+
image.write_to_file(File.join(output_path.to_s, image.filename))
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -7,16 +7,17 @@ require 'stringio'
|
|
|
7
7
|
require 'tempfile'
|
|
8
8
|
require_relative '../input/sources'
|
|
9
9
|
require_relative 'extracted_image'
|
|
10
|
+
require_relative 'extracted_images'
|
|
10
11
|
|
|
11
12
|
module Mindee
|
|
12
13
|
# Image Extraction Module.
|
|
13
14
|
module Image
|
|
14
15
|
# Image Extraction wrapper class.
|
|
15
16
|
module ImageExtractor
|
|
16
|
-
# Attaches an image as a new page in a
|
|
17
|
+
# Attaches an image as a new page in a PDFDocument object.
|
|
17
18
|
#
|
|
18
19
|
# @param [StringIO] input_buffer Input buffer. Only supports JPEG.
|
|
19
|
-
# @return [Origami::PDF] A
|
|
20
|
+
# @return [Origami::PDF] A PDFDocument handle.
|
|
20
21
|
def self.attach_image_as_new_file(input_buffer, format: 'jpg')
|
|
21
22
|
magick_image = MiniMagick::Image.read(input_buffer)
|
|
22
23
|
# NOTE: We force format consolidation to a single format to avoid frames being interpreted as the final output.
|
|
@@ -32,12 +33,12 @@ module Mindee
|
|
|
32
33
|
# @param [Input::Source::LocalInputSource] input_source
|
|
33
34
|
# @param [Integer] page_id ID of the Page to extract from.
|
|
34
35
|
# @param [Array<Array<Geometry::Point>>, Array<Geometry::Quadrilateral>] polygons List of coordinates to extract.
|
|
35
|
-
# @return [
|
|
36
|
+
# @return [Image::ExtractedImages] Extracted Images.
|
|
36
37
|
def self.extract_multiple_images_from_source(input_source, page_id, polygons)
|
|
37
38
|
new_stream = load_input_source_pdf_page_as_stringio(input_source, page_id)
|
|
38
39
|
new_stream.seek(0)
|
|
39
40
|
|
|
40
|
-
extract_images_from_polygons(input_source, page_id, polygons)
|
|
41
|
+
ExtractedImages.new(extract_images_from_polygons(input_source, page_id, polygons))
|
|
41
42
|
end
|
|
42
43
|
|
|
43
44
|
# Extracts images from their positions on a file (as polygons).
|
|
@@ -45,9 +46,9 @@ module Mindee
|
|
|
45
46
|
# @param [Input::Source::LocalInputSource] input_source Local input source.
|
|
46
47
|
# @param [Integer] page_id Page ID.
|
|
47
48
|
# @param [Array<Geometry::Point, Geometry::Polygon, Geometry::Quadrilateral>] polygons
|
|
48
|
-
# @return [
|
|
49
|
+
# @return [Image::ExtractedImages] Extracted Images.
|
|
49
50
|
def self.extract_images_from_polygons(input_source, page_id, polygons)
|
|
50
|
-
extracted_elements =
|
|
51
|
+
extracted_elements = ExtractedImages.new # @type var extracted_elements: Image::ExtractedImages
|
|
51
52
|
|
|
52
53
|
input_source.io_stream.rewind
|
|
53
54
|
pdf_stream = StringIO.new(input_source.io_stream.read.to_s)
|
|
@@ -65,7 +66,7 @@ module Mindee
|
|
|
65
66
|
min_max_x = Geometry.get_min_max_x(points)
|
|
66
67
|
min_max_y = Geometry.get_min_max_y(points)
|
|
67
68
|
file_extension = ImageUtils.determine_file_extension(input_source)
|
|
68
|
-
cropped_image =
|
|
69
|
+
cropped_image = crop_image(page_content, min_max_x, min_max_y)
|
|
69
70
|
if file_extension == 'pdf'
|
|
70
71
|
cropped_image.format('jpg')
|
|
71
72
|
else
|
|
@@ -102,7 +103,7 @@ module Mindee
|
|
|
102
103
|
#
|
|
103
104
|
# @param input_file [LocalInputSource] Local input.
|
|
104
105
|
# @param [Integer] page_id Page ID.
|
|
105
|
-
# @return [StringIO] A valid
|
|
106
|
+
# @return [StringIO] A valid PDFDocument handle.
|
|
106
107
|
def self.load_input_source_pdf_page_as_stringio(input_file, page_id)
|
|
107
108
|
input_file.io_stream.rewind
|
|
108
109
|
if input_file.pdf?
|
|
@@ -111,6 +112,23 @@ module Mindee
|
|
|
111
112
|
input_file.io_stream
|
|
112
113
|
end
|
|
113
114
|
end
|
|
115
|
+
|
|
116
|
+
# Crops a MiniMagick Image from the given bounding box.
|
|
117
|
+
#
|
|
118
|
+
# @param [MiniMagick::Image] image Input Image.
|
|
119
|
+
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
|
120
|
+
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
|
121
|
+
def self.crop_image(image, min_max_x, min_max_y)
|
|
122
|
+
width = image[:width].to_i
|
|
123
|
+
height = image[:height].to_i
|
|
124
|
+
|
|
125
|
+
image.format('jpg')
|
|
126
|
+
new_width = (min_max_x.max - min_max_x.min) * width
|
|
127
|
+
new_height = (min_max_y.max - min_max_y.min) * height
|
|
128
|
+
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
|
129
|
+
|
|
130
|
+
image
|
|
131
|
+
end
|
|
114
132
|
end
|
|
115
133
|
end
|
|
116
134
|
end
|
|
@@ -124,23 +124,6 @@ module Mindee
|
|
|
124
124
|
MiniMagick::Image.read(pdf_stream)
|
|
125
125
|
end
|
|
126
126
|
|
|
127
|
-
# Crops a MiniMagick Image from a the given bounding box.
|
|
128
|
-
#
|
|
129
|
-
# @param [MiniMagick::Image] image Input Image.
|
|
130
|
-
# @param [Mindee::Geometry::MinMax] min_max_x minimum & maximum values for the x coordinates.
|
|
131
|
-
# @param [Mindee::Geometry::MinMax] min_max_y minimum & maximum values for the y coordinates.
|
|
132
|
-
def self.crop_image(image, min_max_x, min_max_y)
|
|
133
|
-
width = image[:width].to_i
|
|
134
|
-
height = image[:height].to_i
|
|
135
|
-
|
|
136
|
-
image.format('jpg')
|
|
137
|
-
new_width = (min_max_x.max - min_max_x.min) * width
|
|
138
|
-
new_height = (min_max_y.max - min_max_y.min) * height
|
|
139
|
-
image.crop("#{new_width}x#{new_height}+#{min_max_x.min * width}+#{min_max_y.min * height}")
|
|
140
|
-
|
|
141
|
-
image
|
|
142
|
-
end
|
|
143
|
-
|
|
144
127
|
# Writes a MiniMagick::Image to a buffer.
|
|
145
128
|
#
|
|
146
129
|
# @param [MiniMagick::Image] image a valid MiniMagick image.
|
data/lib/mindee/image.rb
CHANGED
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'extracted_pdf'
|
|
4
|
+
|
|
5
|
+
module Mindee
|
|
6
|
+
# PDF Extraction Module.
|
|
7
|
+
module PDF
|
|
8
|
+
# List of extracted PDFs.
|
|
9
|
+
class ExtractedPDFs < Array
|
|
10
|
+
# Save all extracted PDFs to disk.
|
|
11
|
+
#
|
|
12
|
+
# @param output_path [String, Pathname] Directory path to save the extracted PDFs to.
|
|
13
|
+
def save_all_to_disk(output_path)
|
|
14
|
+
each do |pdf|
|
|
15
|
+
pdf.write_to_file(File.join(output_path.to_s, pdf.filename))
|
|
16
|
+
end
|
|
17
|
+
end
|
|
18
|
+
end
|
|
19
|
+
end
|
|
20
|
+
end
|
|
@@ -1,9 +1,9 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
module Mindee
|
|
4
|
-
#
|
|
4
|
+
# PDF Extraction Module.
|
|
5
5
|
module PDF
|
|
6
|
-
#
|
|
6
|
+
# PDF extraction class.
|
|
7
7
|
class PDFExtractor
|
|
8
8
|
# @param local_input [Mindee::Input::Source::LocalInputSource]
|
|
9
9
|
def initialize(local_input)
|
|
@@ -23,15 +23,15 @@ module Mindee
|
|
|
23
23
|
end
|
|
24
24
|
end
|
|
25
25
|
|
|
26
|
-
# Retrieves the page count for the
|
|
26
|
+
# Retrieves the page count for the PDF object.
|
|
27
27
|
# @return [Integer]
|
|
28
28
|
def page_count
|
|
29
29
|
Mindee::PDF::PDFProcessor.open_pdf(@source_pdf).pages.size
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
-
# Creates a new
|
|
33
|
-
# @param page_indexes [Array<Integer>] List of page number to use for merging in the original
|
|
34
|
-
# @return [StringIO] The buffer containing the new
|
|
32
|
+
# Creates a new PDF from pages and saves it into a buffer.
|
|
33
|
+
# @param page_indexes [Array<Integer>] List of page numbers to use for merging in the original PDF.
|
|
34
|
+
# @return [StringIO] The buffer containing the new PDF.
|
|
35
35
|
def cut_pages(page_indexes)
|
|
36
36
|
options = PageOptions.new(params: {
|
|
37
37
|
page_indexes: page_indexes,
|
|
@@ -41,10 +41,10 @@ module Mindee
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
# Extract the sub-documents from the main pdf, based on the given list of page indexes.
|
|
44
|
-
# @param page_indexes [Array<Array<Integer>>] List of page number to use for merging in the original
|
|
45
|
-
# @return [
|
|
44
|
+
# @param page_indexes [Array<Array<Integer>>] List of lists of page numbers to use for merging in the original PDF.
|
|
45
|
+
# @return [Mindee::PDF::ExtractedPDFs] The list of extracted PDFs.
|
|
46
46
|
def extract_sub_documents(page_indexes)
|
|
47
|
-
extracted_pdfs =
|
|
47
|
+
extracted_pdfs = ExtractedPDFs.new # @type var extracted_pdfs: Mindee::PDF::ExtractedPDFs
|
|
48
48
|
extension = File.extname(@filename)
|
|
49
49
|
basename = File.basename(@filename, extension)
|
|
50
50
|
page_indexes.each do |page_index_list|
|
|
@@ -74,7 +74,7 @@ module Mindee
|
|
|
74
74
|
# Extracts invoices as complete PDFs from the document.
|
|
75
75
|
# @param page_indexes [Array<Array<Integer>, InvoiceSplitterV1InvoicePageGroup>]
|
|
76
76
|
# @param strict [bool]
|
|
77
|
-
# @return [
|
|
77
|
+
# @return [Mindee::PDF::ExtractedPDFs]
|
|
78
78
|
def extract_invoices(page_indexes, strict: false)
|
|
79
79
|
raise Error::MindeePDFError, 'No indexes provided.' if page_indexes.empty?
|
|
80
80
|
|
data/lib/mindee/pdf.rb
CHANGED
|
@@ -21,7 +21,7 @@ module Mindee
|
|
|
21
21
|
#
|
|
22
22
|
# @param input_source [LocalInputSource] Local Input Source to extract sub-receipts from.
|
|
23
23
|
# @param crops [Array<CropItem>] List of crops.
|
|
24
|
-
# @return [
|
|
24
|
+
# @return [Image::ExtractedImages] Individual extracted zones as an array of ExtractedImage.
|
|
25
25
|
# @raise [MindeeError] if the crops array is empty.
|
|
26
26
|
def self.extract_crops(input_source, crops)
|
|
27
27
|
if crops.nil? || crops.empty?
|
|
@@ -35,15 +35,16 @@ module Mindee
|
|
|
35
35
|
polygons[crop.location.page] << crop.location.polygon
|
|
36
36
|
end
|
|
37
37
|
|
|
38
|
-
images =
|
|
38
|
+
images = Mindee::Image::ExtractedImages.new
|
|
39
39
|
polygons.each_with_index do |page_polygons, page_index|
|
|
40
|
-
|
|
41
|
-
|
|
40
|
+
images.concat(
|
|
41
|
+
Mindee::Image::ImageExtractor.extract_multiple_images_from_source(
|
|
42
|
+
input_source, page_index, page_polygons
|
|
43
|
+
)
|
|
42
44
|
)
|
|
43
|
-
images.concat(extracted)
|
|
44
45
|
end
|
|
45
46
|
|
|
46
|
-
|
|
47
|
+
images
|
|
47
48
|
end
|
|
48
49
|
end
|
|
49
50
|
end
|
|
@@ -18,7 +18,7 @@ module Mindee
|
|
|
18
18
|
#
|
|
19
19
|
# @param input_source [LocalInputSource] Input source to split.
|
|
20
20
|
# @param splits [Array<Array<Integer>>] List of sub-lists of pages to keep.
|
|
21
|
-
# @return [
|
|
21
|
+
# @return [PDF::ExtractedPDFs] A list of extracted PDFs.
|
|
22
22
|
# @raise [MindeeError] if no indexes are provided.
|
|
23
23
|
def self.extract_splits(input_source, splits)
|
|
24
24
|
raise Mindee::Error::MindeeError, 'No indexes provided.' if splits.nil? || splits.empty?
|
|
@@ -29,7 +29,7 @@ module Mindee
|
|
|
29
29
|
(split[0]..split[1]).to_a
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
-
|
|
32
|
+
pdf_extractor.extract_sub_documents(page_groups)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
35
|
end
|
|
@@ -0,0 +1,34 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Mindee
|
|
4
|
+
module V2
|
|
5
|
+
module Parsing
|
|
6
|
+
# Webhook payload returned when an inference fails before producing a result.
|
|
7
|
+
class FailedInferenceResponse < Mindee::V2::Parsing::CommonResponse
|
|
8
|
+
# @return [String] UUID of the failed inference.
|
|
9
|
+
attr_reader :inference_id
|
|
10
|
+
# @return [String] UUID of the model used.
|
|
11
|
+
attr_reader :model_id
|
|
12
|
+
# @return [String] Name of the input file.
|
|
13
|
+
attr_reader :file_name
|
|
14
|
+
# @return [String, nil] Alias sent for the file, if any.
|
|
15
|
+
attr_reader :file_alias
|
|
16
|
+
# @return [Mindee::V2::Parsing::ErrorResponse] Problem details for the failure, if available.
|
|
17
|
+
attr_reader :error
|
|
18
|
+
# @return [Time] Date and time when the inference was started.
|
|
19
|
+
attr_reader :created_at
|
|
20
|
+
|
|
21
|
+
def initialize(server_response)
|
|
22
|
+
super
|
|
23
|
+
|
|
24
|
+
@inference_id = server_response['inference_id']
|
|
25
|
+
@model_id = server_response['model_id']
|
|
26
|
+
@file_name = server_response['file_name']
|
|
27
|
+
@file_alias = server_response['file_alias']
|
|
28
|
+
@error = ErrorResponse.new(server_response['error'])
|
|
29
|
+
@created_at = Time.iso8601(server_response['created_at'])
|
|
30
|
+
end
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
end
|
|
34
|
+
end
|
data/lib/mindee/v2/parsing.rb
CHANGED
|
@@ -33,7 +33,7 @@ module Mindee
|
|
|
33
33
|
#
|
|
34
34
|
# @param input_source [Mindee::Input::Source::LocalInputSource] Local file to extract from
|
|
35
35
|
# @return [ExtractedImage]
|
|
36
|
-
def
|
|
36
|
+
def extract_from_input_source(input_source)
|
|
37
37
|
Image::ImageExtractor.extract_multiple_images_from_source(
|
|
38
38
|
input_source, @location.page, [@location.polygon]
|
|
39
39
|
)[0]
|
|
@@ -25,14 +25,6 @@ module Mindee
|
|
|
25
25
|
def to_s
|
|
26
26
|
@inference.to_s
|
|
27
27
|
end
|
|
28
|
-
|
|
29
|
-
# Apply the crop inference to a file and return a list of extracted images.
|
|
30
|
-
#
|
|
31
|
-
# @param input_source [Mindee::Input::Source::LocalInputSource] Local file to extract from
|
|
32
|
-
# @return [FileOperation::CropFiles] List of extracted PDFs
|
|
33
|
-
def extract_from_file(input_source)
|
|
34
|
-
FileOperation::Crop.extract_crops(input_source, @inference.result.crops)
|
|
35
|
-
end
|
|
36
28
|
end
|
|
37
29
|
end
|
|
38
30
|
end
|
|
@@ -20,6 +20,14 @@ module Mindee
|
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
+
# Apply the crop inference to a file and return a list of extracted images.
|
|
24
|
+
#
|
|
25
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource] Local file to extract from
|
|
26
|
+
# @return [Image::ExtractedImages] List of extracted images
|
|
27
|
+
def extract_from_input_source(input_source)
|
|
28
|
+
FileOperation::Crop.extract_crops(input_source, @crops)
|
|
29
|
+
end
|
|
30
|
+
|
|
23
31
|
# String representation.
|
|
24
32
|
# @return [String]
|
|
25
33
|
def to_s
|
|
@@ -34,7 +34,7 @@ module Mindee
|
|
|
34
34
|
#
|
|
35
35
|
# @param input_source [Mindee::Input::Source::LocalInputSource] Local file to extract from
|
|
36
36
|
# @return [PDF::ExtractedPDF]
|
|
37
|
-
def
|
|
37
|
+
def extract_from_input_source(input_source)
|
|
38
38
|
FileOperation::Split.extract_single_split(input_source, @page_range)
|
|
39
39
|
end
|
|
40
40
|
end
|
|
@@ -25,14 +25,6 @@ module Mindee
|
|
|
25
25
|
def to_s
|
|
26
26
|
@inference.to_s
|
|
27
27
|
end
|
|
28
|
-
|
|
29
|
-
# Splits the input PDF.
|
|
30
|
-
# @param input_source [Mindee::Input::Source::LocalInputSource] Path to the file or a File object.
|
|
31
|
-
# @return [FileOperation::SplitFiles]
|
|
32
|
-
def extract_from_file(input_source)
|
|
33
|
-
splits = @inference.result.splits.map(&:page_range)
|
|
34
|
-
FileOperation::Split.extract_splits(input_source, splits)
|
|
35
|
-
end
|
|
36
28
|
end
|
|
37
29
|
end
|
|
38
30
|
end
|
|
@@ -20,6 +20,13 @@ module Mindee
|
|
|
20
20
|
end
|
|
21
21
|
end
|
|
22
22
|
|
|
23
|
+
# Splits the input PDF.
|
|
24
|
+
# @param input_source [Mindee::Input::Source::LocalInputSource] Local input source to split.
|
|
25
|
+
# @return [PDF::ExtractedPDFs]
|
|
26
|
+
def extract_from_input_source(input_source)
|
|
27
|
+
FileOperation::Split.extract_splits(input_source, splits.map(&:page_range))
|
|
28
|
+
end
|
|
29
|
+
|
|
23
30
|
# String representation.
|
|
24
31
|
# @return [String]
|
|
25
32
|
def to_s
|
data/lib/mindee/version.rb
CHANGED
|
@@ -4,9 +4,10 @@ module Mindee
|
|
|
4
4
|
module ImageExtractor
|
|
5
5
|
def self.attach_image_as_new_file: (StringIO | File, ?format: String) -> Origami::PDF
|
|
6
6
|
def self.to_blob: () -> String
|
|
7
|
-
def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) ->
|
|
8
|
-
def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) ->
|
|
7
|
+
def self.extract_multiple_images_from_source: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] |Geometry::Polygon | Geometry::Quadrilateral]) -> ExtractedImages
|
|
8
|
+
def self.extract_images_from_polygons: (Input::Source::LocalInputSource, Integer, Array[Array[Geometry::Point] | Geometry::Polygon | Geometry::Quadrilateral]) -> ExtractedImages
|
|
9
9
|
def self.create_extracted_image: (StringIO | File, String, Integer, Integer) -> ExtractedImage
|
|
10
|
+
def self.crop_image: (MiniMagick::Image, Geometry::MinMax, Geometry::MinMax) -> (MiniMagick::Image)
|
|
10
11
|
def self.load_input_source_pdf_page_as_stringio: (Input::Source::LocalInputSource, Integer) -> (StringIO | File)
|
|
11
12
|
end
|
|
12
13
|
end
|
|
@@ -11,7 +11,6 @@ module Mindee
|
|
|
11
11
|
def self.pdf_to_magick_image: (StringIO | File, Integer) -> MiniMagick::Image
|
|
12
12
|
def self.normalize_polygon: (Geometry::Quadrilateral | Geometry::Polygon | Array[Geometry::Point]) -> Geometry::Quadrilateral
|
|
13
13
|
def self.read_page_content: (StringIO | File) -> (MiniMagick::Image)
|
|
14
|
-
def self.crop_image: (MiniMagick::Image, Geometry::MinMax, Geometry::MinMax) -> (MiniMagick::Image)
|
|
15
14
|
def self.write_image_to_buffer: (MiniMagick::Image, StringIO) -> void
|
|
16
15
|
def self.determine_file_extension: (Input::Source::LocalInputSource) -> String?
|
|
17
16
|
end
|
|
@@ -11,9 +11,9 @@ module Mindee
|
|
|
11
11
|
|
|
12
12
|
def cut_pages: (Array[Integer]) -> StringIO
|
|
13
13
|
|
|
14
|
-
def extract_sub_documents: (Array[Array[Integer]]) ->
|
|
14
|
+
def extract_sub_documents: (Array[Array[Integer]]) -> ExtractedPDFs
|
|
15
15
|
|
|
16
|
-
def extract_invoices: (Array[Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) ->
|
|
16
|
+
def extract_invoices: (Array[Mindee::V1::Product::InvoiceSplitter::InvoiceSplitterV1InvoicePageGroup] | Array[Array[Integer]], ?strict: bool) -> ExtractedPDFs
|
|
17
17
|
end
|
|
18
18
|
end
|
|
19
19
|
end
|
|
@@ -3,7 +3,7 @@ module Mindee
|
|
|
3
3
|
module FileOperation
|
|
4
4
|
module Crop
|
|
5
5
|
def self.extract_single_crop: (Input::Source::LocalInputSource, Parsing::Field::FieldLocation) -> Image::ExtractedImage
|
|
6
|
-
def self.extract_crops: (Input::Source::LocalInputSource, Array[Product::Crop::CropItem]) ->
|
|
6
|
+
def self.extract_crops: (Input::Source::LocalInputSource, Array[Product::Crop::CropItem]) -> Image::ExtractedImages
|
|
7
7
|
end
|
|
8
8
|
end
|
|
9
9
|
end
|
|
@@ -4,7 +4,7 @@ module Mindee
|
|
|
4
4
|
module Split
|
|
5
5
|
def self.extract_single_split: (Input::Source::LocalInputSource, Array[Integer]) -> PDF::ExtractedPDF
|
|
6
6
|
|
|
7
|
-
def self.extract_splits: (Input::Source::LocalInputSource, Array[Array[Integer]]) ->
|
|
7
|
+
def self.extract_splits: (Input::Source::LocalInputSource, Array[Array[Integer]]) -> PDF::ExtractedPDFs
|
|
8
8
|
end
|
|
9
9
|
end
|
|
10
10
|
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
module Mindee
|
|
2
|
+
module V2
|
|
3
|
+
module Parsing
|
|
4
|
+
class FailedInferenceResponse
|
|
5
|
+
def initialize: (Hash[String | Symbol, untyped]) -> void
|
|
6
|
+
attr_reader created_at: Time
|
|
7
|
+
attr_reader error: ::Mindee::V2::Parsing::ErrorResponse
|
|
8
|
+
attr_reader file_alias: String
|
|
9
|
+
attr_reader file_name: String
|
|
10
|
+
attr_reader inference_id: String
|
|
11
|
+
attr_reader model_id: String
|
|
12
|
+
end
|
|
13
|
+
end
|
|
14
|
+
end
|
|
15
|
+
end
|
|
@@ -9,7 +9,7 @@ module Mindee
|
|
|
9
9
|
|
|
10
10
|
def initialize: (Hash[String | Symbol, untyped]) -> void
|
|
11
11
|
|
|
12
|
-
def
|
|
12
|
+
def extract_from_input_source: (Input::Source::LocalInputSource) -> Image::ExtractedImage
|
|
13
13
|
|
|
14
14
|
def to_s: -> String
|
|
15
15
|
end
|
|
@@ -13,8 +13,6 @@ module Mindee
|
|
|
13
13
|
|
|
14
14
|
def _params_type: -> singleton(Params::CropParameters)
|
|
15
15
|
|
|
16
|
-
def extract_from_file: (Input::Source::LocalInputSource) -> FileOperation::CropFiles
|
|
17
|
-
|
|
18
16
|
def to_s: -> String
|
|
19
17
|
def self._params_type: () -> singleton(Params::CropParameters)
|
|
20
18
|
def self.slug: () -> String
|
|
@@ -9,7 +9,7 @@ module Mindee
|
|
|
9
9
|
|
|
10
10
|
def initialize: (Hash[String | Symbol, untyped]) -> void
|
|
11
11
|
|
|
12
|
-
def
|
|
12
|
+
def extract_from_input_source: (Input::Source::LocalInputSource) -> PDF::ExtractedPDF
|
|
13
13
|
|
|
14
14
|
def to_s: -> String
|
|
15
15
|
end
|
|
@@ -13,8 +13,6 @@ module Mindee
|
|
|
13
13
|
|
|
14
14
|
def _params_type: -> singleton(Params::SplitParameters)
|
|
15
15
|
|
|
16
|
-
def extract_from_file: (Mindee::Input::Source::LocalInputSource) -> FileOperation::SplitFiles
|
|
17
|
-
|
|
18
16
|
def to_s: -> String
|
|
19
17
|
def self._params_type: () -> singleton(Params::SplitParameters)
|
|
20
18
|
def self.slug: () -> String
|
|
@@ -5,7 +5,10 @@ module Mindee
|
|
|
5
5
|
class SplitResult
|
|
6
6
|
attr_reader splits: Array[SplitRange]
|
|
7
7
|
|
|
8
|
+
def extract_from_input_source: (Mindee::Input::Source::LocalInputSource) -> PDF::ExtractedPDFs
|
|
9
|
+
|
|
8
10
|
def initialize: (Hash[String | Symbol, untyped]) -> void
|
|
11
|
+
|
|
9
12
|
def to_s: -> String
|
|
10
13
|
end
|
|
11
14
|
end
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: mindee
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 5.
|
|
4
|
+
version: 5.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Mindee, SA
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-
|
|
11
|
+
date: 2026-07-03 00:00:00.000000000 Z
|
|
12
12
|
dependencies:
|
|
13
13
|
- !ruby/object:Gem::Dependency
|
|
14
14
|
name: base64
|
|
@@ -193,6 +193,7 @@ files:
|
|
|
193
193
|
- LICENSE
|
|
194
194
|
- README.md
|
|
195
195
|
- Rakefile
|
|
196
|
+
- SKILL.md
|
|
196
197
|
- Steepfile
|
|
197
198
|
- bin/console
|
|
198
199
|
- bin/mindee.rb
|
|
@@ -250,6 +251,7 @@ files:
|
|
|
250
251
|
- lib/mindee/http/response_validation.rb
|
|
251
252
|
- lib/mindee/image.rb
|
|
252
253
|
- lib/mindee/image/extracted_image.rb
|
|
254
|
+
- lib/mindee/image/extracted_images.rb
|
|
253
255
|
- lib/mindee/image/image_compressor.rb
|
|
254
256
|
- lib/mindee/image/image_extractor.rb
|
|
255
257
|
- lib/mindee/image/image_utils.rb
|
|
@@ -269,6 +271,7 @@ files:
|
|
|
269
271
|
- lib/mindee/page_options.rb
|
|
270
272
|
- lib/mindee/pdf.rb
|
|
271
273
|
- lib/mindee/pdf/extracted_pdf.rb
|
|
274
|
+
- lib/mindee/pdf/extracted_pdfs.rb
|
|
272
275
|
- lib/mindee/pdf/pdf_compressor.rb
|
|
273
276
|
- lib/mindee/pdf/pdf_extractor.rb
|
|
274
277
|
- lib/mindee/pdf/pdf_processor.rb
|
|
@@ -398,9 +401,7 @@ files:
|
|
|
398
401
|
- lib/mindee/v2/client.rb
|
|
399
402
|
- lib/mindee/v2/file_operations.rb
|
|
400
403
|
- lib/mindee/v2/file_operations/crop.rb
|
|
401
|
-
- lib/mindee/v2/file_operations/crop_files.rb
|
|
402
404
|
- lib/mindee/v2/file_operations/split.rb
|
|
403
|
-
- lib/mindee/v2/file_operations/split_files.rb
|
|
404
405
|
- lib/mindee/v2/http.rb
|
|
405
406
|
- lib/mindee/v2/http/.rubocop.yml
|
|
406
407
|
- lib/mindee/v2/http/api_v2_settings.rb
|
|
@@ -411,6 +412,7 @@ files:
|
|
|
411
412
|
- lib/mindee/v2/parsing/common_response.rb
|
|
412
413
|
- lib/mindee/v2/parsing/error_item.rb
|
|
413
414
|
- lib/mindee/v2/parsing/error_response.rb
|
|
415
|
+
- lib/mindee/v2/parsing/failed_inference_response.rb
|
|
414
416
|
- lib/mindee/v2/parsing/field.rb
|
|
415
417
|
- lib/mindee/v2/parsing/field/base_field.rb
|
|
416
418
|
- lib/mindee/v2/parsing/field/field_confidence.rb
|
|
@@ -492,6 +494,7 @@ files:
|
|
|
492
494
|
- sig/mindee/http/http_error_handler.rbs
|
|
493
495
|
- sig/mindee/http/response_validation.rbs
|
|
494
496
|
- sig/mindee/image/extracted_image.rbs
|
|
497
|
+
- sig/mindee/image/extracted_images.rbs
|
|
495
498
|
- sig/mindee/image/image_compressor.rbs
|
|
496
499
|
- sig/mindee/image/image_extractor.rbs
|
|
497
500
|
- sig/mindee/image/image_utils.rbs
|
|
@@ -507,6 +510,7 @@ files:
|
|
|
507
510
|
- sig/mindee/logging/logger.rbs
|
|
508
511
|
- sig/mindee/page_options.rbs
|
|
509
512
|
- sig/mindee/pdf/extracted_pdf.rbs
|
|
513
|
+
- sig/mindee/pdf/extracted_pdfs.rbs
|
|
510
514
|
- sig/mindee/pdf/pdf_compressor.rbs
|
|
511
515
|
- sig/mindee/pdf/pdf_extractor.rbs
|
|
512
516
|
- sig/mindee/pdf/pdf_processor.rbs
|
|
@@ -622,9 +626,7 @@ files:
|
|
|
622
626
|
- sig/mindee/v1/product/universal/universal_prediction.rbs
|
|
623
627
|
- sig/mindee/v2/client.rbs
|
|
624
628
|
- sig/mindee/v2/file_operation/crop.rbs
|
|
625
|
-
- sig/mindee/v2/file_operation/crop_files.rbs
|
|
626
629
|
- sig/mindee/v2/file_operation/split.rbs
|
|
627
|
-
- sig/mindee/v2/file_operation/split_files.rbs
|
|
628
630
|
- sig/mindee/v2/http/api_v2_settings.rbs
|
|
629
631
|
- sig/mindee/v2/http/mindee_api_v2.rbs
|
|
630
632
|
- sig/mindee/v2/parsing/base_inference.rbs
|
|
@@ -632,6 +634,7 @@ files:
|
|
|
632
634
|
- sig/mindee/v2/parsing/common_response.rbs
|
|
633
635
|
- sig/mindee/v2/parsing/error_item.rbs
|
|
634
636
|
- sig/mindee/v2/parsing/error_response.rbs
|
|
637
|
+
- sig/mindee/v2/parsing/failed_inference_response.rbs
|
|
635
638
|
- sig/mindee/v2/parsing/field/base_field.rbs
|
|
636
639
|
- sig/mindee/v2/parsing/field/field_confidence.rbs
|
|
637
640
|
- sig/mindee/v2/parsing/field/field_location.rbs
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Mindee
|
|
4
|
-
module V2
|
|
5
|
-
module FileOperation
|
|
6
|
-
# Collection of cropped files.
|
|
7
|
-
class CropFiles < Array
|
|
8
|
-
# Save all extracted crops to disk.
|
|
9
|
-
#
|
|
10
|
-
# @param path [String, Pathname] Path to save the extracted crops to.
|
|
11
|
-
# @param prefix [String] Prefix to add to the filename, defaults to 'crop'.
|
|
12
|
-
# @param file_format [String, nil] File format to save the crops as, defaults to jpg if nil.]
|
|
13
|
-
def save_all_to_disk(path, prefix: 'crop', file_format: nil)
|
|
14
|
-
FileUtils.mkdir_p(path)
|
|
15
|
-
each.with_index(1) do |crop, idx|
|
|
16
|
-
filename = "#{prefix}_#{format('%03d', idx)}.jpg"
|
|
17
|
-
file_path = File.join(path.to_s, filename)
|
|
18
|
-
|
|
19
|
-
crop.write_to_file(file_path, file_format)
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
end
|
|
@@ -1,25 +0,0 @@
|
|
|
1
|
-
# frozen_string_literal: true
|
|
2
|
-
|
|
3
|
-
module Mindee
|
|
4
|
-
module V2
|
|
5
|
-
module FileOperation
|
|
6
|
-
# Collection of split files.
|
|
7
|
-
class SplitFiles < Array
|
|
8
|
-
# Save all extracted splits to disk.
|
|
9
|
-
#
|
|
10
|
-
# @param path [String, Pathname] Path to save the extracted splits to.
|
|
11
|
-
# @param prefix [String] Prefix to add to the filename, defaults to 'split'.
|
|
12
|
-
def save_all_to_disk(path, prefix: 'split')
|
|
13
|
-
FileUtils.mkdir_p(path)
|
|
14
|
-
|
|
15
|
-
each.with_index(1) do |split, idx|
|
|
16
|
-
filename = "#{prefix}_#{format('%03d', idx)}.pdf"
|
|
17
|
-
file_path = File.join(path.to_s, filename)
|
|
18
|
-
|
|
19
|
-
split.write_to_file(file_path)
|
|
20
|
-
end
|
|
21
|
-
end
|
|
22
|
-
end
|
|
23
|
-
end
|
|
24
|
-
end
|
|
25
|
-
end
|