mindee 3.17.0 → 3.18.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +8 -0
- data/README.md +4 -4
- data/bin/mindee.rb +0 -6
- data/docs/custom_v1.md +1 -1
- data/docs/getting_started.md +5 -5
- data/lib/mindee/extraction/tax_extractor/tax_extractor.rb +34 -19
- data/lib/mindee/input/sources/base64_input_source.rb +31 -0
- data/lib/mindee/input/sources/bytes_input_source.rb +21 -0
- data/lib/mindee/input/sources/file_input_source.rb +20 -0
- data/lib/mindee/input/sources/local_input_source.rb +183 -0
- data/lib/mindee/input/sources/path_input_source.rb +20 -0
- data/lib/mindee/input/sources/url_input_source.rb +127 -0
- data/lib/mindee/input/sources.rb +6 -248
- data/lib/mindee/product.rb +0 -1
- data/lib/mindee/version.rb +1 -1
- metadata +8 -6
- data/docs/code_samples/carte_vitale_v1.txt +0 -19
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb +0 -41
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb +0 -52
- data/lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb +0 -34
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6d2f2fd3fa8b6ca9d7756f08d3f7b526b510866d0c75a33ccfab1ff20b3ba2c6
|
4
|
+
data.tar.gz: 7f09aa2efbe008271aa229515ce7f804680ea46d7318c6fe62d57889785762bb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 8bfa14969fc8938ca888b4c44a4861c969f816e5611bdf0150cec25677c3a6bb4cbd34d8e8b5a8ff48d2483fe8ed1ae462358a67bcd40176b3184122c65b5e76
|
7
|
+
data.tar.gz: 5afeb6406e28ea57c2a56d7f5e52f90739cb7be1b77648773d3365d52adf72f8ea6b97be16838aa428a32ce5cbc414acabe9b0d61196fc05a2027231062598f9
|
data/CHANGELOG.md
CHANGED
@@ -1,5 +1,13 @@
|
|
1
1
|
# Mindee Ruby API Library Changelog
|
2
2
|
|
3
|
+
## v3.18.0 - 2024-12-13
|
4
|
+
### Changes
|
5
|
+
* :sparkles: allow local downloading of remote sources
|
6
|
+
* :coffin: remove support for (FR) Carte Vitale V1 in favor of French Health Card V1
|
7
|
+
### Fixes
|
8
|
+
* :bug: fix tax-extraction script
|
9
|
+
|
10
|
+
|
3
11
|
## v3.17.0 - 2024-11-28
|
4
12
|
### Changes
|
5
13
|
* :sparkles: add support for workflows
|
data/README.md
CHANGED
@@ -116,7 +116,7 @@ result = mindee_client.parse(
|
|
116
116
|
puts result.document
|
117
117
|
```
|
118
118
|
|
119
|
-
### Custom
|
119
|
+
### Custom Documents (docTI & Custom APIs)
|
120
120
|
|
121
121
|
```ruby
|
122
122
|
require 'mindee'
|
@@ -131,9 +131,9 @@ endpoint = mindee_client.create_endpoint(
|
|
131
131
|
# Load a file from disk
|
132
132
|
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
133
133
|
|
134
|
-
result = mindee_client.
|
134
|
+
result = mindee_client.enqueue_and_parse(
|
135
135
|
input_source,
|
136
|
-
Mindee::Product::
|
136
|
+
Mindee::Product::Generated::GeneratedV1,
|
137
137
|
endpoint: endpoint
|
138
138
|
)
|
139
139
|
|
@@ -181,7 +181,7 @@ customize the experience.
|
|
181
181
|
* [EU Driver License OCR Ruby](https://developers.mindee.com/docs/ruby-eu-driver-license-ocr)
|
182
182
|
* [FR Bank Account Details OCR Ruby](https://developers.mindee.com/docs/ruby-fr-bank-account-details-ocr)
|
183
183
|
* [FR Bank Statement OCR Ruby](https://developers.mindee.com/docs/ruby-fr-bank-statement-ocr)
|
184
|
-
* [FR
|
184
|
+
* [FR Health Card OCR Ruby](https://developers.mindee.com/docs/ruby-fr-health-card-ocr)
|
185
185
|
* [FR ID Card OCR Ruby](https://developers.mindee.com/docs/ruby-fr-carte-nationale-didentite-ocr)
|
186
186
|
* [US Bank Check OCR Ruby](https://developers.mindee.com/docs/ruby-us-bank-check-ocr)
|
187
187
|
* [US Driver License OCR Ruby](https://developers.mindee.com/docs/ruby-us-driver-license-ocr)
|
data/bin/mindee.rb
CHANGED
@@ -97,12 +97,6 @@ DOCUMENTS = {
|
|
97
97
|
sync: false,
|
98
98
|
async: true,
|
99
99
|
},
|
100
|
-
"fr-carte-vitale" => {
|
101
|
-
description: "FR Carte Vitale",
|
102
|
-
doc_class: Mindee::Product::FR::CarteVitale::CarteVitaleV1,
|
103
|
-
sync: true,
|
104
|
-
async: false,
|
105
|
-
},
|
106
100
|
"fr-id-card" => {
|
107
101
|
description: "FR ID Card",
|
108
102
|
doc_class: Mindee::Product::FR::IdCard::IdCardV2,
|
data/docs/custom_v1.md
CHANGED
@@ -4,7 +4,7 @@ category: 622b805aaec68102ea7fcbc2
|
|
4
4
|
slug: ruby-api-builder-ocr
|
5
5
|
parentDoc: 6294d97ee723f1008d2ab28e
|
6
6
|
---
|
7
|
-
> 🚧 This product is still supported, but is considered to be deprecated. If you are looking for the
|
7
|
+
> 🚧 This product is still supported, but is considered to be deprecated. If you are looking for the docTI API documentation, you can find it [here](https://developers.mindee.com/docs/ruby-generated-ocr).
|
8
8
|
|
9
9
|
# Quick-Start
|
10
10
|
|
data/docs/getting_started.md
CHANGED
@@ -226,20 +226,20 @@ result = mindee_client.parse(
|
|
226
226
|
)
|
227
227
|
```
|
228
228
|
|
229
|
-
### Custom Documents
|
229
|
+
### Custom Documents (docTI)
|
230
230
|
For custom documents, the endpoint to use must also be set, and it must take in an `endpoint_name`:
|
231
231
|
|
232
232
|
```ruby
|
233
|
-
endpoint = mindee_client.create_endpoint(endpoint_name: 'wnine')
|
233
|
+
endpoint = mindee_client.create_endpoint(endpoint_name: 'wnine', account_name: 'my-account')
|
234
234
|
|
235
|
-
result = mindee_client.
|
235
|
+
result = mindee_client.enqueue_and_parse(
|
236
236
|
input_source,
|
237
|
-
Mindee::Product::
|
237
|
+
Mindee::Product::Generated::GeneratedV1,
|
238
238
|
endpoint: endpoint
|
239
239
|
)
|
240
240
|
```
|
241
241
|
|
242
|
-
This is because the `
|
242
|
+
This is because the `GeneratedV1` class is enough to handle the return processing, but the actual endpoint needs to be specified.
|
243
243
|
|
244
244
|
## Process the Result
|
245
245
|
The response object is common to all documents, including custom documents. The main properties are:
|
@@ -2,6 +2,8 @@
|
|
2
2
|
|
3
3
|
require_relative 'ocr_extractor'
|
4
4
|
|
5
|
+
# rubocop:disable Metrics/ClassLength
|
6
|
+
|
5
7
|
module Mindee
|
6
8
|
module Extraction
|
7
9
|
# Tax extractor class
|
@@ -72,9 +74,12 @@ module Mindee
|
|
72
74
|
reconstructed_hash['code'] =
|
73
75
|
found_hash['code'].nil? ? found_hash['code'] : found_hash['code'].sub(%r{\s*\.*\s*$}, '')
|
74
76
|
|
75
|
-
if found_hash['rate']
|
76
|
-
found_hash['rate']
|
77
|
-
found_hash['rate']
|
77
|
+
if found_hash['rate']
|
78
|
+
if found_hash['rate'].abs < 1
|
79
|
+
found_hash['rate'] *= 10
|
80
|
+
elsif found_hash['rate'].abs > 100
|
81
|
+
found_hash['rate'] /= 10
|
82
|
+
end
|
78
83
|
end
|
79
84
|
found_hash = swap_rates_if_needed(found_hash, min_rate_percentage, max_rate_percentage)
|
80
85
|
found_hash = decimate_rates_if_needed(found_hash)
|
@@ -125,18 +130,28 @@ module Mindee
|
|
125
130
|
# @param found_hash [Hash] Hash of currently retrieved values
|
126
131
|
# @return [Hash]
|
127
132
|
def self.set_base_and_value(reconstructed_hash, found_hash)
  found_base, found_value = found_hash.values_at('base', 'value')
  # Nothing usable was extracted: hand the hash back untouched.
  return reconstructed_hash unless found_base || found_value

  if found_base && found_value
    # NOTE(review): minmax stores the SMALLER amount under 'base' and the larger one
    # under 'value' -- confirm this ordering is the intended one.
    smaller, larger = [found_base, found_value].minmax
    reconstructed_hash['base'] = smaller
    reconstructed_hash['value'] = larger
  elsif found_base
    reconstructed_hash['base'] = found_base
  else
    # Only the value is known: keep it, then try to derive the base from the rate.
    reconstructed_hash['value'] = found_value
    calculate_base(reconstructed_hash)
  end

  reconstructed_hash
end
|
139
147
|
|
148
|
+
# Derives the 'base' entry of a tax hash from its 'value' and 'rate' entries.
# The rate is read as a percentage (it is divided by 100 before use).
# The hash is left untouched when the value is missing or when the rate is
# missing, zero or negative.
# @param hash [Hash] Tax hash holding at least the 'rate' and 'value' keys.
# @return [Float, nil] The computed base, or nil if nothing could be computed.
def self.calculate_base(hash)
  rate = hash['rate']
  value = hash['value']
  # Guarding on the value as well avoids a NoMethodError on nil.
  return unless value && rate&.positive?

  hash['base'] = value / (rate / 100.0)
end
|
154
|
+
|
140
155
|
# Extracts a single custom type of tax.
|
141
156
|
# For the sake of simplicity, this only extracts the first example, unless specifically instructed otherwise.
|
142
157
|
# @param ocr_result [Mindee::Parsing::Common::Ocr::Ocr] result of the OCR.
|
@@ -149,7 +164,6 @@ module Mindee
|
|
149
164
|
|
150
165
|
tax_names.sort!
|
151
166
|
found_hash = pick_best(extract_horizontal_tax(ocr_result, tax_names), tax_names)
|
152
|
-
# a tax is considered found horizontally if it has a value, otherwise it is vertical
|
153
167
|
if found_hash.nil? || found_hash['value'].nil?
|
154
168
|
found_hash = extract_vertical_tax(ocr_result, tax_names,
|
155
169
|
found_hash)
|
@@ -240,14 +254,14 @@ module Mindee
|
|
240
254
|
linear_pattern_percent_first = %r{
|
241
255
|
((?:\s*-\s*)?(?:\d*[.,])*\d+[ ]?%?|%?[ ]?(?:\s*-\s*)?(?:\d*[.,])*\d+)?[ .]?
|
242
256
|
([a-zA-ZÀ-ÖØ-öø-ÿ .]*[a-zA-ZÀ-ÖØ-öø-ÿ]?)[ .]?
|
243
|
-
((?:\s*-\s*)?(?:\d*[.,])+\d
|
244
|
-
((?:\s*-\s*)?(\d*[.,])*\d
|
257
|
+
((?:\s*-\s*)?(?:\d*[.,])+\d+)?[ .]*
|
258
|
+
((?:\s*-\s*)?(\d*[.,])*\d+)?
|
245
259
|
}x
|
246
260
|
linear_pattern_percent_second = %r{
|
247
261
|
([a-zA-ZÀ-ÖØ-öø-ÿ .]*[a-zA-ZÀ-ÖØ-öø-ÿ]?)[ .]*
|
248
262
|
((?:\s*-\s*)?(?:\d*[.,])*\d+[ ]?%?|%?[ ]?(?:\s*-\s*)?(?:\d*[.,])*\d+)?[ .]?
|
249
|
-
((?:\s*-\s*)?(?:\d*[.,])+\d
|
250
|
-
((?:\s*-\s*)?(\d*[.,])*\d
|
263
|
+
((?:\s*-\s*)?(?:\d*[.,])+\d+)?[ .]*
|
264
|
+
((?:\s*-\s*)?(\d*[.,])*\d+)?
|
251
265
|
}x
|
252
266
|
ocr_result.mvision_v1.pages.each.with_index do |page, page_id|
|
253
267
|
page.all_lines.each do |line|
|
@@ -304,7 +318,7 @@ module Mindee
|
|
304
318
|
page.all_words.each do |word|
|
305
319
|
next if match_index(word.text, tax_names).nil?
|
306
320
|
|
307
|
-
reconstructed_line = ocr_result.reconstruct_vertically(word.polygon, page_id)
|
321
|
+
reconstructed_line = ocr_result.reconstruct_vertically(word.polygon, page_id, 0.25)
|
308
322
|
found_hash['page_id'] = page_id if found_hash['page_id'].nil?
|
309
323
|
found_hash['code'] = word.text.strip if found_hash['code'].nil?
|
310
324
|
found_hash = extract_vertical_tax_values(reconstructed_line, found_hash)
|
@@ -316,8 +330,9 @@ module Mindee
|
|
316
330
|
private_class_method :extract_percentage_from_tax, :extract_basis_and_value, :extract_tax_from_horizontal_line,
|
317
331
|
:extract_horizontal_tax, :extract_vertical_tax_values, :extract_vertical_tax,
|
318
332
|
:create_tax_field, :fix_rate, :pick_best, :calculate_score, :curate_values,
|
319
|
-
:decimate_rates_if_needed, :
|
320
|
-
:swap_rates_if_needed
|
333
|
+
:decimate_rates_if_needed, :set_base_and_value, :valid_candidate?,
|
334
|
+
:swap_rates_if_needed, :calculate_base
|
321
335
|
end
|
322
336
|
end
|
323
337
|
end
|
338
|
+
# rubocop:enable Metrics/ClassLength
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Input
|
7
|
+
module Source
|
8
|
+
# Load a document from a base64 string.
class Base64InputSource < LocalInputSource
  # Decodes the base64 payload into an in-memory, binary-encoded stream.
  # @param base64_string [String]
  # @param filename [String]
  # @param fix_pdf [Boolean]
  def initialize(base64_string, filename, fix_pdf: false)
    decoded_bytes = base64_string.unpack1('m*')
    binary_stream = StringIO.new(decoded_bytes)
    binary_stream.set_encoding(Encoding::BINARY)
    super(binary_stream, filename, fix_pdf: fix_pdf)
  end

  # Overload of the same function to prevent a base64 from being re-encoded.
  # The stream content is returned base64-encoded.
  # @param close [Boolean] Whether to close the underlying stream after reading.
  # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
  def read_document(close: true)
    @io_stream.seek(0)
    raw_bytes = @io_stream.read
    @io_stream.close if close

    encoded_payload = [raw_bytes].pack('m')
    escaped_name = Source.convert_to_unicode_escape(@filename)
    ['document', encoded_payload, { filename: escaped_name }]
  end
end
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Input
|
7
|
+
module Source
|
8
|
+
# Load a document from raw bytes.
class BytesInputSource < LocalInputSource
  # Wraps the given bytes in an in-memory, binary-encoded stream.
  # @param raw_bytes [String]
  # @param filename [String]
  # @param fix_pdf [Boolean]
  def initialize(raw_bytes, filename, fix_pdf: false)
    binary_stream = StringIO.new(raw_bytes).tap do |stream|
      stream.set_encoding(Encoding::BINARY)
    end
    super(binary_stream, filename, fix_pdf: fix_pdf)
  end
end
|
19
|
+
end
|
20
|
+
end
|
21
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Input
|
7
|
+
module Source
|
8
|
+
# Load a document from a file handle.
class FileInputSource < LocalInputSource
  # Uses the provided handle directly as the underlying stream.
  # @param input_file [File]
  # @param filename [String]
  # @param fix_pdf [Boolean]
  def initialize(input_file, filename, fix_pdf: false)
    super(input_file, filename, fix_pdf: fix_pdf)
  end
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,183 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
require 'marcel'
|
5
|
+
|
6
|
+
require_relative '../../pdf'
|
7
|
+
require_relative '../../image'
|
8
|
+
|
9
|
+
module Mindee
|
10
|
+
module Input
|
11
|
+
# Document source handling.
|
12
|
+
module Source
|
13
|
+
# Mime types accepted by the server.
ALLOWED_MIME_TYPES = %w[
  application/pdf
  image/heic
  image/png
  image/jpeg
  image/tiff
  image/webp
].freeze
|
22
|
+
|
23
|
+
# Standard error for invalid mime types.
# Serves as the common parent of the mime-type related errors.
class MimeTypeError < StandardError; end
|
26
|
+
|
27
|
+
# Error sent if the file's mimetype isn't allowed
class InvalidMimeTypeError < MimeTypeError
  # @return [String] The mime type that was rejected.
  attr_reader :invalid_mimetype

  # Builds a message naming the rejected mime type and listing the allowed ones.
  # @param mime_type [String]
  def initialize(mime_type)
    @invalid_mimetype = mime_type
    allowed_list = ALLOWED_MIME_TYPES.join(', ')
    super("'#{mime_type}' mime type not allowed, must be one of #{allowed_list}")
  end
end
|
38
|
+
|
39
|
+
# Error sent if a pdf file couldn't be fixed
class UnfixablePDFError < MimeTypeError
  # Takes no argument: the message is always the same.
  def initialize
    failure_message = "Corrupted PDF couldn't be repaired."
    super(failure_message)
  end
end
|
45
|
+
|
46
|
+
# Base class for loading documents.
# Holds a stream, the name of the file it represents and the mime type detected for it.
class LocalInputSource
  # @return [String]
  attr_reader :filename
  # @return [String]
  attr_reader :file_mimetype
  # @return [StringIO]
  attr_reader :io_stream

  # Detects the mime type of the stream and rejects anything not in ALLOWED_MIME_TYPES.
  # When `fix_pdf` is set, detection ignores the filename and, for a rejected file whose
  # name ends in '.pdf', a repair is attempted before giving up.
  # @param io_stream [StringIO]
  # @param filename [String]
  # @param fix_pdf [Boolean]
  # @raise [InvalidMimeTypeError] if the detected mime type is not allowed.
  # @raise [UnfixablePDFError] if a repair was attempted and failed.
  def initialize(io_stream, filename, fix_pdf: false)
    @io_stream = io_stream
    @filename = filename
    @file_mimetype = if fix_pdf
                       Marcel::MimeType.for @io_stream
                     else
                       Marcel::MimeType.for @io_stream, name: @filename
                     end
    return if ALLOWED_MIME_TYPES.include? @file_mimetype

    if filename.end_with?('.pdf') && fix_pdf
      rescue_broken_pdf(@io_stream)
      @file_mimetype = Marcel::MimeType.for @io_stream

      return if ALLOWED_MIME_TYPES.include? @file_mimetype
    end

    raise InvalidMimeTypeError, @file_mimetype.to_s
  end

  # Attempts to fix pdf files if mimetype is rejected.
  # "Broken PDFs" are often a result of third-party injecting invalid headers.
  # This attempts to remove them and send the file
  # @param stream [StringIO]
  # @return [StringIO] The replacement stream, positioned at its start.
  # @raise [UnfixablePDFError] if no '%PDF-' marker is followed by content, or if the
  #   marker ends more than 500 bytes into the stream.
  def rescue_broken_pdf(stream)
    stream.gets('%PDF-')
    raise UnfixablePDFError if stream.eof? || stream.pos > 500

    # Step back over the 5-byte marker so it is kept in the repaired content.
    stream.pos = stream.pos - 5
    data = stream.read
    @io_stream.close

    @io_stream = StringIO.new
    @io_stream << data
    # Writing leaves the cursor at the end; rewind so the next reader starts at byte 0.
    @io_stream.rewind
    @io_stream
  end

  # Shorthand for pdf mimetype validation.
  # @return [Boolean]
  def pdf?
    @file_mimetype.to_s == 'application/pdf'
  end

  # Parses a PDF file according to provided options.
  # The current stream is replaced by the result of the processing.
  # @param options [Hash, nil] Page cutting/merge options:
  #
  #  * `:page_indexes` Zero-based list of page indexes.
  #  * `:operation` Operation to apply on the document, given the `page_indexes` specified:
  #      * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
  #      * `:REMOVE` - remove the specified pages, and keep all others.
  #  * `:on_min_pages` Apply the operation only if document has at least this many pages.
  def process_pdf(options)
    @io_stream.seek(0)
    # Fully qualified, like in #count_pdf_pages: the bare constant name is not
    # reachable through the Mindee::Input::Source lexical scope.
    @io_stream = Mindee::PDF::PdfProcessor.parse(@io_stream, options)
  end

  # Reads a document.
  # @param close [Boolean] Whether to close the underlying stream after reading.
  # @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
  def read_document(close: true)
    @io_stream.seek(0)
    # Avoids needlessly re-packing some files
    data = @io_stream.read
    @io_stream.close if close
    ['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
  end

  # Counts the pages of the document.
  # @return [Integer] 1 for anything that is not a PDF, the page count otherwise.
  def count_pdf_pages
    return 1 unless pdf?

    @io_stream.seek(0)
    pdf_processor = Mindee::PDF::PdfProcessor.open_pdf(@io_stream)
    pdf_processor.pages.size
  end

  # Compresses the file, according to the provided info.
  # The current stream is replaced by the compressed one, rewound to its start.
  # @param [Integer] quality Quality of the output file.
  # @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
  # @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
  # @param [Boolean] force_source_text Whether to force the operation on PDFs with source text.
  #  This will attempt to re-render PDF text over the rasterized original. If disabled, the operation is ignored.
  #  WARNING: this operation is strongly discouraged.
  # @param [Boolean] disable_source_text If the PDF has source text, whether to re-apply it to the original or
  #  not. Needs force_source_text to work.
  def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
    buffer = if pdf?
               Mindee::PDF::PDFCompressor.compress_pdf(
                 @io_stream,
                 quality: quality,
                 force_source_text_compression: force_source_text,
                 disable_source_text: disable_source_text
               )
             else
               Mindee::Image::ImageCompressor.compress_image(
                 @io_stream,
                 quality: quality,
                 max_width: max_width,
                 max_height: max_height
               )
             end
    @io_stream = buffer
    @io_stream.rewind
  end

  # Checks whether the file has source text if it is a pdf. False otherwise
  # @return [Boolean] True if the file is a PDF and has source text.
  def source_text?
    Mindee::PDF::PDFTools.source_text?(@io_stream)
  end
end
|
166
|
+
|
167
|
+
# Replaces every multi-byte character by a `\u` escape built from its code point
# (lowercase hexadecimal, left-padded with zeros to at least 4 digits).
# Single-byte characters are copied over unchanged.
# @param string [String]
# @return [String] A new string containing the escaped text.
def self.convert_to_unicode_escape(string)
  string.each_char.with_object(+'') do |character, escaped|
    if character.bytesize > 1
      code_point = character.unpack1('U')
      escaped << "\\u#{code_point.to_s(16).rjust(4, '0')}"
    else
      escaped << character
    end
  end
end
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'stringio'
|
4
|
+
|
5
|
+
module Mindee
|
6
|
+
module Input
|
7
|
+
# Document source handling.
|
8
|
+
module Source
|
9
|
+
# Load a document from a path.
class PathInputSource < LocalInputSource
  # Opens the file in binary mode and uses its base name as the filename.
  # @param filepath [String]
  # @param fix_pdf [Boolean]
  def initialize(filepath, fix_pdf: false)
    io_stream = File.open(filepath, 'rb')
    begin
      super(io_stream, File.basename(filepath), fix_pdf: fix_pdf)
    rescue StandardError
      # The parent constructor rejected the file: release the handle instead of
      # leaking it until garbage collection, then let the error propagate.
      io_stream.close unless io_stream.closed?
      raise
    end
  end
end
|
18
|
+
end
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'net/http'
|
4
|
+
require 'uri'
|
5
|
+
require 'fileutils'
|
6
|
+
|
7
|
+
module Mindee
|
8
|
+
module Input
|
9
|
+
module Source
|
10
|
+
# Load a remote document from a file url.
class UrlInputSource
  # @return [String]
  attr_reader :url

  # @param url [String] Address of the remote file; must start with 'https://'.
  # @raise [RuntimeError] if the URL does not start with 'https://'.
  def initialize(url)
    raise 'URL must be HTTPS' unless url.start_with? 'https://'

    @url = url
  end

  # Downloads the file from the URL and saves it to the specified path.
  #
  # @param path [String] Path to save the file to.
  # @param filename [String, nil] Optional name to give to the file.
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [String] The full path of the saved file.
  def save_to_file(path, filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
    response_body = fetch_file_content(username: username, password: password, token: token,
                                       max_redirects: max_redirects)

    filename = fill_filename(filename)

    full_path = File.join(path.chomp('/'), filename)
    # Binary write: the payload is a document (PDF/image), so no newline or
    # encoding translation may be applied to it.
    File.binwrite(full_path, response_body)

    full_path
  end

  # Downloads the file from the url, and returns a BytesInputSource wrapper object for it.
  #
  # @param filename [String, nil] Optional name to give to the file.
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [BytesInputSource] The downloaded content wrapped as a local input source.
  def as_local_input_source(filename: nil, username: nil, password: nil, token: nil, max_redirects: 3)
    filename = fill_filename(filename)
    response_body = fetch_file_content(username: username, password: password, token: token,
                                       max_redirects: max_redirects)

    BytesInputSource.new(response_body, filename)
  end

  # Fetches the file content from the URL.
  #
  # @param username [String, nil] Optional username for authentication.
  # @param password [String, nil] Optional password for authentication.
  # @param token [String, nil] Optional token for JWT-based authentication.
  # @param max_redirects [Integer] Maximum amount of redirects to follow.
  # @return [String] The downloaded file content.
  # @raise [RuntimeError] if the final response status is outside the 200..299 range.
  def fetch_file_content(username: nil, password: nil, token: nil, max_redirects: 3)
    uri = URI.parse(@url)
    request = Net::HTTP::Get.new(uri)

    request['Authorization'] = "Bearer #{token}" if token
    # Applied after the token, so basic credentials win when both are given.
    request.basic_auth(username, password) if username && password

    response = make_request(uri, request, max_redirects)
    if response.code.to_i > 299
      raise "Failed to download file: HTTP status code #{response.code}"
    elsif response.code.to_i < 200
      raise "Failed to download file: Invalid response code #{response.code}."
    end

    response.body
  end

  private

  # Last segment of the URL path, used as the default filename.
  def extract_filename_from_url(uri)
    File.basename(uri.path)
  end

  # Returns the given filename, the one found in the URL when none is given, or a
  # generated name when the candidate is empty or has no extension.
  def fill_filename(filename)
    filename ||= extract_filename_from_url(URI.parse(@url))
    if filename.empty? || File.extname(filename).empty?
      filename = generate_file_name(extension: get_file_extension(filename))
    end
    filename
  end

  # Performs the request, following redirections while `max_redirects` is positive.
  # Each hop uses its own connection, closed before the next one is opened.
  # Redirected requests are plain GETs: the headers of the initial request
  # (including any credentials) are not forwarded.
  def make_request(uri, request, max_redirects)
    response = Net::HTTP.start(uri.hostname, uri.port, use_ssl: true) do |http|
      http.request(request)
    end
    return response unless response.is_a?(Net::HTTPRedirection) && max_redirects.positive?

    location = response['location']
    raise 'No location in redirection header.' if location.nil?

    new_uri = redirect_target(uri, location)
    make_request(new_uri, Net::HTTP::Get.new(new_uri), max_redirects - 1)
  end

  # Resolves a Location header against the URI that produced it, so that relative
  # redirections are followed on the same host.
  def redirect_target(uri, location)
    URI.join(uri.to_s, location)
  end

  # Lowercased extension of the filename (with the leading dot), or nil if it has none.
  def get_file_extension(filename)
    ext = File.extname(filename)
    ext.empty? ? nil : ext.downcase
  end

  # Builds a timestamped name with a random suffix; the extension defaults to '.tmp'.
  def generate_file_name(extension: nil)
    extension ||= '.tmp'
    random_string = Array.new(8) { rand(36).to_s(36) }.join
    "mindee_temp_#{Time.now.strftime('%Y-%m-%d_%H-%M-%S')}_#{random_string}#{extension}"
  end
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
data/lib/mindee/input/sources.rb
CHANGED
@@ -1,250 +1,8 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
require_relative '
|
7
|
-
require_relative '
|
8
|
-
|
9
|
-
module Mindee
|
10
|
-
module Input
|
11
|
-
# Document source handling.
|
12
|
-
module Source
|
13
|
-
# Mime types accepted by the server.
|
14
|
-
ALLOWED_MIME_TYPES = [
|
15
|
-
'application/pdf',
|
16
|
-
'image/heic',
|
17
|
-
'image/png',
|
18
|
-
'image/jpeg',
|
19
|
-
'image/tiff',
|
20
|
-
'image/webp',
|
21
|
-
].freeze
|
22
|
-
|
23
|
-
# Standard error for invalid mime types
|
24
|
-
class MimeTypeError < StandardError
|
25
|
-
end
|
26
|
-
|
27
|
-
# Error sent if the file's mimetype isn't allowed
|
28
|
-
class InvalidMimeTypeError < MimeTypeError
|
29
|
-
# @return [String]
|
30
|
-
attr_reader :invalid_mimetype
|
31
|
-
|
32
|
-
# @param mime_type [String]
|
33
|
-
def initialize(mime_type)
|
34
|
-
@invalid_mimetype = mime_type
|
35
|
-
super("'#{@invalid_mimetype}' mime type not allowed, must be one of #{ALLOWED_MIME_TYPES.join(', ')}")
|
36
|
-
end
|
37
|
-
end
|
38
|
-
|
39
|
-
# Error sent if a pdf file couldn't be fixed
|
40
|
-
class UnfixablePDFError < MimeTypeError
|
41
|
-
def initialize
|
42
|
-
super("Corrupted PDF couldn't be repaired.")
|
43
|
-
end
|
44
|
-
end
|
45
|
-
|
46
|
-
# Base class for loading documents.
|
47
|
-
class LocalInputSource
|
48
|
-
# @return [String]
|
49
|
-
attr_reader :filename
|
50
|
-
# @return [String]
|
51
|
-
attr_reader :file_mimetype
|
52
|
-
# @return [StringIO]
|
53
|
-
attr_reader :io_stream
|
54
|
-
|
55
|
-
# @param io_stream [StringIO]
|
56
|
-
# @param filename [String]
|
57
|
-
# @param fix_pdf [Boolean]
|
58
|
-
def initialize(io_stream, filename, fix_pdf: false)
|
59
|
-
@io_stream = io_stream
|
60
|
-
@filename = filename
|
61
|
-
@file_mimetype = if fix_pdf
|
62
|
-
Marcel::MimeType.for @io_stream
|
63
|
-
else
|
64
|
-
Marcel::MimeType.for @io_stream, name: @filename
|
65
|
-
end
|
66
|
-
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
67
|
-
|
68
|
-
if filename.end_with?('.pdf') && fix_pdf
|
69
|
-
rescue_broken_pdf(@io_stream)
|
70
|
-
@file_mimetype = Marcel::MimeType.for @io_stream
|
71
|
-
|
72
|
-
return if ALLOWED_MIME_TYPES.include? @file_mimetype
|
73
|
-
end
|
74
|
-
|
75
|
-
raise InvalidMimeTypeError, @file_mimetype.to_s
|
76
|
-
end
|
77
|
-
|
78
|
-
# Attempts to fix pdf files if mimetype is rejected.
|
79
|
-
# "Broken PDFs" are often a result of third-party injecting invalid headers.
|
80
|
-
# This attempts to remove them and send the file
|
81
|
-
# @param stream [StringIO]
|
82
|
-
def rescue_broken_pdf(stream)
|
83
|
-
stream.gets('%PDF-')
|
84
|
-
raise UnfixablePDFError if stream.eof? || stream.pos > 500
|
85
|
-
|
86
|
-
stream.pos = stream.pos - 5
|
87
|
-
data = stream.read
|
88
|
-
@io_stream.close
|
89
|
-
|
90
|
-
@io_stream = StringIO.new
|
91
|
-
@io_stream << data
|
92
|
-
end
|
93
|
-
|
94
|
-
# Shorthand for pdf mimetype validation.
|
95
|
-
def pdf?
|
96
|
-
@file_mimetype.to_s == 'application/pdf'
|
97
|
-
end
|
98
|
-
|
99
|
-
# Parses a PDF file according to provided options.
|
100
|
-
# @param options [Hash, nil] Page cutting/merge options:
|
101
|
-
#
|
102
|
-
# * `:page_indexes` Zero-based list of page indexes.
|
103
|
-
# * `:operation` Operation to apply on the document, given the `page_indexes specified:
|
104
|
-
# * `:KEEP_ONLY` - keep only the specified pages, and remove all others.
|
105
|
-
# * `:REMOVE` - remove the specified pages, and keep all others.
|
106
|
-
# * `:on_min_pages` Apply the operation only if document has at least this many pages.
|
107
|
-
def process_pdf(options)
|
108
|
-
@io_stream.seek(0)
|
109
|
-
@io_stream = PdfProcessor.parse(@io_stream, options)
|
110
|
-
end
|
111
|
-
|
112
|
-
# Reads a document.
|
113
|
-
# @param close [Boolean]
|
114
|
-
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
115
|
-
def read_document(close: true)
|
116
|
-
@io_stream.seek(0)
|
117
|
-
# Avoids needlessly re-packing some files
|
118
|
-
data = @io_stream.read
|
119
|
-
@io_stream.close if close
|
120
|
-
['document', data, { filename: Mindee::Input::Source.convert_to_unicode_escape(@filename) }]
|
121
|
-
end
|
122
|
-
|
123
|
-
def count_pdf_pages
|
124
|
-
return 1 unless pdf?
|
125
|
-
|
126
|
-
@io_stream.seek(0)
|
127
|
-
pdf_processor = Mindee::PDF::PdfProcessor.open_pdf(@io_stream)
|
128
|
-
pdf_processor.pages.size
|
129
|
-
end
|
130
|
-
|
131
|
-
# Compresses the file, according to the provided info.
|
132
|
-
# @param [Integer] quality Quality of the output file.
|
133
|
-
# @param [Integer, nil] max_width Maximum width (Ignored for PDFs).
|
134
|
-
# @param [Integer, nil] max_height Maximum height (Ignored for PDFs).
|
135
|
-
# @param [Boolean] force_source_text Whether to force the operation on PDFs with source text.
|
136
|
-
# This will attempt to re-render PDF text over the rasterized original. If disabled, ignored the operation.
|
137
|
-
# WARNING: this operation is strongly discouraged.
|
138
|
-
# @param [Boolean] disable_source_text If the PDF has source text, whether to re-apply it to the original or
|
139
|
-
# not. Needs force_source_text to work.
|
140
|
-
def compress!(quality: 85, max_width: nil, max_height: nil, force_source_text: false, disable_source_text: true)
|
141
|
-
buffer = if pdf?
|
142
|
-
Mindee::PDF::PDFCompressor.compress_pdf(
|
143
|
-
@io_stream,
|
144
|
-
quality: quality,
|
145
|
-
force_source_text_compression: force_source_text,
|
146
|
-
disable_source_text: disable_source_text
|
147
|
-
)
|
148
|
-
else
|
149
|
-
Mindee::Image::ImageCompressor.compress_image(
|
150
|
-
@io_stream,
|
151
|
-
quality: quality,
|
152
|
-
max_width: max_width,
|
153
|
-
max_height: max_height
|
154
|
-
)
|
155
|
-
end
|
156
|
-
@io_stream = buffer
|
157
|
-
@io_stream.rewind
|
158
|
-
end
|
159
|
-
|
160
|
-
# Checks whether the file has source text if it is a pdf. False otherwise
|
161
|
-
# @return [Boolean] True if the file is a PDF and has source text.
|
162
|
-
def source_text?
|
163
|
-
Mindee::PDF::PDFTools.source_text?(@io_stream)
|
164
|
-
end
|
165
|
-
end
|
166
|
-
|
167
|
-
# Load a document from a path.
|
168
|
-
class PathInputSource < LocalInputSource
|
169
|
-
# @param filepath [String]
|
170
|
-
# @param fix_pdf [Boolean]
|
171
|
-
def initialize(filepath, fix_pdf: false)
|
172
|
-
io_stream = File.open(filepath, 'rb')
|
173
|
-
super(io_stream, File.basename(filepath), fix_pdf: fix_pdf)
|
174
|
-
end
|
175
|
-
end
|
176
|
-
|
177
|
-
# Load a document from a base64 string.
|
178
|
-
class Base64InputSource < LocalInputSource
|
179
|
-
# @param base64_string [String]
|
180
|
-
# @param filename [String]
|
181
|
-
# @param fix_pdf [Boolean]
|
182
|
-
def initialize(base64_string, filename, fix_pdf: false)
|
183
|
-
io_stream = StringIO.new(base64_string.unpack1('m*'))
|
184
|
-
io_stream.set_encoding Encoding::BINARY
|
185
|
-
super(io_stream, filename, fix_pdf: fix_pdf)
|
186
|
-
end
|
187
|
-
|
188
|
-
# Overload of the same function to prevent a base64 from being re-encoded.
|
189
|
-
# @param close [Boolean]
|
190
|
-
# @return [Array<String, [String, aBinaryString ], [Hash, nil] >]
|
191
|
-
def read_document(close: true)
|
192
|
-
@io_stream.seek(0)
|
193
|
-
data = @io_stream.read
|
194
|
-
@io_stream.close if close
|
195
|
-
['document', [data].pack('m'), { filename: Source.convert_to_unicode_escape(@filename) }]
|
196
|
-
end
|
197
|
-
end
|
198
|
-
|
199
|
-
# Load a document from raw bytes.
|
200
|
-
class BytesInputSource < LocalInputSource
|
201
|
-
# @param raw_bytes [String]
|
202
|
-
# @param filename [String]
|
203
|
-
# @param fix_pdf [Boolean]
|
204
|
-
def initialize(raw_bytes, filename, fix_pdf: false)
|
205
|
-
io_stream = StringIO.new(raw_bytes)
|
206
|
-
io_stream.set_encoding Encoding::BINARY
|
207
|
-
super(io_stream, filename, fix_pdf: fix_pdf)
|
208
|
-
end
|
209
|
-
end
|
210
|
-
|
211
|
-
# Load a document from a file handle.
|
212
|
-
class FileInputSource < LocalInputSource
|
213
|
-
# @param input_file [File]
|
214
|
-
# @param filename [String]
|
215
|
-
# @param fix_pdf [Boolean]
|
216
|
-
def initialize(input_file, filename, fix_pdf: false)
|
217
|
-
io_stream = input_file
|
218
|
-
super(io_stream, filename, fix_pdf: fix_pdf)
|
219
|
-
end
|
220
|
-
end
|
221
|
-
|
222
|
-
# Load a remote document from a file url.
|
223
|
-
class UrlInputSource
|
224
|
-
# @return [String]
|
225
|
-
attr_reader :url
|
226
|
-
|
227
|
-
def initialize(url)
|
228
|
-
raise 'URL must be HTTPS' unless url.start_with? 'https://'
|
229
|
-
|
230
|
-
@url = url
|
231
|
-
end
|
232
|
-
end
|
233
|
-
|
234
|
-
# Replaces non-ASCII characters by their unicode escape sequence.
|
235
|
-
# Keeps other characters as is.
|
236
|
-
# @return A clean String.
|
237
|
-
def self.convert_to_unicode_escape(string)
|
238
|
-
unicode_escape_string = ''.dup
|
239
|
-
string.each_char do |char|
|
240
|
-
unicode_escape_string << if char.bytesize > 1
|
241
|
-
"\\u#{char.unpack1('U').to_s(16).rjust(4, '0')}"
|
242
|
-
else
|
243
|
-
char
|
244
|
-
end
|
245
|
-
end
|
246
|
-
unicode_escape_string
|
247
|
-
end
|
248
|
-
end
|
249
|
-
end
|
250
|
-
end
|
3
|
+
require_relative 'sources/local_input_source'
|
4
|
+
require_relative 'sources/bytes_input_source'
|
5
|
+
require_relative 'sources/base64_input_source'
|
6
|
+
require_relative 'sources/file_input_source'
|
7
|
+
require_relative 'sources/path_input_source'
|
8
|
+
require_relative 'sources/url_input_source'
|
data/lib/mindee/product.rb
CHANGED
@@ -13,7 +13,6 @@ require_relative 'product/fr/bank_account_details/bank_account_details_v1'
|
|
13
13
|
require_relative 'product/fr/bank_account_details/bank_account_details_v2'
|
14
14
|
require_relative 'product/fr/bank_statement/bank_statement_v1'
|
15
15
|
require_relative 'product/fr/carte_grise/carte_grise_v1'
|
16
|
-
require_relative 'product/fr/carte_vitale/carte_vitale_v1'
|
17
16
|
require_relative 'product/fr/id_card/id_card_v1'
|
18
17
|
require_relative 'product/fr/id_card/id_card_v2'
|
19
18
|
require_relative 'product/fr/energy_bill/energy_bill_v1'
|
data/lib/mindee/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: mindee
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.
|
4
|
+
version: 3.18.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Mindee, SA
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-
|
11
|
+
date: 2024-12-13 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: marcel
|
@@ -166,7 +166,6 @@ files:
|
|
166
166
|
- docs/code_samples/bill_of_lading_v1_async.txt
|
167
167
|
- docs/code_samples/business_card_v1_async.txt
|
168
168
|
- docs/code_samples/carte_grise_v1.txt
|
169
|
-
- docs/code_samples/carte_vitale_v1.txt
|
170
169
|
- docs/code_samples/cropper_v1.txt
|
171
170
|
- docs/code_samples/custom_v1.txt
|
172
171
|
- docs/code_samples/default.txt
|
@@ -260,6 +259,12 @@ files:
|
|
260
259
|
- lib/mindee/input.rb
|
261
260
|
- lib/mindee/input/local_response.rb
|
262
261
|
- lib/mindee/input/sources.rb
|
262
|
+
- lib/mindee/input/sources/base64_input_source.rb
|
263
|
+
- lib/mindee/input/sources/bytes_input_source.rb
|
264
|
+
- lib/mindee/input/sources/file_input_source.rb
|
265
|
+
- lib/mindee/input/sources/local_input_source.rb
|
266
|
+
- lib/mindee/input/sources/path_input_source.rb
|
267
|
+
- lib/mindee/input/sources/url_input_source.rb
|
263
268
|
- lib/mindee/parsing.rb
|
264
269
|
- lib/mindee/parsing/common.rb
|
265
270
|
- lib/mindee/parsing/common/api_response.rb
|
@@ -353,9 +358,6 @@ files:
|
|
353
358
|
- lib/mindee/product/fr/carte_grise/carte_grise_v1.rb
|
354
359
|
- lib/mindee/product/fr/carte_grise/carte_grise_v1_document.rb
|
355
360
|
- lib/mindee/product/fr/carte_grise/carte_grise_v1_page.rb
|
356
|
-
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1.rb
|
357
|
-
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1_document.rb
|
358
|
-
- lib/mindee/product/fr/carte_vitale/carte_vitale_v1_page.rb
|
359
361
|
- lib/mindee/product/fr/energy_bill/energy_bill_v1.rb
|
360
362
|
- lib/mindee/product/fr/energy_bill/energy_bill_v1_document.rb
|
361
363
|
- lib/mindee/product/fr/energy_bill/energy_bill_v1_energy_consumer.rb
|
@@ -1,19 +0,0 @@
|
|
1
|
-
require 'mindee'
|
2
|
-
|
3
|
-
# Init a new client
|
4
|
-
mindee_client = Mindee::Client.new(api_key: 'my-api-key')
|
5
|
-
|
6
|
-
# Load a file from disk
|
7
|
-
input_source = mindee_client.source_from_path('/path/to/the/file.ext')
|
8
|
-
|
9
|
-
# Parse the file
|
10
|
-
result = mindee_client.parse(
|
11
|
-
input_source,
|
12
|
-
Mindee::Product::FR::CarteVitale::CarteVitaleV1
|
13
|
-
)
|
14
|
-
|
15
|
-
# Print a full summary of the parsed data in RST format
|
16
|
-
puts result.document
|
17
|
-
|
18
|
-
# Print the document-level parsed data
|
19
|
-
# puts result.document.inference.prediction
|
@@ -1,41 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative '../../../parsing'
|
4
|
-
require_relative 'carte_vitale_v1_document'
|
5
|
-
require_relative 'carte_vitale_v1_page'
|
6
|
-
|
7
|
-
module Mindee
|
8
|
-
module Product
|
9
|
-
module FR
|
10
|
-
# Carte Vitale module.
|
11
|
-
module CarteVitale
|
12
|
-
# Carte Vitale API version 1 inference prediction.
|
13
|
-
class CarteVitaleV1 < Mindee::Parsing::Common::Inference
|
14
|
-
@endpoint_name = 'carte_vitale'
|
15
|
-
@endpoint_version = '1'
|
16
|
-
|
17
|
-
# @param prediction [Hash]
|
18
|
-
def initialize(prediction)
|
19
|
-
super
|
20
|
-
@prediction = CarteVitaleV1Document.new(prediction['prediction'], nil)
|
21
|
-
@pages = []
|
22
|
-
prediction['pages'].each do |page|
|
23
|
-
if page.key?('prediction') && !page['prediction'].nil? && !page['prediction'].empty?
|
24
|
-
@pages.push(CarteVitaleV1Page.new(page))
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|
28
|
-
|
29
|
-
class << self
|
30
|
-
# Name of the endpoint for this product.
|
31
|
-
# @return [String]
|
32
|
-
attr_reader :endpoint_name
|
33
|
-
# Version for this product.
|
34
|
-
# @return [String]
|
35
|
-
attr_reader :endpoint_version
|
36
|
-
end
|
37
|
-
end
|
38
|
-
end
|
39
|
-
end
|
40
|
-
end
|
41
|
-
end
|
@@ -1,52 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative '../../../parsing'
|
4
|
-
|
5
|
-
module Mindee
|
6
|
-
module Product
|
7
|
-
module FR
|
8
|
-
module CarteVitale
|
9
|
-
# Carte Vitale API version 1.1 document data.
|
10
|
-
class CarteVitaleV1Document < Mindee::Parsing::Common::Prediction
|
11
|
-
include Mindee::Parsing::Standard
|
12
|
-
# The given name(s) of the card holder.
|
13
|
-
# @return [Array<Mindee::Parsing::Standard::StringField>]
|
14
|
-
attr_reader :given_names
|
15
|
-
# The date the card was issued.
|
16
|
-
# @return [Mindee::Parsing::Standard::DateField]
|
17
|
-
attr_reader :issuance_date
|
18
|
-
# The Social Security Number (Numéro de Sécurité Sociale) of the card holder
|
19
|
-
# @return [Mindee::Parsing::Standard::StringField]
|
20
|
-
attr_reader :social_security
|
21
|
-
# The surname of the card holder.
|
22
|
-
# @return [Mindee::Parsing::Standard::StringField]
|
23
|
-
attr_reader :surname
|
24
|
-
|
25
|
-
# @param prediction [Hash]
|
26
|
-
# @param page_id [Integer, nil]
|
27
|
-
def initialize(prediction, page_id)
|
28
|
-
super()
|
29
|
-
@given_names = []
|
30
|
-
prediction['given_names'].each do |item|
|
31
|
-
@given_names.push(StringField.new(item, page_id))
|
32
|
-
end
|
33
|
-
@issuance_date = DateField.new(prediction['issuance_date'], page_id)
|
34
|
-
@social_security = StringField.new(prediction['social_security'], page_id)
|
35
|
-
@surname = StringField.new(prediction['surname'], page_id)
|
36
|
-
end
|
37
|
-
|
38
|
-
# @return [String]
|
39
|
-
def to_s
|
40
|
-
given_names = @given_names.join("\n #{' ' * 15}")
|
41
|
-
out_str = String.new
|
42
|
-
out_str << "\n:Given Name(s): #{given_names}".rstrip
|
43
|
-
out_str << "\n:Surname: #{@surname}".rstrip
|
44
|
-
out_str << "\n:Social Security Number: #{@social_security}".rstrip
|
45
|
-
out_str << "\n:Issuance Date: #{@issuance_date}".rstrip
|
46
|
-
out_str[1..].to_s
|
47
|
-
end
|
48
|
-
end
|
49
|
-
end
|
50
|
-
end
|
51
|
-
end
|
52
|
-
end
|
@@ -1,34 +0,0 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require_relative '../../../parsing'
|
4
|
-
require_relative 'carte_vitale_v1_document'
|
5
|
-
|
6
|
-
module Mindee
|
7
|
-
module Product
|
8
|
-
module FR
|
9
|
-
module CarteVitale
|
10
|
-
# Carte Vitale API version 1.1 page data.
|
11
|
-
class CarteVitaleV1Page < Mindee::Parsing::Common::Page
|
12
|
-
# @param prediction [Hash]
|
13
|
-
def initialize(prediction)
|
14
|
-
super(prediction)
|
15
|
-
@prediction = CarteVitaleV1PagePrediction.new(
|
16
|
-
prediction['prediction'],
|
17
|
-
prediction['id']
|
18
|
-
)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
# Carte Vitale V1 page prediction.
|
23
|
-
class CarteVitaleV1PagePrediction < CarteVitaleV1Document
|
24
|
-
# @return [String]
|
25
|
-
def to_s
|
26
|
-
out_str = String.new
|
27
|
-
out_str << "\n#{super}"
|
28
|
-
out_str
|
29
|
-
end
|
30
|
-
end
|
31
|
-
end
|
32
|
-
end
|
33
|
-
end
|
34
|
-
end
|