libis-format 1.2.7 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -1
- data/lib/libis/format/cli/prompt_helper.rb +32 -24
- data/lib/libis/format/converter/audio_converter.rb +23 -30
- data/lib/libis/format/identifier.rb +1 -1
- data/lib/libis/format/tool/extension_identification.rb +23 -25
- data/lib/libis/format/tool/fido.rb +22 -27
- data/lib/libis/format/tool/file_tool.rb +11 -24
- data/lib/libis/format/tool/fop_pdf.rb +19 -20
- data/lib/libis/format/tool/identification_tool.rb +34 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +66 -111
- data/lib/libis/format/tool/office_to_pdf.rb +24 -21
- data/lib/libis/format/tool/pdf_split.rb +19 -20
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -30
- data/lib/libis/format/tool/pdfa_validator.rb +16 -14
- data/lib/libis/format/version.rb +3 -1
- data/libis-format.gemspec +23 -22
- data/tools/fop/fop.bat +75 -75
- data/tools/fop/fop.cmd +31 -31
- data/tools/fop/fop.js +341 -341
- data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
- data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
- data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
- data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
- metadata +69 -79
- data/.coveralls.yml +0 -2
- data/.gitignore +0 -21
- data/.travis.yml +0 -74
- data/.vscode/launch.json +0 -21
- data/Rakefile +0 -12
- data/base/Dockerfile +0 -33
- data/base/Dockerfile.alpine +0 -20
- data/base/Dockerfile.rvm +0 -56
- data/base/rework_path +0 -25
- data/docker_cfg.yml +0 -1
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
2
4
|
require 'tmpdir'
|
3
5
|
|
@@ -12,42 +14,41 @@ require 'libis/format/type_database'
|
|
12
14
|
module Libis
|
13
15
|
module Format
|
14
16
|
module Tool
|
15
|
-
|
16
|
-
class IdentificationTool
|
17
|
+
class IdentificationTool
|
17
18
|
include Singleton
|
18
19
|
include ::Libis::Tools::Logger
|
19
20
|
|
20
21
|
def self.bad_mimetype(mimetype)
|
21
|
-
|
22
|
+
instance.bad_mimetype(mimetype)
|
22
23
|
end
|
23
24
|
|
24
|
-
def self.run(file, recursive = false, options
|
25
|
-
options ||= {}
|
25
|
+
def self.run(file, recursive = false, **options)
|
26
26
|
if file.is_a?(Array)
|
27
|
-
return run_list file, options
|
28
|
-
elsif file.is_a?(String) && File.
|
27
|
+
return run_list file, **options
|
28
|
+
elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
|
29
29
|
if File.directory?(file)
|
30
|
-
return run_dir(file, recursive, options)
|
30
|
+
return run_dir(file, recursive, **options)
|
31
31
|
elsif File.file?(file)
|
32
|
-
return
|
32
|
+
return instance.run(file, **options)
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
35
36
|
raise ArgumentError,
|
36
37
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
37
38
|
end
|
38
39
|
|
39
|
-
def self.run_dir(file, recursive = true, options
|
40
|
-
|
40
|
+
def self.run_dir(file, recursive = true, **options)
|
41
|
+
instance.run_dir file, recursive, **options
|
41
42
|
end
|
42
43
|
|
43
|
-
def self.run_list(filelist
|
44
|
-
|
44
|
+
def self.run_list(filelist, **options)
|
45
|
+
instance.run_list filelist, **options
|
45
46
|
end
|
46
47
|
|
47
48
|
protected
|
48
49
|
|
49
50
|
def create_list_file(filelist)
|
50
|
-
list_file = Tempfile.new(%w
|
51
|
+
list_file = Tempfile.new(%w[file .list])
|
51
52
|
filelist.each do |fname|
|
52
53
|
list_file.write "#{fname}\n"
|
53
54
|
end
|
@@ -83,20 +84,19 @@ module Libis
|
|
83
84
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
84
85
|
#
|
85
86
|
def process_output(output)
|
86
|
-
output.
|
87
|
+
output.each_with_object({}) do |x, results|
|
87
88
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
88
89
|
results[filepath] ||= []
|
89
90
|
results[filepath] << annotate(x)
|
90
|
-
results
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
94
|
# Enhance the output with mimetype and score
|
95
95
|
def annotate(result)
|
96
96
|
# Enhance result with mimetype if needed
|
97
|
-
|
97
|
+
bad_mimetypes.include?(result[:mimetype]) &&
|
98
|
+
!bad_puids.include?(result[:puid]) &&
|
98
99
|
result[:mimetype] = get_mimetype(result[:puid])
|
99
|
-
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
102
|
Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
|
@@ -108,55 +108,54 @@ module Libis
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
109
109
|
|
110
110
|
# freeze all strings
|
111
|
-
result.each {|_, v| v.freeze if v.is_a?(String)}
|
111
|
+
result.each { |_, v| v.freeze if v.is_a?(String) }
|
112
112
|
|
113
113
|
# Adapt score based on matchtype
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
115
115
|
case result[:matchtype]
|
116
116
|
|
117
117
|
# Signature match increases score with 2
|
118
|
-
|
119
|
-
|
118
|
+
when 'signature'
|
119
|
+
result[:score] += 2
|
120
120
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score with 4
|
125
|
-
|
126
|
-
|
125
|
+
when 'container'
|
126
|
+
result[:score] += 4
|
127
127
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
132
|
-
|
133
|
-
|
132
|
+
when 'extension'
|
133
|
+
result[:score] -= 2
|
134
134
|
|
135
135
|
# Magic code (file tool) is to be trusted even less
|
136
|
-
|
137
|
-
|
136
|
+
when 'magic'
|
137
|
+
result[:score] -= 3
|
138
138
|
|
139
|
-
# Or no change otherwise
|
140
|
-
else
|
141
|
-
# do nothing
|
142
139
|
end
|
143
140
|
|
144
141
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
145
142
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
146
|
-
if result[:mimetype] == 'application/zip'
|
147
|
-
result[:score] -= 2
|
148
|
-
end
|
143
|
+
result[:score] -= 2 if result[:mimetype] == 'application/zip'
|
149
144
|
|
150
145
|
# Return result enhanced with mimetype and score fields
|
151
146
|
result
|
152
147
|
end
|
153
148
|
|
154
149
|
def get_mimetype(puid)
|
155
|
-
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
150
|
+
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
151
|
+
rescue StandardError
|
152
|
+
nil
|
156
153
|
end
|
157
154
|
|
158
155
|
def get_puid(mimetype)
|
159
|
-
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
156
|
+
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
157
|
+
rescue StandardError
|
158
|
+
nil
|
160
159
|
end
|
161
160
|
|
162
161
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -170,7 +169,6 @@ module Libis
|
|
170
169
|
@bad_mimetypes << mimetype
|
171
170
|
end
|
172
171
|
end
|
173
|
-
|
174
172
|
end
|
175
173
|
end
|
176
174
|
end
|
@@ -1,15 +1,12 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mapi/msg'
|
4
4
|
require 'rfc_2047'
|
5
5
|
require 'cgi'
|
6
6
|
require 'pdfkit'
|
7
|
-
|
8
7
|
require 'time'
|
9
|
-
|
10
8
|
require 'fileutils'
|
11
9
|
require 'pathname'
|
12
|
-
|
13
10
|
require 'libis/format/config'
|
14
11
|
|
15
12
|
module Libis
|
@@ -18,23 +15,23 @@ module Libis
|
|
18
15
|
class MsgToPdf
|
19
16
|
include ::Libis::Tools::Logger
|
20
17
|
|
21
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {background: white
|
18
|
+
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
22
19
|
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
23
20
|
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
24
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>'
|
21
|
+
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
25
22
|
|
26
|
-
IMG_CID_PLAIN_REGEX =
|
27
|
-
IMG_CID_HTML_REGEX =
|
23
|
+
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
24
|
+
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
28
25
|
|
29
26
|
def self.installed?
|
30
27
|
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
31
28
|
end
|
32
29
|
|
33
|
-
def self.run(source, target, options
|
34
|
-
new.run source, target, options
|
30
|
+
def self.run(source, target, **options)
|
31
|
+
new.run source, target, **options
|
35
32
|
end
|
36
33
|
|
37
|
-
def run(source, target, options
|
34
|
+
def run(source, target, **options)
|
38
35
|
# Preliminary checks
|
39
36
|
# ------------------
|
40
37
|
|
@@ -56,40 +53,15 @@ module Libis
|
|
56
53
|
end
|
57
54
|
|
58
55
|
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
59
|
-
|
60
56
|
# Make sure the target directory exists
|
61
57
|
outdir = File.dirname(target)
|
62
58
|
FileUtils.mkdir_p(outdir)
|
63
59
|
|
64
|
-
# puts "Headers:"
|
65
|
-
# puts '--------'
|
66
|
-
# pp msg.headers
|
67
|
-
|
68
|
-
# puts "Recipients:"
|
69
|
-
# puts '-----------'
|
70
|
-
# pp msg.recipients
|
71
|
-
|
72
|
-
# puts "Body:"
|
73
|
-
# puts '-----'
|
74
|
-
# puts msg.properties.body
|
75
|
-
# puts '-----'
|
76
|
-
# puts msg.properties.body_rtf
|
77
|
-
# puts '-----'
|
78
|
-
# puts msg.properties.body_html
|
79
|
-
|
80
|
-
# puts "Attachments:"
|
81
|
-
# puts '------------'
|
82
|
-
# msg.attachments.each {|a| p "#{a.filename} - #{a.properties.attach_content_id}"}
|
83
|
-
|
84
|
-
# puts "Converting:"
|
85
|
-
# puts '-----------'
|
86
|
-
|
87
60
|
# Get the body of the message in HTML
|
88
61
|
body = msg.properties.body_html
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
end
|
62
|
+
|
63
|
+
# Embed plain body in HTML as a fallback
|
64
|
+
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
93
65
|
|
94
66
|
# Check and fix the character encoding
|
95
67
|
begin
|
@@ -97,8 +69,8 @@ module Libis
|
|
97
69
|
body.encode!('UTF-8', universal_newline: true)
|
98
70
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
99
71
|
begin
|
100
|
-
|
101
|
-
|
72
|
+
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
73
|
+
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
102
74
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
103
75
|
# If that fails too, log a warning and replace the invalid/unknown with a ? character.
|
104
76
|
@warnings << "#{e.class}: #{e.message}"
|
@@ -111,7 +83,7 @@ module Libis
|
|
111
83
|
headers = {}
|
112
84
|
hdr_html = ''
|
113
85
|
|
114
|
-
%w
|
86
|
+
%w[From To Cc Subject Date].each do |key|
|
115
87
|
value = find_hdr(msg.headers, key)
|
116
88
|
if value
|
117
89
|
headers[key.downcase.to_sym] = value
|
@@ -121,21 +93,25 @@ module Libis
|
|
121
93
|
|
122
94
|
[:date].each do |key|
|
123
95
|
next unless headers[key]
|
96
|
+
|
124
97
|
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
125
98
|
end
|
126
99
|
|
127
100
|
# Add header section to the HTML body
|
128
101
|
unless hdr_html.empty?
|
129
102
|
# Insert header block styles
|
130
|
-
if body =~
|
103
|
+
if body =~ %r{</head>}
|
131
104
|
# if head exists, append the style block
|
132
|
-
body.gsub!(
|
105
|
+
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
106
|
+
elsif body =~ %r{<head/>}
|
107
|
+
# empty head, replace with the style block
|
108
|
+
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
133
109
|
else
|
134
110
|
# otherwise insert a head section before the body tag
|
135
|
-
body.gsub!(/<body/,
|
111
|
+
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
136
112
|
end
|
137
113
|
# Add the headers html table as first element in the body section
|
138
|
-
body.gsub!(/<body[^>]*>/) {|m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}"}
|
114
|
+
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
139
115
|
end
|
140
116
|
|
141
117
|
# Embed inline images
|
@@ -144,29 +120,23 @@ module Libis
|
|
144
120
|
used_files = []
|
145
121
|
|
146
122
|
# First process plaintext cid entries
|
147
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |
|
148
|
-
|
149
|
-
data = getAttachmentData(attachments, $1)
|
123
|
+
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
124
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
150
125
|
if data
|
151
|
-
|
152
|
-
used_files << $1
|
126
|
+
used_files << ::Regexp.last_match(1)
|
153
127
|
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
154
128
|
else
|
155
|
-
# puts "cid #{$1} not found"
|
156
129
|
'<img src=""/>'
|
157
130
|
end
|
158
131
|
end
|
159
|
-
|
132
|
+
|
160
133
|
# Then process HTML img tags with CID entries
|
161
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |
|
162
|
-
|
163
|
-
data
|
164
|
-
|
165
|
-
|
166
|
-
used_files << $1
|
167
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
134
|
+
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
135
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
136
|
+
if data
|
137
|
+
used_files << ::Regexp.last_match(1)
|
138
|
+
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
168
139
|
else
|
169
|
-
# puts "cid #{$1} not found"
|
170
140
|
''
|
171
141
|
end
|
172
142
|
end
|
@@ -176,59 +146,52 @@ module Libis
|
|
176
146
|
files = []
|
177
147
|
|
178
148
|
if target_format == :PDF
|
179
|
-
|
149
|
+
# PDF creation options
|
180
150
|
pdf_options = {
|
181
151
|
page_size: 'A4',
|
182
152
|
margin_top: '10mm',
|
183
153
|
margin_bottom: '10mm',
|
184
154
|
margin_left: '10mm',
|
185
155
|
margin_right: '10mm',
|
186
|
-
dpi: 300,
|
187
156
|
# image_quality: 100,
|
188
157
|
# viewport_size: '2480x3508',
|
158
|
+
dpi: 300
|
189
159
|
}.merge pdf_options
|
190
160
|
|
191
|
-
# pp pdf_options
|
192
|
-
# puts "Final HTML body:"
|
193
|
-
# pp body
|
194
161
|
subject = find_hdr(msg.headers, 'Subject')
|
195
162
|
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
196
163
|
pdf = kit.to_pdf
|
197
|
-
File.open(target, 'wb') {|f| f.write(pdf)}
|
198
|
-
# puts "message #{subject} converted to PDF file '#{target}'"
|
164
|
+
File.open(target, 'wb') { |f| f.write(pdf) }
|
199
165
|
else
|
200
|
-
File.open(target, 'wb') {|f| f.write(body)}
|
201
|
-
# puts "message #{subject} converted to HTML file '#{target}'"
|
166
|
+
File.open(target, 'wb') { |f| f.write(body) }
|
202
167
|
end
|
203
168
|
files << target if File.exist?(target)
|
204
169
|
|
205
170
|
# Save attachments
|
206
171
|
# ----------------
|
207
172
|
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
208
|
-
digits = ((attachments.count + 1)/ 10) + 1
|
173
|
+
digits = ((attachments.count + 1) / 10) + 1
|
209
174
|
i = 1
|
210
|
-
attachments.delete_if {|a| a.properties.attachment_hidden}.each do |a|
|
211
|
-
prefix = "#{
|
212
|
-
if sub_msg = a.instance_variable_get(:@embedded_msg)
|
213
|
-
|
214
|
-
subject = a.properties[:display_name] || sub_msg.subject || ""
|
175
|
+
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
176
|
+
prefix = "#{format('%0*d', digits, i)}-"
|
177
|
+
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
178
|
+
subject = a.properties[:display_name] || sub_msg.subject || ''
|
215
179
|
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
216
|
-
|
217
180
|
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
218
|
-
if e = result[:error]
|
219
|
-
raise
|
181
|
+
if (e = result[:error])
|
182
|
+
raise e
|
220
183
|
end
|
184
|
+
|
221
185
|
files += result[:files]
|
222
186
|
elsif a.filename
|
223
187
|
next if used_files.include?(a.filename)
|
224
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
225
188
|
|
189
|
+
file = File.join(outdir, "#{prefix}#{a.filename}")
|
226
190
|
FileUtils.mkdir_p(File.dirname(file))
|
227
|
-
File.open(file, 'wb') {|f| a.save(f)}
|
191
|
+
File.open(file, 'wb') { |f| a.save(f) }
|
228
192
|
files << file
|
229
|
-
# puts "Attachment file '#{file}' created"
|
230
193
|
else
|
231
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be
|
194
|
+
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
232
195
|
next
|
233
196
|
end
|
234
197
|
i += 1
|
@@ -240,28 +203,26 @@ module Libis
|
|
240
203
|
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
241
204
|
end
|
242
205
|
end
|
243
|
-
|
206
|
+
|
244
207
|
{
|
245
|
-
command: {status: 0},
|
246
|
-
files
|
247
|
-
headers
|
208
|
+
command: { status: 0 },
|
209
|
+
files:,
|
210
|
+
headers:,
|
248
211
|
warnings: @warnings
|
249
212
|
}
|
250
|
-
|
251
213
|
rescue Exception => e
|
252
|
-
# puts "ERROR: Exception #{e.class} raised: #{e.message}"
|
253
|
-
# e.backtrace.each {|t| puts " - #{t}"}
|
254
214
|
raise unless root_msg
|
215
|
+
|
255
216
|
msg.close
|
256
|
-
|
257
|
-
command: {status: -1},
|
217
|
+
{
|
218
|
+
command: { status: -1 },
|
258
219
|
files: [],
|
259
220
|
headers: {},
|
260
221
|
errors: [
|
261
222
|
{
|
262
223
|
error: e.message,
|
263
224
|
error_class: e.class.name,
|
264
|
-
error_trace: e.backtrace
|
225
|
+
error_trace: e.backtrace
|
265
226
|
}
|
266
227
|
],
|
267
228
|
warnings: @warnings
|
@@ -270,15 +231,13 @@ module Libis
|
|
270
231
|
|
271
232
|
protected
|
272
233
|
|
273
|
-
def eml_to_html
|
274
|
-
|
275
|
-
end
|
234
|
+
def eml_to_html; end
|
276
235
|
|
277
236
|
private
|
278
237
|
|
279
238
|
def find_hdr(list, key)
|
280
239
|
keys = list.keys
|
281
|
-
if k = keys.find {|x| x.to_s =~ /^#{key}$/i}
|
240
|
+
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
282
241
|
v = list[k]
|
283
242
|
v = v.first if v.is_a? Array
|
284
243
|
v = Rfc2047.decode(v).strip if v.is_a? String
|
@@ -288,27 +247,23 @@ module Libis
|
|
288
247
|
end
|
289
248
|
|
290
249
|
def hdr_html(key, value)
|
291
|
-
return HEADER_FIELD_TEMPLATE
|
250
|
+
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
251
|
+
|
292
252
|
''
|
293
253
|
end
|
294
254
|
|
295
|
-
def
|
255
|
+
def get_attachment_data(attachments, cid)
|
296
256
|
attachments.each do |attachment|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
257
|
+
next unless attachment.properties.attach_content_id == cid
|
258
|
+
|
259
|
+
attachment.data.rewind
|
260
|
+
return {
|
261
|
+
mime_type: attachment.properties.attach_mime_tag,
|
262
|
+
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
263
|
+
}
|
304
264
|
end
|
305
265
|
nil
|
306
266
|
end
|
307
|
-
|
308
|
-
def read_header(headers_file)
|
309
|
-
headers = YAML.load_file(headers_file)
|
310
|
-
headers.symbolize_keys
|
311
|
-
end
|
312
267
|
end
|
313
268
|
end
|
314
269
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'fileutils'
|
2
4
|
|
3
5
|
require 'libis/tools/extend/string'
|
@@ -9,60 +11,61 @@ require 'libis/format/config'
|
|
9
11
|
module Libis
|
10
12
|
module Format
|
11
13
|
module Tool
|
12
|
-
|
13
14
|
class OfficeToPdf
|
14
15
|
include ::Libis::Tools::Logger
|
15
16
|
|
16
17
|
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd],
|
18
|
-
result[:status]
|
18
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
+
(result[:status]).zero?
|
19
20
|
end
|
20
21
|
|
21
|
-
def self.run(source, target, options
|
22
|
-
|
22
|
+
def self.run(source, target, **options)
|
23
|
+
new.run source, target, **options
|
23
24
|
end
|
24
25
|
|
25
|
-
def run(source, target, options
|
26
|
+
def run(source, target, **options)
|
26
27
|
workdir = '/...'
|
27
28
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
28
29
|
|
29
|
-
workdir = File.join(workdir, rand(
|
30
|
+
workdir = File.join(workdir, rand(1_000_000).to_s)
|
30
31
|
FileUtils.mkpath(workdir)
|
31
32
|
|
32
33
|
src_file = File.join(workdir, File.basename(source))
|
33
34
|
FileUtils.symlink source, src_file
|
34
35
|
|
35
|
-
tgt_file = File.join(workdir, File.basename(source, '.*')
|
36
|
+
tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
|
36
37
|
|
37
38
|
export_filter = options[:export_filter] || 'pdf'
|
38
39
|
|
39
40
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
40
41
|
result = Libis::Tools::Command.run(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
43
|
+
"-env:UserInstallation=file://#{workdir}",
|
44
|
+
'--convert-to', export_filter,
|
45
|
+
'--outdir', workdir, src_file,
|
46
|
+
timeout:,
|
47
|
+
kill_after: timeout * 2
|
47
48
|
)
|
48
49
|
|
49
|
-
raise
|
50
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
51
|
+
|
50
52
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
51
|
-
raise
|
53
|
+
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
52
54
|
|
53
55
|
FileUtils.copy tgt_file, target, preserve: true
|
54
56
|
|
55
57
|
{
|
56
58
|
command: result,
|
57
|
-
files: [
|
59
|
+
files: [target]
|
58
60
|
}
|
59
|
-
|
60
61
|
ensure
|
61
|
-
|
62
|
-
|
62
|
+
begin
|
63
|
+
FileUtils.rmtree workdir
|
64
|
+
rescue StandardError
|
65
|
+
nil
|
66
|
+
end
|
63
67
|
end
|
64
68
|
end
|
65
|
-
|
66
69
|
end
|
67
70
|
end
|
68
71
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'os'
|
2
4
|
|
3
5
|
require 'libis/tools/extend/string'
|
@@ -9,49 +11,46 @@ require 'libis/format/config'
|
|
9
11
|
module Libis
|
10
12
|
module Format
|
11
13
|
module Tool
|
12
|
-
|
13
14
|
class PdfSplit
|
14
15
|
include ::Libis::Tools::Logger
|
15
16
|
|
16
17
|
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd],
|
18
|
-
return false unless result[:status]
|
18
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
|
19
|
+
return false unless (result[:status]).zero?
|
20
|
+
|
19
21
|
File.exist?(Libis::Format::Config[:pdf_tool])
|
20
22
|
end
|
21
23
|
|
22
|
-
def self.run(source, target,
|
23
|
-
|
24
|
+
def self.run(source, target, *args)
|
25
|
+
new.run source, target, *args
|
24
26
|
end
|
25
27
|
|
26
|
-
def run(source, target,
|
27
|
-
|
28
|
+
def run(source, target, *args)
|
28
29
|
if OS.java?
|
29
30
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
30
31
|
end
|
31
32
|
|
32
33
|
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
33
34
|
result = Libis::Tools::Command.run(
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
Libis::Format::Config[:java_cmd],
|
36
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
37
|
+
'SplitPdf',
|
38
|
+
'--file_input', source,
|
39
|
+
'--file_output', target,
|
40
|
+
*args,
|
41
|
+
timeout:,
|
42
|
+
kill_after: timeout * 2
|
42
43
|
)
|
43
44
|
|
44
|
-
raise
|
45
|
-
raise
|
45
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
+
raise "#{self.class} errors: #{result[:err].join("\n")}" unless (result[:status]).zero? && result[:err].empty?
|
46
47
|
|
47
48
|
{
|
48
49
|
command: result,
|
49
|
-
files: [
|
50
|
+
files: [target] # TODO: collect the files
|
50
51
|
}
|
51
|
-
|
52
52
|
end
|
53
53
|
end
|
54
|
-
|
55
54
|
end
|
56
55
|
end
|
57
56
|
end
|