libis-format 1.2.7 → 1.2.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +7 -1
- data/lib/libis/format/cli/prompt_helper.rb +32 -24
- data/lib/libis/format/converter/audio_converter.rb +23 -30
- data/lib/libis/format/identifier.rb +1 -1
- data/lib/libis/format/tool/extension_identification.rb +23 -25
- data/lib/libis/format/tool/fido.rb +22 -27
- data/lib/libis/format/tool/file_tool.rb +11 -24
- data/lib/libis/format/tool/fop_pdf.rb +19 -20
- data/lib/libis/format/tool/identification_tool.rb +34 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +66 -111
- data/lib/libis/format/tool/office_to_pdf.rb +24 -21
- data/lib/libis/format/tool/pdf_split.rb +19 -20
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -30
- data/lib/libis/format/tool/pdfa_validator.rb +16 -14
- data/lib/libis/format/version.rb +3 -1
- data/libis-format.gemspec +23 -22
- data/tools/fop/fop.bat +75 -75
- data/tools/fop/fop.cmd +31 -31
- data/tools/fop/fop.js +341 -341
- data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
- data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
- data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
- data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
- metadata +69 -79
- data/.coveralls.yml +0 -2
- data/.gitignore +0 -21
- data/.travis.yml +0 -74
- data/.vscode/launch.json +0 -21
- data/Rakefile +0 -12
- data/base/Dockerfile +0 -33
- data/base/Dockerfile.alpine +0 -20
- data/base/Dockerfile.rvm +0 -56
- data/base/rework_path +0 -25
- data/docker_cfg.yml +0 -1
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'csv'
|
2
4
|
require 'tmpdir'
|
3
5
|
|
@@ -12,42 +14,41 @@ require 'libis/format/type_database'
|
|
12
14
|
module Libis
|
13
15
|
module Format
|
14
16
|
module Tool
|
15
|
-
|
16
|
-
class IdentificationTool
|
17
|
+
class IdentificationTool
|
17
18
|
include Singleton
|
18
19
|
include ::Libis::Tools::Logger
|
19
20
|
|
20
21
|
def self.bad_mimetype(mimetype)
|
21
|
-
|
22
|
+
instance.bad_mimetype(mimetype)
|
22
23
|
end
|
23
24
|
|
24
|
-
def self.run(file, recursive = false, options
|
25
|
-
options ||= {}
|
25
|
+
def self.run(file, recursive = false, **options)
|
26
26
|
if file.is_a?(Array)
|
27
|
-
return run_list file, options
|
28
|
-
elsif file.is_a?(String) && File.
|
27
|
+
return run_list file, **options
|
28
|
+
elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
|
29
29
|
if File.directory?(file)
|
30
|
-
return run_dir(file, recursive, options)
|
30
|
+
return run_dir(file, recursive, **options)
|
31
31
|
elsif File.file?(file)
|
32
|
-
return
|
32
|
+
return instance.run(file, **options)
|
33
33
|
end
|
34
34
|
end
|
35
|
+
|
35
36
|
raise ArgumentError,
|
36
37
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
37
38
|
end
|
38
39
|
|
39
|
-
def self.run_dir(file, recursive = true, options
|
40
|
-
|
40
|
+
def self.run_dir(file, recursive = true, **options)
|
41
|
+
instance.run_dir file, recursive, **options
|
41
42
|
end
|
42
43
|
|
43
|
-
def self.run_list(filelist
|
44
|
-
|
44
|
+
def self.run_list(filelist, **options)
|
45
|
+
instance.run_list filelist, **options
|
45
46
|
end
|
46
47
|
|
47
48
|
protected
|
48
49
|
|
49
50
|
def create_list_file(filelist)
|
50
|
-
list_file = Tempfile.new(%w
|
51
|
+
list_file = Tempfile.new(%w[file .list])
|
51
52
|
filelist.each do |fname|
|
52
53
|
list_file.write "#{fname}\n"
|
53
54
|
end
|
@@ -83,20 +84,19 @@ module Libis
|
|
83
84
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
84
85
|
#
|
85
86
|
def process_output(output)
|
86
|
-
output.
|
87
|
+
output.each_with_object({}) do |x, results|
|
87
88
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
88
89
|
results[filepath] ||= []
|
89
90
|
results[filepath] << annotate(x)
|
90
|
-
results
|
91
91
|
end
|
92
92
|
end
|
93
93
|
|
94
94
|
# Enhance the output with mimetype and score
|
95
95
|
def annotate(result)
|
96
96
|
# Enhance result with mimetype if needed
|
97
|
-
|
97
|
+
bad_mimetypes.include?(result[:mimetype]) &&
|
98
|
+
!bad_puids.include?(result[:puid]) &&
|
98
99
|
result[:mimetype] = get_mimetype(result[:puid])
|
99
|
-
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
102
|
Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
|
@@ -108,55 +108,54 @@ module Libis
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
109
109
|
|
110
110
|
# freeze all strings
|
111
|
-
result.each {|_, v| v.freeze if v.is_a?(String)}
|
111
|
+
result.each { |_, v| v.freeze if v.is_a?(String) }
|
112
112
|
|
113
113
|
# Adapt score based on matchtype
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
115
115
|
case result[:matchtype]
|
116
116
|
|
117
117
|
# Signature match increases score with 2
|
118
|
-
|
119
|
-
|
118
|
+
when 'signature'
|
119
|
+
result[:score] += 2
|
120
120
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score with 4
|
125
|
-
|
126
|
-
|
125
|
+
when 'container'
|
126
|
+
result[:score] += 4
|
127
127
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
132
|
-
|
133
|
-
|
132
|
+
when 'extension'
|
133
|
+
result[:score] -= 2
|
134
134
|
|
135
135
|
# Magic code (file tool) is to be trused even less
|
136
|
-
|
137
|
-
|
136
|
+
when 'magic'
|
137
|
+
result[:score] -= 3
|
138
138
|
|
139
|
-
# Or no change otherwise
|
140
|
-
else
|
141
|
-
# do nothing
|
142
139
|
end
|
143
140
|
|
144
141
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
145
142
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
146
|
-
if result[:mimetype] == 'application/zip'
|
147
|
-
result[:score] -= 2
|
148
|
-
end
|
143
|
+
result[:score] -= 2 if result[:mimetype] == 'application/zip'
|
149
144
|
|
150
145
|
# Return result enhanced with mimetype and score fields
|
151
146
|
result
|
152
147
|
end
|
153
148
|
|
154
149
|
def get_mimetype(puid)
|
155
|
-
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
150
|
+
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
151
|
+
rescue StandardError
|
152
|
+
nil
|
156
153
|
end
|
157
154
|
|
158
155
|
def get_puid(mimetype)
|
159
|
-
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
156
|
+
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
157
|
+
rescue StandardError
|
158
|
+
nil
|
160
159
|
end
|
161
160
|
|
162
161
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -170,7 +169,6 @@ module Libis
|
|
170
169
|
@bad_mimetypes << mimetype
|
171
170
|
end
|
172
171
|
end
|
173
|
-
|
174
172
|
end
|
175
173
|
end
|
176
174
|
end
|
@@ -1,15 +1,12 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'mapi/msg'
|
4
4
|
require 'rfc_2047'
|
5
5
|
require 'cgi'
|
6
6
|
require 'pdfkit'
|
7
|
-
|
8
7
|
require 'time'
|
9
|
-
|
10
8
|
require 'fileutils'
|
11
9
|
require 'pathname'
|
12
|
-
|
13
10
|
require 'libis/format/config'
|
14
11
|
|
15
12
|
module Libis
|
@@ -18,23 +15,23 @@ module Libis
|
|
18
15
|
class MsgToPdf
|
19
16
|
include ::Libis::Tools::Logger
|
20
17
|
|
21
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {background: white
|
18
|
+
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
22
19
|
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
23
20
|
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
24
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>'
|
21
|
+
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
25
22
|
|
26
|
-
IMG_CID_PLAIN_REGEX =
|
27
|
-
IMG_CID_HTML_REGEX =
|
23
|
+
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
24
|
+
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
28
25
|
|
29
26
|
def self.installed?
|
30
27
|
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
31
28
|
end
|
32
29
|
|
33
|
-
def self.run(source, target, options
|
34
|
-
new.run source, target, options
|
30
|
+
def self.run(source, target, **options)
|
31
|
+
new.run source, target, **options
|
35
32
|
end
|
36
33
|
|
37
|
-
def run(source, target, options
|
34
|
+
def run(source, target, **options)
|
38
35
|
# Preliminary checks
|
39
36
|
# ------------------
|
40
37
|
|
@@ -56,40 +53,15 @@ module Libis
|
|
56
53
|
end
|
57
54
|
|
58
55
|
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
59
|
-
|
60
56
|
# Make sure the target directory exists
|
61
57
|
outdir = File.dirname(target)
|
62
58
|
FileUtils.mkdir_p(outdir)
|
63
59
|
|
64
|
-
# puts "Headers:"
|
65
|
-
# puts '--------'
|
66
|
-
# pp msg.headers
|
67
|
-
|
68
|
-
# puts "Recipients:"
|
69
|
-
# puts '-----------'
|
70
|
-
# pp msg.recipients
|
71
|
-
|
72
|
-
# puts "Body:"
|
73
|
-
# puts '-----'
|
74
|
-
# puts msg.properties.body
|
75
|
-
# puts '-----'
|
76
|
-
# puts msg.properties.body_rtf
|
77
|
-
# puts '-----'
|
78
|
-
# puts msg.properties.body_html
|
79
|
-
|
80
|
-
# puts "Attachments:"
|
81
|
-
# puts '------------'
|
82
|
-
# msg.attachments.each {|a| p "#{a.filename} - #{a.properties.attach_content_id}"}
|
83
|
-
|
84
|
-
# puts "Converting:"
|
85
|
-
# puts '-----------'
|
86
|
-
|
87
60
|
# Get the body of the message in HTML
|
88
61
|
body = msg.properties.body_html
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
end
|
62
|
+
|
63
|
+
# Embed plain body in HTML as a fallback
|
64
|
+
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
93
65
|
|
94
66
|
# Check and fix the character encoding
|
95
67
|
begin
|
@@ -97,8 +69,8 @@ module Libis
|
|
97
69
|
body.encode!('UTF-8', universal_newline: true)
|
98
70
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
99
71
|
begin
|
100
|
-
|
101
|
-
|
72
|
+
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
73
|
+
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
102
74
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
103
75
|
# If that fails too, log a warning and replace the invalid/unknown with a ? character.
|
104
76
|
@warnings << "#{e.class}: #{e.message}"
|
@@ -111,7 +83,7 @@ module Libis
|
|
111
83
|
headers = {}
|
112
84
|
hdr_html = ''
|
113
85
|
|
114
|
-
%w
|
86
|
+
%w[From To Cc Subject Date].each do |key|
|
115
87
|
value = find_hdr(msg.headers, key)
|
116
88
|
if value
|
117
89
|
headers[key.downcase.to_sym] = value
|
@@ -121,21 +93,25 @@ module Libis
|
|
121
93
|
|
122
94
|
[:date].each do |key|
|
123
95
|
next unless headers[key]
|
96
|
+
|
124
97
|
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
125
98
|
end
|
126
99
|
|
127
100
|
# Add header section to the HTML body
|
128
101
|
unless hdr_html.empty?
|
129
102
|
# Insert header block styles
|
130
|
-
if body =~
|
103
|
+
if body =~ %r{</head>}
|
131
104
|
# if head exists, append the style block
|
132
|
-
body.gsub!(
|
105
|
+
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
106
|
+
elsif body =~ %r{<head/>}
|
107
|
+
# empty head, replace with the style block
|
108
|
+
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
133
109
|
else
|
134
110
|
# otherwise insert a head section before the body tag
|
135
|
-
body.gsub!(/<body/,
|
111
|
+
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
136
112
|
end
|
137
113
|
# Add the headers html table as first element in the body section
|
138
|
-
body.gsub!(/<body[^>]*>/) {|m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}"}
|
114
|
+
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
139
115
|
end
|
140
116
|
|
141
117
|
# Embed inline images
|
@@ -144,29 +120,23 @@ module Libis
|
|
144
120
|
used_files = []
|
145
121
|
|
146
122
|
# First process plaintext cid entries
|
147
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |
|
148
|
-
|
149
|
-
data = getAttachmentData(attachments, $1)
|
123
|
+
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
124
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
150
125
|
if data
|
151
|
-
|
152
|
-
used_files << $1
|
126
|
+
used_files << ::Regexp.last_match(1)
|
153
127
|
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
154
128
|
else
|
155
|
-
# puts "cid #{$1} not found"
|
156
129
|
'<img src=""/>'
|
157
130
|
end
|
158
131
|
end
|
159
|
-
|
132
|
+
|
160
133
|
# Then process HTML img tags with CID entries
|
161
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |
|
162
|
-
|
163
|
-
data
|
164
|
-
|
165
|
-
|
166
|
-
used_files << $1
|
167
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
134
|
+
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
135
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
136
|
+
if data
|
137
|
+
used_files << ::Regexp.last_match(1)
|
138
|
+
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
168
139
|
else
|
169
|
-
# puts "cid #{$1} not found"
|
170
140
|
''
|
171
141
|
end
|
172
142
|
end
|
@@ -176,59 +146,52 @@ module Libis
|
|
176
146
|
files = []
|
177
147
|
|
178
148
|
if target_format == :PDF
|
179
|
-
|
149
|
+
# PDF creation options
|
180
150
|
pdf_options = {
|
181
151
|
page_size: 'A4',
|
182
152
|
margin_top: '10mm',
|
183
153
|
margin_bottom: '10mm',
|
184
154
|
margin_left: '10mm',
|
185
155
|
margin_right: '10mm',
|
186
|
-
dpi: 300,
|
187
156
|
# image_quality: 100,
|
188
157
|
# viewport_size: '2480x3508',
|
158
|
+
dpi: 300
|
189
159
|
}.merge pdf_options
|
190
160
|
|
191
|
-
# pp pdf_options
|
192
|
-
# puts "Final HTML body:"
|
193
|
-
# pp body
|
194
161
|
subject = find_hdr(msg.headers, 'Subject')
|
195
162
|
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
196
163
|
pdf = kit.to_pdf
|
197
|
-
File.open(target, 'wb') {|f| f.write(pdf)}
|
198
|
-
# puts "message #{subject} converted to PDF file '#{target}'"
|
164
|
+
File.open(target, 'wb') { |f| f.write(pdf) }
|
199
165
|
else
|
200
|
-
File.open(target, 'wb') {|f| f.write(body)}
|
201
|
-
# puts "message #{subject} converted to HTML file '#{target}'"
|
166
|
+
File.open(target, 'wb') { |f| f.write(body) }
|
202
167
|
end
|
203
168
|
files << target if File.exist?(target)
|
204
169
|
|
205
170
|
# Save attachments
|
206
171
|
# ----------------
|
207
172
|
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
208
|
-
digits = ((attachments.count + 1)/ 10) + 1
|
173
|
+
digits = ((attachments.count + 1) / 10) + 1
|
209
174
|
i = 1
|
210
|
-
attachments.delete_if {|a| a.properties.attachment_hidden}.each do |a|
|
211
|
-
prefix = "#{
|
212
|
-
if sub_msg = a.instance_variable_get(:@embedded_msg)
|
213
|
-
|
214
|
-
subject = a.properties[:display_name] || sub_msg.subject || ""
|
175
|
+
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
176
|
+
prefix = "#{format('%0*d', digits, i)}-"
|
177
|
+
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
178
|
+
subject = a.properties[:display_name] || sub_msg.subject || ''
|
215
179
|
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
216
|
-
|
217
180
|
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
218
|
-
if e = result[:error]
|
219
|
-
raise
|
181
|
+
if (e = result[:error])
|
182
|
+
raise e
|
220
183
|
end
|
184
|
+
|
221
185
|
files += result[:files]
|
222
186
|
elsif a.filename
|
223
187
|
next if used_files.include?(a.filename)
|
224
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
225
188
|
|
189
|
+
file = File.join(outdir, "#{prefix}#{a.filename}")
|
226
190
|
FileUtils.mkdir_p(File.dirname(file))
|
227
|
-
File.open(file, 'wb') {|f| a.save(f)}
|
191
|
+
File.open(file, 'wb') { |f| a.save(f) }
|
228
192
|
files << file
|
229
|
-
# puts "Attachment file '#{file}' created"
|
230
193
|
else
|
231
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be
|
194
|
+
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
232
195
|
next
|
233
196
|
end
|
234
197
|
i += 1
|
@@ -240,28 +203,26 @@ module Libis
|
|
240
203
|
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
241
204
|
end
|
242
205
|
end
|
243
|
-
|
206
|
+
|
244
207
|
{
|
245
|
-
command: {status: 0},
|
246
|
-
files
|
247
|
-
headers
|
208
|
+
command: { status: 0 },
|
209
|
+
files:,
|
210
|
+
headers:,
|
248
211
|
warnings: @warnings
|
249
212
|
}
|
250
|
-
|
251
213
|
rescue Exception => e
|
252
|
-
# puts "ERROR: Exception #{e.class} raised: #{e.message}"
|
253
|
-
# e.backtrace.each {|t| puts " - #{t}"}
|
254
214
|
raise unless root_msg
|
215
|
+
|
255
216
|
msg.close
|
256
|
-
|
257
|
-
command: {status: -1},
|
217
|
+
{
|
218
|
+
command: { status: -1 },
|
258
219
|
files: [],
|
259
220
|
headers: {},
|
260
221
|
errors: [
|
261
222
|
{
|
262
223
|
error: e.message,
|
263
224
|
error_class: e.class.name,
|
264
|
-
error_trace: e.backtrace
|
225
|
+
error_trace: e.backtrace
|
265
226
|
}
|
266
227
|
],
|
267
228
|
warnings: @warnings
|
@@ -270,15 +231,13 @@ module Libis
|
|
270
231
|
|
271
232
|
protected
|
272
233
|
|
273
|
-
def eml_to_html
|
274
|
-
|
275
|
-
end
|
234
|
+
def eml_to_html; end
|
276
235
|
|
277
236
|
private
|
278
237
|
|
279
238
|
def find_hdr(list, key)
|
280
239
|
keys = list.keys
|
281
|
-
if k = keys.find {|x| x.to_s =~ /^#{key}$/i}
|
240
|
+
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
282
241
|
v = list[k]
|
283
242
|
v = v.first if v.is_a? Array
|
284
243
|
v = Rfc2047.decode(v).strip if v.is_a? String
|
@@ -288,27 +247,23 @@ module Libis
|
|
288
247
|
end
|
289
248
|
|
290
249
|
def hdr_html(key, value)
|
291
|
-
return HEADER_FIELD_TEMPLATE
|
250
|
+
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
251
|
+
|
292
252
|
''
|
293
253
|
end
|
294
254
|
|
295
|
-
def
|
255
|
+
def get_attachment_data(attachments, cid)
|
296
256
|
attachments.each do |attachment|
|
297
|
-
|
298
|
-
|
299
|
-
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
257
|
+
next unless attachment.properties.attach_content_id == cid
|
258
|
+
|
259
|
+
attachment.data.rewind
|
260
|
+
return {
|
261
|
+
mime_type: attachment.properties.attach_mime_tag,
|
262
|
+
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
263
|
+
}
|
304
264
|
end
|
305
265
|
nil
|
306
266
|
end
|
307
|
-
|
308
|
-
def read_header(headers_file)
|
309
|
-
headers = YAML.load_file(headers_file)
|
310
|
-
headers.symbolize_keys
|
311
|
-
end
|
312
267
|
end
|
313
268
|
end
|
314
269
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'fileutils'
|
2
4
|
|
3
5
|
require 'libis/tools/extend/string'
|
@@ -9,60 +11,61 @@ require 'libis/format/config'
|
|
9
11
|
module Libis
|
10
12
|
module Format
|
11
13
|
module Tool
|
12
|
-
|
13
14
|
class OfficeToPdf
|
14
15
|
include ::Libis::Tools::Logger
|
15
16
|
|
16
17
|
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd],
|
18
|
-
result[:status]
|
18
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
+
(result[:status]).zero?
|
19
20
|
end
|
20
21
|
|
21
|
-
def self.run(source, target, options
|
22
|
-
|
22
|
+
def self.run(source, target, **options)
|
23
|
+
new.run source, target, **options
|
23
24
|
end
|
24
25
|
|
25
|
-
def run(source, target, options
|
26
|
+
def run(source, target, **options)
|
26
27
|
workdir = '/...'
|
27
28
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
28
29
|
|
29
|
-
workdir = File.join(workdir, rand(
|
30
|
+
workdir = File.join(workdir, rand(1_000_000).to_s)
|
30
31
|
FileUtils.mkpath(workdir)
|
31
32
|
|
32
33
|
src_file = File.join(workdir, File.basename(source))
|
33
34
|
FileUtils.symlink source, src_file
|
34
35
|
|
35
|
-
tgt_file = File.join(workdir, File.basename(source, '.*')
|
36
|
+
tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
|
36
37
|
|
37
38
|
export_filter = options[:export_filter] || 'pdf'
|
38
39
|
|
39
40
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
40
41
|
result = Libis::Tools::Command.run(
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
42
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
43
|
+
"-env:UserInstallation=file://#{workdir}",
|
44
|
+
'--convert-to', export_filter,
|
45
|
+
'--outdir', workdir, src_file,
|
46
|
+
timeout:,
|
47
|
+
kill_after: timeout * 2
|
47
48
|
)
|
48
49
|
|
49
|
-
raise
|
50
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
51
|
+
|
50
52
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
51
|
-
raise
|
53
|
+
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
52
54
|
|
53
55
|
FileUtils.copy tgt_file, target, preserve: true
|
54
56
|
|
55
57
|
{
|
56
58
|
command: result,
|
57
|
-
files: [
|
59
|
+
files: [target]
|
58
60
|
}
|
59
|
-
|
60
61
|
ensure
|
61
|
-
|
62
|
-
|
62
|
+
begin
|
63
|
+
FileUtils.rmtree workdir
|
64
|
+
rescue StandardError
|
65
|
+
nil
|
66
|
+
end
|
63
67
|
end
|
64
68
|
end
|
65
|
-
|
66
69
|
end
|
67
70
|
end
|
68
71
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'os'
|
2
4
|
|
3
5
|
require 'libis/tools/extend/string'
|
@@ -9,49 +11,46 @@ require 'libis/format/config'
|
|
9
11
|
module Libis
|
10
12
|
module Format
|
11
13
|
module Tool
|
12
|
-
|
13
14
|
class PdfSplit
|
14
15
|
include ::Libis::Tools::Logger
|
15
16
|
|
16
17
|
def self.installed?
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd],
|
18
|
-
return false unless result[:status]
|
18
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
|
19
|
+
return false unless (result[:status]).zero?
|
20
|
+
|
19
21
|
File.exist?(Libis::Format::Config[:pdf_tool])
|
20
22
|
end
|
21
23
|
|
22
|
-
def self.run(source, target,
|
23
|
-
|
24
|
+
def self.run(source, target, *args)
|
25
|
+
new.run source, target, *args
|
24
26
|
end
|
25
27
|
|
26
|
-
def run(source, target,
|
27
|
-
|
28
|
+
def run(source, target, *args)
|
28
29
|
if OS.java?
|
29
30
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
30
31
|
end
|
31
32
|
|
32
33
|
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
33
34
|
result = Libis::Tools::Command.run(
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
35
|
+
Libis::Format::Config[:java_cmd],
|
36
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
37
|
+
'SplitPdf',
|
38
|
+
'--file_input', source,
|
39
|
+
'--file_output', target,
|
40
|
+
*args,
|
41
|
+
timeout:,
|
42
|
+
kill_after: timeout * 2
|
42
43
|
)
|
43
44
|
|
44
|
-
raise
|
45
|
-
raise
|
45
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
+
raise "#{self.class} errors: #{result[:err].join("\n")}" unless (result[:status]).zero? && result[:err].empty?
|
46
47
|
|
47
48
|
{
|
48
49
|
command: result,
|
49
|
-
files: [
|
50
|
+
files: [target] # TODO: collect the files
|
50
51
|
}
|
51
|
-
|
52
52
|
end
|
53
53
|
end
|
54
|
-
|
55
54
|
end
|
56
55
|
end
|
57
56
|
end
|