libis-format 1.2.6 → 1.2.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -1
- data/lib/libis/format/cli/prompt_helper.rb +32 -24
- data/lib/libis/format/converter/audio_converter.rb +23 -30
- data/lib/libis/format/identifier.rb +1 -1
- data/lib/libis/format/tool/extension_identification.rb +23 -25
- data/lib/libis/format/tool/fido.rb +22 -27
- data/lib/libis/format/tool/file_tool.rb +11 -24
- data/lib/libis/format/tool/fop_pdf.rb +19 -20
- data/lib/libis/format/tool/identification_tool.rb +34 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +84 -114
- data/lib/libis/format/tool/office_to_pdf.rb +24 -21
- data/lib/libis/format/tool/pdf_split.rb +19 -20
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -30
- data/lib/libis/format/tool/pdfa_validator.rb +16 -14
- data/lib/libis/format/version.rb +3 -1
- data/libis-format.gemspec +23 -22
- data/tools/fop/fop.bat +75 -75
- data/tools/fop/fop.cmd +31 -31
- data/tools/fop/fop.js +341 -341
- data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
- data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
- data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
- data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
- metadata +62 -72
- data/.coveralls.yml +0 -2
- data/.gitignore +0 -21
- data/.travis.yml +0 -74
- data/.vscode/launch.json +0 -21
- data/Rakefile +0 -12
- data/base/Dockerfile +0 -33
- data/base/Dockerfile.alpine +0 -20
- data/base/Dockerfile.rvm +0 -56
- data/base/rework_path +0 -25
- data/docker_cfg.yml +0 -1
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'csv'
|
|
2
4
|
require 'tmpdir'
|
|
3
5
|
|
|
@@ -12,42 +14,41 @@ require 'libis/format/type_database'
|
|
|
12
14
|
module Libis
|
|
13
15
|
module Format
|
|
14
16
|
module Tool
|
|
15
|
-
|
|
16
|
-
class IdentificationTool
|
|
17
|
+
class IdentificationTool
|
|
17
18
|
include Singleton
|
|
18
19
|
include ::Libis::Tools::Logger
|
|
19
20
|
|
|
20
21
|
def self.bad_mimetype(mimetype)
|
|
21
|
-
|
|
22
|
+
instance.bad_mimetype(mimetype)
|
|
22
23
|
end
|
|
23
24
|
|
|
24
|
-
def self.run(file, recursive = false, options
|
|
25
|
-
options ||= {}
|
|
25
|
+
def self.run(file, recursive = false, **options)
|
|
26
26
|
if file.is_a?(Array)
|
|
27
|
-
return run_list file, options
|
|
28
|
-
elsif file.is_a?(String) && File.
|
|
27
|
+
return run_list file, **options
|
|
28
|
+
elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
|
|
29
29
|
if File.directory?(file)
|
|
30
|
-
return run_dir(file, recursive, options)
|
|
30
|
+
return run_dir(file, recursive, **options)
|
|
31
31
|
elsif File.file?(file)
|
|
32
|
-
return
|
|
32
|
+
return instance.run(file, **options)
|
|
33
33
|
end
|
|
34
34
|
end
|
|
35
|
+
|
|
35
36
|
raise ArgumentError,
|
|
36
37
|
'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
|
|
37
38
|
end
|
|
38
39
|
|
|
39
|
-
def self.run_dir(file, recursive = true, options
|
|
40
|
-
|
|
40
|
+
def self.run_dir(file, recursive = true, **options)
|
|
41
|
+
instance.run_dir file, recursive, **options
|
|
41
42
|
end
|
|
42
43
|
|
|
43
|
-
def self.run_list(filelist
|
|
44
|
-
|
|
44
|
+
def self.run_list(filelist, **options)
|
|
45
|
+
instance.run_list filelist, **options
|
|
45
46
|
end
|
|
46
47
|
|
|
47
48
|
protected
|
|
48
49
|
|
|
49
50
|
def create_list_file(filelist)
|
|
50
|
-
list_file = Tempfile.new(%w
|
|
51
|
+
list_file = Tempfile.new(%w[file .list])
|
|
51
52
|
filelist.each do |fname|
|
|
52
53
|
list_file.write "#{fname}\n"
|
|
53
54
|
end
|
|
@@ -83,20 +84,19 @@ module Libis
|
|
|
83
84
|
# { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
|
|
84
85
|
#
|
|
85
86
|
def process_output(output)
|
|
86
|
-
output.
|
|
87
|
+
output.each_with_object({}) do |x, results|
|
|
87
88
|
filepath = File.absolute_path(x.delete(:filepath)).freeze
|
|
88
89
|
results[filepath] ||= []
|
|
89
90
|
results[filepath] << annotate(x)
|
|
90
|
-
results
|
|
91
91
|
end
|
|
92
92
|
end
|
|
93
93
|
|
|
94
94
|
# Enhance the output with mimetype and score
|
|
95
95
|
def annotate(result)
|
|
96
96
|
# Enhance result with mimetype if needed
|
|
97
|
-
|
|
97
|
+
bad_mimetypes.include?(result[:mimetype]) &&
|
|
98
|
+
!bad_puids.include?(result[:puid]) &&
|
|
98
99
|
result[:mimetype] = get_mimetype(result[:puid])
|
|
99
|
-
end
|
|
100
100
|
|
|
101
101
|
# Normalize the mimetype
|
|
102
102
|
Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
|
|
@@ -108,55 +108,54 @@ module Libis
|
|
|
108
108
|
result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
|
|
109
109
|
|
|
110
110
|
# freeze all strings
|
|
111
|
-
result.each {|_, v| v.freeze if v.is_a?(String)}
|
|
111
|
+
result.each { |_, v| v.freeze if v.is_a?(String) }
|
|
112
112
|
|
|
113
113
|
# Adapt score based on matchtype
|
|
114
114
|
result[:matchtype] = result[:matchtype].to_s.downcase
|
|
115
115
|
case result[:matchtype]
|
|
116
116
|
|
|
117
117
|
# Signature match increases score by 2
|
|
118
|
-
|
|
119
|
-
|
|
118
|
+
when 'signature'
|
|
119
|
+
result[:score] += 2
|
|
120
120
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
|
121
121
|
# ext = File.extname(result[:filename])
|
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
|
123
123
|
|
|
124
124
|
# Container match increases score by 4
|
|
125
|
-
|
|
126
|
-
|
|
125
|
+
when 'container'
|
|
126
|
+
result[:score] += 4
|
|
127
127
|
# typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
|
|
128
128
|
# ext = File.extname(result[:filename])
|
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
|
130
130
|
|
|
131
131
|
# Extension match is the weakest identification; score is lowered by 2 points
|
|
132
|
-
|
|
133
|
-
|
|
132
|
+
when 'extension'
|
|
133
|
+
result[:score] -= 2
|
|
134
134
|
|
|
135
135
|
# Magic code (file tool) is to be trusted even less
|
|
136
|
-
|
|
137
|
-
|
|
136
|
+
when 'magic'
|
|
137
|
+
result[:score] -= 3
|
|
138
138
|
|
|
139
|
-
# Or no change otherwise
|
|
140
|
-
else
|
|
141
|
-
# do nothing
|
|
142
139
|
end
|
|
143
140
|
|
|
144
141
|
# Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
|
|
145
142
|
# Office OpenXML, OpenDocument, jar, maff, svx)
|
|
146
|
-
if result[:mimetype] == 'application/zip'
|
|
147
|
-
result[:score] -= 2
|
|
148
|
-
end
|
|
143
|
+
result[:score] -= 2 if result[:mimetype] == 'application/zip'
|
|
149
144
|
|
|
150
145
|
# Return result enhanced with mimetype and score fields
|
|
151
146
|
result
|
|
152
147
|
end
|
|
153
148
|
|
|
154
149
|
def get_mimetype(puid)
|
|
155
|
-
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
|
150
|
+
::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
|
|
151
|
+
rescue StandardError
|
|
152
|
+
nil
|
|
156
153
|
end
|
|
157
154
|
|
|
158
155
|
def get_puid(mimetype)
|
|
159
|
-
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
|
156
|
+
::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
|
|
157
|
+
rescue StandardError
|
|
158
|
+
nil
|
|
160
159
|
end
|
|
161
160
|
|
|
162
161
|
attr_accessor :bad_mimetypes, :bad_puids
|
|
@@ -170,7 +169,6 @@ module Libis
|
|
|
170
169
|
@bad_mimetypes << mimetype
|
|
171
170
|
end
|
|
172
171
|
end
|
|
173
|
-
|
|
174
172
|
end
|
|
175
173
|
end
|
|
176
174
|
end
|
|
@@ -1,12 +1,12 @@
|
|
|
1
|
-
#
|
|
1
|
+
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require 'mapi/msg'
|
|
4
4
|
require 'rfc_2047'
|
|
5
5
|
require 'cgi'
|
|
6
6
|
require 'pdfkit'
|
|
7
|
-
|
|
7
|
+
require 'time'
|
|
8
8
|
require 'fileutils'
|
|
9
|
-
|
|
9
|
+
require 'pathname'
|
|
10
10
|
require 'libis/format/config'
|
|
11
11
|
|
|
12
12
|
module Libis
|
|
@@ -15,23 +15,23 @@ module Libis
|
|
|
15
15
|
class MsgToPdf
|
|
16
16
|
include ::Libis::Tools::Logger
|
|
17
17
|
|
|
18
|
-
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {background: white
|
|
18
|
+
HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
|
|
19
19
|
HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
|
|
20
20
|
HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
|
|
21
|
-
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>'
|
|
21
|
+
HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
|
|
22
22
|
|
|
23
|
-
IMG_CID_PLAIN_REGEX =
|
|
24
|
-
IMG_CID_HTML_REGEX =
|
|
23
|
+
IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
|
|
24
|
+
IMG_CID_HTML_REGEX = /cid:([^"]*)/m
|
|
25
25
|
|
|
26
26
|
def self.installed?
|
|
27
27
|
File.exist?(Libis::Format::Config[:wkhtmltopdf])
|
|
28
28
|
end
|
|
29
29
|
|
|
30
|
-
def self.run(source, target, options
|
|
31
|
-
new.run source, target, options
|
|
30
|
+
def self.run(source, target, **options)
|
|
31
|
+
new.run source, target, **options
|
|
32
32
|
end
|
|
33
33
|
|
|
34
|
-
def run(source, target, options
|
|
34
|
+
def run(source, target, **options)
|
|
35
35
|
# Preliminary checks
|
|
36
36
|
# ------------------
|
|
37
37
|
|
|
@@ -52,41 +52,16 @@ module Libis
|
|
|
52
52
|
result
|
|
53
53
|
end
|
|
54
54
|
|
|
55
|
-
def msg_to_pdf(msg, target, target_format, pdf_options,
|
|
56
|
-
|
|
55
|
+
def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
|
|
57
56
|
# Make sure the target directory exists
|
|
58
57
|
outdir = File.dirname(target)
|
|
59
58
|
FileUtils.mkdir_p(outdir)
|
|
60
59
|
|
|
61
|
-
# puts "Headers:"
|
|
62
|
-
# puts '--------'
|
|
63
|
-
# pp msg.headers
|
|
64
|
-
|
|
65
|
-
# puts "Recipients:"
|
|
66
|
-
# puts '-----------'
|
|
67
|
-
# pp msg.recipients
|
|
68
|
-
|
|
69
|
-
# puts "Body:"
|
|
70
|
-
# puts '-----'
|
|
71
|
-
# puts msg.properties.body
|
|
72
|
-
# puts '-----'
|
|
73
|
-
# puts msg.properties.body_rtf
|
|
74
|
-
# puts '-----'
|
|
75
|
-
# puts msg.properties.body_html
|
|
76
|
-
|
|
77
|
-
# puts "Attachments:"
|
|
78
|
-
# puts '------------'
|
|
79
|
-
# msg.attachments.each {|a| p "#{a.filename} - #{a.properties.attach_content_id}"}
|
|
80
|
-
|
|
81
|
-
# puts "Converting:"
|
|
82
|
-
# puts '-----------'
|
|
83
|
-
|
|
84
60
|
# Get the body of the message in HTML
|
|
85
61
|
body = msg.properties.body_html
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
end
|
|
62
|
+
|
|
63
|
+
# Embed plain body in HTML as a fallback
|
|
64
|
+
body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
|
|
90
65
|
|
|
91
66
|
# Check and fix the character encoding
|
|
92
67
|
begin
|
|
@@ -94,8 +69,8 @@ module Libis
|
|
|
94
69
|
body.encode!('UTF-8', universal_newline: true)
|
|
95
70
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
|
|
96
71
|
begin
|
|
97
|
-
|
|
98
|
-
|
|
72
|
+
# If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
|
|
73
|
+
body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
|
|
99
74
|
rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
|
|
100
75
|
# If that fails too, log a warning and replace the invalid/unknown characters with a '?' character.
|
|
101
76
|
@warnings << "#{e.class}: #{e.message}"
|
|
@@ -108,7 +83,7 @@ module Libis
|
|
|
108
83
|
headers = {}
|
|
109
84
|
hdr_html = ''
|
|
110
85
|
|
|
111
|
-
%w
|
|
86
|
+
%w[From To Cc Subject Date].each do |key|
|
|
112
87
|
value = find_hdr(msg.headers, key)
|
|
113
88
|
if value
|
|
114
89
|
headers[key.downcase.to_sym] = value
|
|
@@ -116,18 +91,27 @@ module Libis
|
|
|
116
91
|
end
|
|
117
92
|
end
|
|
118
93
|
|
|
94
|
+
[:date].each do |key|
|
|
95
|
+
next unless headers[key]
|
|
96
|
+
|
|
97
|
+
headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
|
|
98
|
+
end
|
|
99
|
+
|
|
119
100
|
# Add header section to the HTML body
|
|
120
101
|
unless hdr_html.empty?
|
|
121
102
|
# Insert header block styles
|
|
122
|
-
if body =~
|
|
103
|
+
if body =~ %r{</head>}
|
|
123
104
|
# if head exists, append the style block
|
|
124
|
-
body.gsub!(
|
|
105
|
+
body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
|
|
106
|
+
elsif body =~ %r{<head/>}
|
|
107
|
+
# empty head, replace with the style block
|
|
108
|
+
body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
|
|
125
109
|
else
|
|
126
110
|
# otherwise insert a head section before the body tag
|
|
127
|
-
body.gsub!(/<body/,
|
|
111
|
+
body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
|
|
128
112
|
end
|
|
129
113
|
# Add the headers html table as first element in the body section
|
|
130
|
-
body.gsub!(/<body[^>]*>/) {|m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}"}
|
|
114
|
+
body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
|
|
131
115
|
end
|
|
132
116
|
|
|
133
117
|
# Embed inline images
|
|
@@ -136,29 +120,23 @@ module Libis
|
|
|
136
120
|
used_files = []
|
|
137
121
|
|
|
138
122
|
# First process plaintext cid entries
|
|
139
|
-
body.gsub!(IMG_CID_PLAIN_REGEX) do |
|
|
140
|
-
|
|
141
|
-
data = getAttachmentData(attachments, $1)
|
|
123
|
+
body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
|
|
124
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
|
142
125
|
if data
|
|
143
|
-
|
|
144
|
-
used_files << $1
|
|
126
|
+
used_files << ::Regexp.last_match(1)
|
|
145
127
|
"<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
|
|
146
128
|
else
|
|
147
|
-
# puts "cid #{$1} not found"
|
|
148
129
|
'<img src=""/>'
|
|
149
130
|
end
|
|
150
131
|
end
|
|
151
|
-
|
|
132
|
+
|
|
152
133
|
# Then process HTML img tags with CID entries
|
|
153
|
-
body.gsub!(IMG_CID_HTML_REGEX) do |
|
|
154
|
-
|
|
155
|
-
data
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
used_files << $1
|
|
159
|
-
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
|
134
|
+
body.gsub!(IMG_CID_HTML_REGEX) do |_match|
|
|
135
|
+
data = get_attachment_data(attachments, ::Regexp.last_match(1))
|
|
136
|
+
if data
|
|
137
|
+
used_files << ::Regexp.last_match(1)
|
|
138
|
+
"data:#{data[:mime_type]};base64,#{data[:base64]}"
|
|
160
139
|
else
|
|
161
|
-
# puts "cid #{$1} not found"
|
|
162
140
|
''
|
|
163
141
|
end
|
|
164
142
|
end
|
|
@@ -168,85 +146,83 @@ module Libis
|
|
|
168
146
|
files = []
|
|
169
147
|
|
|
170
148
|
if target_format == :PDF
|
|
171
|
-
|
|
149
|
+
# PDF creation options
|
|
172
150
|
pdf_options = {
|
|
173
151
|
page_size: 'A4',
|
|
174
152
|
margin_top: '10mm',
|
|
175
153
|
margin_bottom: '10mm',
|
|
176
154
|
margin_left: '10mm',
|
|
177
155
|
margin_right: '10mm',
|
|
178
|
-
dpi: 300,
|
|
179
156
|
# image_quality: 100,
|
|
180
157
|
# viewport_size: '2480x3508',
|
|
158
|
+
dpi: 300
|
|
181
159
|
}.merge pdf_options
|
|
182
160
|
|
|
183
|
-
# pp pdf_options
|
|
184
|
-
# puts "Final HTML body:"
|
|
185
|
-
# pp body
|
|
186
161
|
subject = find_hdr(msg.headers, 'Subject')
|
|
187
162
|
kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
|
|
188
163
|
pdf = kit.to_pdf
|
|
189
|
-
File.open(target, 'wb') {|f| f.write(pdf)}
|
|
190
|
-
# puts "message #{subject} converted to PDF file '#{target}'"
|
|
164
|
+
File.open(target, 'wb') { |f| f.write(pdf) }
|
|
191
165
|
else
|
|
192
|
-
File.open(target, 'wb') {|f| f.write(body)}
|
|
193
|
-
# puts "message #{subject} converted to HTML file '#{target}'"
|
|
166
|
+
File.open(target, 'wb') { |f| f.write(body) }
|
|
194
167
|
end
|
|
195
168
|
files << target if File.exist?(target)
|
|
196
169
|
|
|
197
170
|
# Save attachments
|
|
198
171
|
# ----------------
|
|
199
172
|
outdir = File.join(outdir, "#{File.basename(target)}.attachments")
|
|
200
|
-
digits = ((attachments.count + 1)/ 10) + 1
|
|
173
|
+
digits = ((attachments.count + 1) / 10) + 1
|
|
201
174
|
i = 1
|
|
202
|
-
attachments.delete_if {|a| a.properties.attachment_hidden}.each do |a|
|
|
203
|
-
prefix = "#{
|
|
204
|
-
if sub_msg = a.instance_variable_get(:@embedded_msg)
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
208
|
-
|
|
209
|
-
|
|
210
|
-
if e = result[:error]
|
|
211
|
-
raise
|
|
175
|
+
attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
|
|
176
|
+
prefix = "#{format('%0*d', digits, i)}-"
|
|
177
|
+
if (sub_msg = a.instance_variable_get(:@embedded_msg))
|
|
178
|
+
subject = a.properties[:display_name] || sub_msg.subject || ''
|
|
179
|
+
file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
|
|
180
|
+
result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
|
|
181
|
+
if (e = result[:error])
|
|
182
|
+
raise e
|
|
212
183
|
end
|
|
184
|
+
|
|
213
185
|
files += result[:files]
|
|
214
186
|
elsif a.filename
|
|
215
187
|
next if used_files.include?(a.filename)
|
|
216
|
-
file = File.join(outdir, "#{prefix}#{a.filename}")
|
|
217
188
|
|
|
189
|
+
file = File.join(outdir, "#{prefix}#{a.filename}")
|
|
218
190
|
FileUtils.mkdir_p(File.dirname(file))
|
|
219
|
-
File.open(file, 'wb') {|f| a.save(f)}
|
|
191
|
+
File.open(file, 'wb') { |f| a.save(f) }
|
|
220
192
|
files << file
|
|
221
|
-
# puts "Attachment file '#{file}' created"
|
|
222
193
|
else
|
|
223
|
-
@warnings << "Attachment #{a.properties[:display_name]} cannot be
|
|
194
|
+
@warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
|
|
224
195
|
next
|
|
225
196
|
end
|
|
226
197
|
i += 1
|
|
227
198
|
end
|
|
228
|
-
|
|
199
|
+
|
|
200
|
+
if root_msg
|
|
201
|
+
p = Pathname(File.dirname(files.first))
|
|
202
|
+
files[1..].each do |f|
|
|
203
|
+
(headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
|
|
204
|
+
end
|
|
205
|
+
end
|
|
206
|
+
|
|
229
207
|
{
|
|
230
|
-
command: {status: 0},
|
|
231
|
-
files
|
|
232
|
-
headers
|
|
208
|
+
command: { status: 0 },
|
|
209
|
+
files:,
|
|
210
|
+
headers:,
|
|
233
211
|
warnings: @warnings
|
|
234
212
|
}
|
|
235
|
-
|
|
236
213
|
rescue Exception => e
|
|
237
|
-
|
|
238
|
-
|
|
239
|
-
raise if reraise
|
|
214
|
+
raise unless root_msg
|
|
215
|
+
|
|
240
216
|
msg.close
|
|
241
|
-
|
|
242
|
-
command: {status: -1},
|
|
217
|
+
{
|
|
218
|
+
command: { status: -1 },
|
|
243
219
|
files: [],
|
|
244
220
|
headers: {},
|
|
245
221
|
errors: [
|
|
246
222
|
{
|
|
247
223
|
error: e.message,
|
|
248
224
|
error_class: e.class.name,
|
|
249
|
-
error_trace: e.backtrace
|
|
225
|
+
error_trace: e.backtrace
|
|
250
226
|
}
|
|
251
227
|
],
|
|
252
228
|
warnings: @warnings
|
|
@@ -255,15 +231,13 @@ module Libis
|
|
|
255
231
|
|
|
256
232
|
protected
|
|
257
233
|
|
|
258
|
-
def eml_to_html
|
|
259
|
-
|
|
260
|
-
end
|
|
234
|
+
def eml_to_html; end
|
|
261
235
|
|
|
262
236
|
private
|
|
263
237
|
|
|
264
238
|
def find_hdr(list, key)
|
|
265
239
|
keys = list.keys
|
|
266
|
-
if k = keys.find {|x| x.to_s =~ /^#{key}$/i}
|
|
240
|
+
if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
|
|
267
241
|
v = list[k]
|
|
268
242
|
v = v.first if v.is_a? Array
|
|
269
243
|
v = Rfc2047.decode(v).strip if v.is_a? String
|
|
@@ -273,27 +247,23 @@ module Libis
|
|
|
273
247
|
end
|
|
274
248
|
|
|
275
249
|
def hdr_html(key, value)
|
|
276
|
-
return HEADER_FIELD_TEMPLATE
|
|
250
|
+
return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
|
|
251
|
+
|
|
277
252
|
''
|
|
278
253
|
end
|
|
279
254
|
|
|
280
|
-
def
|
|
255
|
+
def get_attachment_data(attachments, cid)
|
|
281
256
|
attachments.each do |attachment|
|
|
282
|
-
|
|
283
|
-
|
|
284
|
-
|
|
285
|
-
|
|
286
|
-
|
|
287
|
-
|
|
288
|
-
|
|
257
|
+
next unless attachment.properties.attach_content_id == cid
|
|
258
|
+
|
|
259
|
+
attachment.data.rewind
|
|
260
|
+
return {
|
|
261
|
+
mime_type: attachment.properties.attach_mime_tag,
|
|
262
|
+
base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
|
|
263
|
+
}
|
|
289
264
|
end
|
|
290
265
|
nil
|
|
291
266
|
end
|
|
292
|
-
|
|
293
|
-
def read_header(headers_file)
|
|
294
|
-
headers = YAML.load_file(headers_file)
|
|
295
|
-
headers.symbolize_keys
|
|
296
|
-
end
|
|
297
267
|
end
|
|
298
268
|
end
|
|
299
269
|
end
|
|
@@ -1,3 +1,5 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
1
3
|
require 'fileutils'
|
|
2
4
|
|
|
3
5
|
require 'libis/tools/extend/string'
|
|
@@ -9,60 +11,61 @@ require 'libis/format/config'
|
|
|
9
11
|
module Libis
|
|
10
12
|
module Format
|
|
11
13
|
module Tool
|
|
12
|
-
|
|
13
14
|
class OfficeToPdf
|
|
14
15
|
include ::Libis::Tools::Logger
|
|
15
16
|
|
|
16
17
|
def self.installed?
|
|
17
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd],
|
|
18
|
-
result[:status]
|
|
18
|
+
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
|
19
|
+
(result[:status]).zero?
|
|
19
20
|
end
|
|
20
21
|
|
|
21
|
-
def self.run(source, target, options
|
|
22
|
-
|
|
22
|
+
def self.run(source, target, **options)
|
|
23
|
+
new.run source, target, **options
|
|
23
24
|
end
|
|
24
25
|
|
|
25
|
-
def run(source, target, options
|
|
26
|
+
def run(source, target, **options)
|
|
26
27
|
workdir = '/...'
|
|
27
28
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
|
28
29
|
|
|
29
|
-
workdir = File.join(workdir, rand(
|
|
30
|
+
workdir = File.join(workdir, rand(1_000_000).to_s)
|
|
30
31
|
FileUtils.mkpath(workdir)
|
|
31
32
|
|
|
32
33
|
src_file = File.join(workdir, File.basename(source))
|
|
33
34
|
FileUtils.symlink source, src_file
|
|
34
35
|
|
|
35
|
-
tgt_file = File.join(workdir, File.basename(source, '.*')
|
|
36
|
+
tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
|
|
36
37
|
|
|
37
38
|
export_filter = options[:export_filter] || 'pdf'
|
|
38
39
|
|
|
39
40
|
timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
|
|
40
41
|
result = Libis::Tools::Command.run(
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
|
|
42
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
|
43
|
+
"-env:UserInstallation=file://#{workdir}",
|
|
44
|
+
'--convert-to', export_filter,
|
|
45
|
+
'--outdir', workdir, src_file,
|
|
46
|
+
timeout:,
|
|
47
|
+
kill_after: timeout * 2
|
|
47
48
|
)
|
|
48
49
|
|
|
49
|
-
raise
|
|
50
|
+
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
|
51
|
+
|
|
50
52
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
|
51
|
-
raise
|
|
53
|
+
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
|
52
54
|
|
|
53
55
|
FileUtils.copy tgt_file, target, preserve: true
|
|
54
56
|
|
|
55
57
|
{
|
|
56
58
|
command: result,
|
|
57
|
-
files: [
|
|
59
|
+
files: [target]
|
|
58
60
|
}
|
|
59
|
-
|
|
60
61
|
ensure
|
|
61
|
-
|
|
62
|
-
|
|
62
|
+
begin
|
|
63
|
+
FileUtils.rmtree workdir
|
|
64
|
+
rescue StandardError
|
|
65
|
+
nil
|
|
66
|
+
end
|
|
63
67
|
end
|
|
64
68
|
end
|
|
65
|
-
|
|
66
69
|
end
|
|
67
70
|
end
|
|
68
71
|
end
|