libis-format 1.2.7 → 1.2.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'csv'
2
4
  require 'tmpdir'
3
5
 
@@ -12,42 +14,41 @@ require 'libis/format/type_database'
12
14
  module Libis
13
15
  module Format
14
16
  module Tool
15
-
16
- class IdentificationTool
17
+ class IdentificationTool
17
18
  include Singleton
18
19
  include ::Libis::Tools::Logger
19
20
 
20
21
  def self.bad_mimetype(mimetype)
21
- self.instance.bad_mimetype(mimetype)
22
+ instance.bad_mimetype(mimetype)
22
23
  end
23
24
 
24
- def self.run(file, recursive = false, options = {})
25
- options ||= {}
25
+ def self.run(file, recursive = false, **options)
26
26
  if file.is_a?(Array)
27
- return run_list file, options
28
- elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
27
+ return run_list file, **options
28
+ elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
29
29
  if File.directory?(file)
30
- return run_dir(file, recursive, options)
30
+ return run_dir(file, recursive, **options)
31
31
  elsif File.file?(file)
32
- return self.instance.run(file, options)
32
+ return instance.run(file, **options)
33
33
  end
34
34
  end
35
+
35
36
  raise ArgumentError,
36
37
  'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
37
38
  end
38
39
 
39
- def self.run_dir(file, recursive = true, options = {})
40
- self.instance.run_dir file, recursive, options
40
+ def self.run_dir(file, recursive = true, **options)
41
+ instance.run_dir file, recursive, **options
41
42
  end
42
43
 
43
- def self.run_list(filelist , options = {})
44
- self.instance.run_list filelist, options
44
+ def self.run_list(filelist, **options)
45
+ instance.run_list filelist, **options
45
46
  end
46
47
 
47
48
  protected
48
49
 
49
50
  def create_list_file(filelist)
50
- list_file = Tempfile.new(%w'file .list')
51
+ list_file = Tempfile.new(%w[file .list])
51
52
  filelist.each do |fname|
52
53
  list_file.write "#{fname}\n"
53
54
  end
@@ -83,20 +84,19 @@ module Libis
83
84
  # { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
84
85
  #
85
86
  def process_output(output)
86
- output.reduce({}) do |results, x|
87
+ output.each_with_object({}) do |x, results|
87
88
  filepath = File.absolute_path(x.delete(:filepath)).freeze
88
89
  results[filepath] ||= []
89
90
  results[filepath] << annotate(x)
90
- results
91
91
  end
92
92
  end
93
93
 
94
94
  # Enhance the output with mimetype and score
95
95
  def annotate(result)
96
96
  # Enhance result with mimetype if needed
97
- if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
97
+ bad_mimetypes.include?(result[:mimetype]) &&
98
+ !bad_puids.include?(result[:puid]) &&
98
99
  result[:mimetype] = get_mimetype(result[:puid])
99
- end
100
100
 
101
101
  # Normalize the mimetype
102
102
  Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
@@ -108,55 +108,54 @@ module Libis
108
108
  result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
109
109
 
110
110
  # freeze all strings
111
- result.each {|_, v| v.freeze if v.is_a?(String)}
111
+ result.each { |_, v| v.freeze if v.is_a?(String) }
112
112
 
113
113
  # Adapt score based on matchtype
114
114
  result[:matchtype] = result[:matchtype].to_s.downcase
115
115
  case result[:matchtype]
116
116
 
117
117
  # Signature match increases score by 2
118
- when 'signature'
119
- result[:score] += 2
118
+ when 'signature'
119
+ result[:score] += 2
120
120
  # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
121
121
  # ext = File.extname(result[:filename])
122
122
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
123
123
 
124
124
  # Container match increases score by 4
125
- when 'container'
126
- result[:score] += 4
125
+ when 'container'
126
+ result[:score] += 4
127
127
  # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
128
128
  # ext = File.extname(result[:filename])
129
129
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
130
130
 
131
131
  # Extension match is the weakest identification; score is lowered by 2 points
132
- when 'extension'
133
- result[:score] -= 2
132
+ when 'extension'
133
+ result[:score] -= 2
134
134
 
135
135
  # Magic code (file tool) is to be trusted even less
136
- when 'magic'
137
- result[:score] -= 3
136
+ when 'magic'
137
+ result[:score] -= 3
138
138
 
139
- # Or no change otherwise
140
- else
141
- # do nothing
142
139
  end
143
140
 
144
141
  # Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
145
142
  # Office OpenXML, OpenDocument, jar, maff, svx)
146
- if result[:mimetype] == 'application/zip'
147
- result[:score] -= 2
148
- end
143
+ result[:score] -= 2 if result[:mimetype] == 'application/zip'
149
144
 
150
145
  # Return result enhanced with mimetype and score fields
151
146
  result
152
147
  end
153
148
 
154
149
  def get_mimetype(puid)
155
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
150
+ ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
151
+ rescue StandardError
152
+ nil
156
153
  end
157
154
 
158
155
  def get_puid(mimetype)
159
- ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
156
+ ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
157
+ rescue StandardError
158
+ nil
160
159
  end
161
160
 
162
161
  attr_accessor :bad_mimetypes, :bad_puids
@@ -170,7 +169,6 @@ module Libis
170
169
  @bad_mimetypes << mimetype
171
170
  end
172
171
  end
173
-
174
172
  end
175
173
  end
176
174
  end
@@ -1,15 +1,12 @@
1
- # encoding: utf-8
1
+ # frozen_string_literal: true
2
2
 
3
3
  require 'mapi/msg'
4
4
  require 'rfc_2047'
5
5
  require 'cgi'
6
6
  require 'pdfkit'
7
-
8
7
  require 'time'
9
-
10
8
  require 'fileutils'
11
9
  require 'pathname'
12
-
13
10
  require 'libis/format/config'
14
11
 
15
12
  module Libis
@@ -18,23 +15,23 @@ module Libis
18
15
  class MsgToPdf
19
16
  include ::Libis::Tools::Logger
20
17
 
21
- HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {background: white;margin: 0;border: 1px solid #DDD;border-radius: 3px;padding: 8px;width: 100%%;box-sizing: border-box;}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>'
18
+ HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
22
19
  HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
23
20
  HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
24
- HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>'
21
+ HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
25
22
 
26
- IMG_CID_PLAIN_REGEX = %r/\[cid:(.*?)\]/m
27
- IMG_CID_HTML_REGEX = %r/cid:([^"]*)/m
23
+ IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
24
+ IMG_CID_HTML_REGEX = /cid:([^"]*)/m
28
25
 
29
26
  def self.installed?
30
27
  File.exist?(Libis::Format::Config[:wkhtmltopdf])
31
28
  end
32
29
 
33
- def self.run(source, target, options = {})
34
- new.run source, target, options
30
+ def self.run(source, target, **options)
31
+ new.run source, target, **options
35
32
  end
36
33
 
37
- def run(source, target, options = {})
34
+ def run(source, target, **options)
38
35
  # Preliminary checks
39
36
  # ------------------
40
37
 
@@ -56,40 +53,15 @@ module Libis
56
53
  end
57
54
 
58
55
  def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
59
-
60
56
  # Make sure the target directory exists
61
57
  outdir = File.dirname(target)
62
58
  FileUtils.mkdir_p(outdir)
63
59
 
64
- # puts "Headers:"
65
- # puts '--------'
66
- # pp msg.headers
67
-
68
- # puts "Recipients:"
69
- # puts '-----------'
70
- # pp msg.recipients
71
-
72
- # puts "Body:"
73
- # puts '-----'
74
- # puts msg.properties.body
75
- # puts '-----'
76
- # puts msg.properties.body_rtf
77
- # puts '-----'
78
- # puts msg.properties.body_html
79
-
80
- # puts "Attachments:"
81
- # puts '------------'
82
- # msg.attachments.each {|a| p "#{a.filename} - #{a.properties.attach_content_id}"}
83
-
84
- # puts "Converting:"
85
- # puts '-----------'
86
-
87
60
  # Get the body of the message in HTML
88
61
  body = msg.properties.body_html
89
- body ||= begin
90
- # Embed plain body in HTML as a fallback
91
- HTML_WRAPPER_TEMPLATE % msg.properties.body
92
- end
62
+
63
+ # Embed plain body in HTML as a fallback
64
+ body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
93
65
 
94
66
  # Check and fix the character encoding
95
67
  begin
@@ -97,8 +69,8 @@ module Libis
97
69
  body.encode!('UTF-8', universal_newline: true)
98
70
  rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
99
71
  begin
100
- # If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
101
- body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
72
+ # If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
73
+ body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
102
74
  rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
103
75
  # If that fails too, log a warning and replace the invalid/unknown characters with a ? character.
104
76
  @warnings << "#{e.class}: #{e.message}"
@@ -111,7 +83,7 @@ module Libis
111
83
  headers = {}
112
84
  hdr_html = ''
113
85
 
114
- %w"From To Cc Subject Date".each do |key|
86
+ %w[From To Cc Subject Date].each do |key|
115
87
  value = find_hdr(msg.headers, key)
116
88
  if value
117
89
  headers[key.downcase.to_sym] = value
@@ -121,21 +93,25 @@ module Libis
121
93
 
122
94
  [:date].each do |key|
123
95
  next unless headers[key]
96
+
124
97
  headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
125
98
  end
126
99
 
127
100
  # Add header section to the HTML body
128
101
  unless hdr_html.empty?
129
102
  # Insert header block styles
130
- if body =~ /<\/head>/
103
+ if body =~ %r{</head>}
131
104
  # if head exists, append the style block
132
- body.gsub!(/<\/head>/, HEADER_STYLE + '</head>')
105
+ body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
106
+ elsif body =~ %r{<head/>}
107
+ # empty head, replace with the style block
108
+ body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
133
109
  else
134
110
  # otherwise insert a head section before the body tag
135
- body.gsub!(/<body/, '<head>' + HEADER_STYLE + '</head><body')
111
+ body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
136
112
  end
137
113
  # Add the headers html table as first element in the body section
138
- body.gsub!(/<body[^>]*>/) {|m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}"}
114
+ body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
139
115
  end
140
116
 
141
117
  # Embed inline images
@@ -144,29 +120,23 @@ module Libis
144
120
  used_files = []
145
121
 
146
122
  # First process plaintext cid entries
147
- body.gsub!(IMG_CID_PLAIN_REGEX) do |match|
148
- # puts "CID found: #{match}, looking for #{$1}"
149
- data = getAttachmentData(attachments, $1)
123
+ body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
124
+ data = get_attachment_data(attachments, ::Regexp.last_match(1))
150
125
  if data
151
- # puts "cid #{$1} data: #{data.inspect}"
152
- used_files << $1
126
+ used_files << ::Regexp.last_match(1)
153
127
  "<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
154
128
  else
155
- # puts "cid #{$1} not found"
156
129
  '<img src=""/>'
157
130
  end
158
131
  end
159
-
132
+
160
133
  # Then process HTML img tags with CID entries
161
- body.gsub!(IMG_CID_HTML_REGEX) do |match|
162
- # puts "CID found: #{match}, looking for #{$1}"
163
- data = getAttachmentData(attachments, $1)
164
- if data
165
- # puts "cid #{$1} data: #{data.inspect}"
166
- used_files << $1
167
- "data:#{data[:mime_type]};base64,#{data[:base64]}"
134
+ body.gsub!(IMG_CID_HTML_REGEX) do |_match|
135
+ data = get_attachment_data(attachments, ::Regexp.last_match(1))
136
+ if data
137
+ used_files << ::Regexp.last_match(1)
138
+ "data:#{data[:mime_type]};base64,#{data[:base64]}"
168
139
  else
169
- # puts "cid #{$1} not found"
170
140
  ''
171
141
  end
172
142
  end
@@ -176,59 +146,52 @@ module Libis
176
146
  files = []
177
147
 
178
148
  if target_format == :PDF
179
- # PDF creation options
149
+ # PDF creation options
180
150
  pdf_options = {
181
151
  page_size: 'A4',
182
152
  margin_top: '10mm',
183
153
  margin_bottom: '10mm',
184
154
  margin_left: '10mm',
185
155
  margin_right: '10mm',
186
- dpi: 300,
187
156
  # image_quality: 100,
188
157
  # viewport_size: '2480x3508',
158
+ dpi: 300
189
159
  }.merge pdf_options
190
160
 
191
- # pp pdf_options
192
- # puts "Final HTML body:"
193
- # pp body
194
161
  subject = find_hdr(msg.headers, 'Subject')
195
162
  kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
196
163
  pdf = kit.to_pdf
197
- File.open(target, 'wb') {|f| f.write(pdf)}
198
- # puts "message #{subject} converted to PDF file '#{target}'"
164
+ File.open(target, 'wb') { |f| f.write(pdf) }
199
165
  else
200
- File.open(target, 'wb') {|f| f.write(body)}
201
- # puts "message #{subject} converted to HTML file '#{target}'"
166
+ File.open(target, 'wb') { |f| f.write(body) }
202
167
  end
203
168
  files << target if File.exist?(target)
204
169
 
205
170
  # Save attachments
206
171
  # ----------------
207
172
  outdir = File.join(outdir, "#{File.basename(target)}.attachments")
208
- digits = ((attachments.count + 1)/ 10) + 1
173
+ digits = ((attachments.count + 1) / 10) + 1
209
174
  i = 1
210
- attachments.delete_if {|a| a.properties.attachment_hidden}.each do |a|
211
- prefix = "#{"%0*d" % [digits, i]}-"
212
- if sub_msg = a.instance_variable_get(:@embedded_msg)
213
- # puts "Embedded email message ..."
214
- subject = a.properties[:display_name] || sub_msg.subject || ""
175
+ attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
176
+ prefix = "#{format('%0*d', digits, i)}-"
177
+ if (sub_msg = a.instance_variable_get(:@embedded_msg))
178
+ subject = a.properties[:display_name] || sub_msg.subject || ''
215
179
  file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
216
-
217
180
  result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
218
- if e = result[:error]
219
- raise
181
+ if (e = result[:error])
182
+ raise e
220
183
  end
184
+
221
185
  files += result[:files]
222
186
  elsif a.filename
223
187
  next if used_files.include?(a.filename)
224
- file = File.join(outdir, "#{prefix}#{a.filename}")
225
188
 
189
+ file = File.join(outdir, "#{prefix}#{a.filename}")
226
190
  FileUtils.mkdir_p(File.dirname(file))
227
- File.open(file, 'wb') {|f| a.save(f)}
191
+ File.open(file, 'wb') { |f| a.save(f) }
228
192
  files << file
229
- # puts "Attachment file '#{file}' created"
230
193
  else
231
- @warnings << "Attachment #{a.properties[:display_name]} cannot be saved"
194
+ @warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
232
195
  next
233
196
  end
234
197
  i += 1
@@ -240,28 +203,26 @@ module Libis
240
203
  (headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
241
204
  end
242
205
  end
243
-
206
+
244
207
  {
245
- command: {status: 0},
246
- files: files,
247
- headers: headers,
208
+ command: { status: 0 },
209
+ files:,
210
+ headers:,
248
211
  warnings: @warnings
249
212
  }
250
-
251
213
  rescue Exception => e
252
- # puts "ERROR: Exception #{e.class} raised: #{e.message}"
253
- # e.backtrace.each {|t| puts " - #{t}"}
254
214
  raise unless root_msg
215
+
255
216
  msg.close
256
- return {
257
- command: {status: -1},
217
+ {
218
+ command: { status: -1 },
258
219
  files: [],
259
220
  headers: {},
260
221
  errors: [
261
222
  {
262
223
  error: e.message,
263
224
  error_class: e.class.name,
264
- error_trace: e.backtrace,
225
+ error_trace: e.backtrace
265
226
  }
266
227
  ],
267
228
  warnings: @warnings
@@ -270,15 +231,13 @@ module Libis
270
231
 
271
232
  protected
272
233
 
273
- def eml_to_html
274
-
275
- end
234
+ def eml_to_html; end
276
235
 
277
236
  private
278
237
 
279
238
  def find_hdr(list, key)
280
239
  keys = list.keys
281
- if k = keys.find {|x| x.to_s =~ /^#{key}$/i}
240
+ if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
282
241
  v = list[k]
283
242
  v = v.first if v.is_a? Array
284
243
  v = Rfc2047.decode(v).strip if v.is_a? String
@@ -288,27 +247,23 @@ module Libis
288
247
  end
289
248
 
290
249
  def hdr_html(key, value)
291
- return HEADER_FIELD_TEMPLATE % [key, CGI::escapeHTML(value)] if key.is_a?(String) && value.is_a?(String) && !value.empty?
250
+ return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
251
+
292
252
  ''
293
253
  end
294
254
 
295
- def getAttachmentData(attachments, cid)
255
+ def get_attachment_data(attachments, cid)
296
256
  attachments.each do |attachment|
297
- if attachment.properties.attach_content_id == cid
298
- attachment.data.rewind
299
- return {
300
- mime_type: attachment.properties.attach_mime_tag,
301
- base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
302
- }
303
- end
257
+ next unless attachment.properties.attach_content_id == cid
258
+
259
+ attachment.data.rewind
260
+ return {
261
+ mime_type: attachment.properties.attach_mime_tag,
262
+ base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
263
+ }
304
264
  end
305
265
  nil
306
266
  end
307
-
308
- def read_header(headers_file)
309
- headers = YAML.load_file(headers_file)
310
- headers.symbolize_keys
311
- end
312
267
  end
313
268
  end
314
269
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'fileutils'
2
4
 
3
5
  require 'libis/tools/extend/string'
@@ -9,60 +11,61 @@ require 'libis/format/config'
9
11
  module Libis
10
12
  module Format
11
13
  module Tool
12
-
13
14
  class OfficeToPdf
14
15
  include ::Libis::Tools::Logger
15
16
 
16
17
  def self.installed?
17
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], "--version")
18
- result[:status] == 0
18
+ result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
+ (result[:status]).zero?
19
20
  end
20
21
 
21
- def self.run(source, target, options = {})
22
- self.new.run source, target, options
22
+ def self.run(source, target, **options)
23
+ new.run source, target, **options
23
24
  end
24
25
 
25
- def run(source, target, options = {})
26
+ def run(source, target, **options)
26
27
  workdir = '/...'
27
28
  workdir = Dir.tmpdir unless Dir.exist? workdir
28
29
 
29
- workdir = File.join(workdir, rand(1000000).to_s)
30
+ workdir = File.join(workdir, rand(1_000_000).to_s)
30
31
  FileUtils.mkpath(workdir)
31
32
 
32
33
  src_file = File.join(workdir, File.basename(source))
33
34
  FileUtils.symlink source, src_file
34
35
 
35
- tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
36
+ tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
36
37
 
37
38
  export_filter = options[:export_filter] || 'pdf'
38
39
 
39
40
  timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
40
41
  result = Libis::Tools::Command.run(
41
- Libis::Format::Config[:soffice_cmd], '--headless',
42
- "-env:UserInstallation=file://#{workdir}",
43
- '--convert-to', export_filter,
44
- '--outdir', workdir, src_file,
45
- timeout: timeout,
46
- kill_after: timeout * 2
42
+ Libis::Format::Config[:soffice_cmd], '--headless',
43
+ "-env:UserInstallation=file://#{workdir}",
44
+ '--convert-to', export_filter,
45
+ '--outdir', workdir, src_file,
46
+ timeout:,
47
+ kill_after: timeout * 2
47
48
  )
48
49
 
49
- raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
50
+ raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
51
+
50
52
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
51
- raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
53
+ raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
52
54
 
53
55
  FileUtils.copy tgt_file, target, preserve: true
54
56
 
55
57
  {
56
58
  command: result,
57
- files: [ target ]
59
+ files: [target]
58
60
  }
59
-
60
61
  ensure
61
- FileUtils.rmtree workdir rescue nil
62
-
62
+ begin
63
+ FileUtils.rmtree workdir
64
+ rescue StandardError
65
+ nil
66
+ end
63
67
  end
64
68
  end
65
-
66
69
  end
67
70
  end
68
71
  end
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  require 'os'
2
4
 
3
5
  require 'libis/tools/extend/string'
@@ -9,49 +11,46 @@ require 'libis/format/config'
9
11
  module Libis
10
12
  module Format
11
13
  module Tool
12
-
13
14
  class PdfSplit
14
15
  include ::Libis::Tools::Logger
15
16
 
16
17
  def self.installed?
17
- result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
18
- return false unless result[:status] == 0
18
+ result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
19
+ return false unless (result[:status]).zero?
20
+
19
21
  File.exist?(Libis::Format::Config[:pdf_tool])
20
22
  end
21
23
 
22
- def self.run(source, target, options = [])
23
- self.new.run source, target, options
24
+ def self.run(source, target, *args)
25
+ new.run source, target, *args
24
26
  end
25
27
 
26
- def run(source, target, options = [])
27
-
28
+ def run(source, target, *args)
28
29
  if OS.java?
29
30
  # TODO: import library and execute in current VM. For now do exactly as in MRI.
30
31
  end
31
32
 
32
33
  timeout = Libis::Format::Config[:timeouts][:pdf_split]
33
34
  result = Libis::Tools::Command.run(
34
- Libis::Format::Config[:java_cmd],
35
- '-cp', Libis::Format::Config[:pdf_tool],
36
- 'SplitPdf',
37
- '--file_input', source,
38
- '--file_output', target,
39
- *options,
40
- timeout: timeout,
41
- kill_after: timeout * 2
35
+ Libis::Format::Config[:java_cmd],
36
+ '-cp', Libis::Format::Config[:pdf_tool],
37
+ 'SplitPdf',
38
+ '--file_input', source,
39
+ '--file_output', target,
40
+ *args,
41
+ timeout:,
42
+ kill_after: timeout * 2
42
43
  )
43
44
 
44
- raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
45
- raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
45
+ raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
46
+ raise "#{self.class} errors: #{result[:err].join("\n")}" unless (result[:status]).zero? && result[:err].empty?
46
47
 
47
48
  {
48
49
  command: result,
49
- files: [ target ] # TODO: collect the files
50
+ files: [target] # TODO: collect the files
50
51
  }
51
-
52
52
  end
53
53
  end
54
-
55
54
  end
56
55
  end
57
56
  end