libis-format 1.2.7 → 1.2.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +7 -1
- data/lib/libis/format/cli/prompt_helper.rb +32 -24
- data/lib/libis/format/converter/audio_converter.rb +23 -30
- data/lib/libis/format/identifier.rb +1 -1
- data/lib/libis/format/tool/extension_identification.rb +23 -25
- data/lib/libis/format/tool/fido.rb +22 -27
- data/lib/libis/format/tool/file_tool.rb +11 -24
- data/lib/libis/format/tool/fop_pdf.rb +19 -20
- data/lib/libis/format/tool/identification_tool.rb +34 -36
- data/lib/libis/format/tool/msg_to_pdf.rb +66 -111
- data/lib/libis/format/tool/office_to_pdf.rb +24 -21
- data/lib/libis/format/tool/pdf_split.rb +19 -20
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -30
- data/lib/libis/format/tool/pdfa_validator.rb +16 -14
- data/lib/libis/format/version.rb +3 -1
- data/libis-format.gemspec +23 -22
- data/tools/fop/fop.bat +75 -75
- data/tools/fop/fop.cmd +31 -31
- data/tools/fop/fop.js +341 -341
- data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
- data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
- data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
- data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
- metadata +69 -79
- data/.coveralls.yml +0 -2
- data/.gitignore +0 -21
- data/.travis.yml +0 -74
- data/.vscode/launch.json +0 -21
- data/Rakefile +0 -12
- data/base/Dockerfile +0 -33
- data/base/Dockerfile.alpine +0 -20
- data/base/Dockerfile.rvm +0 -56
- data/base/rework_path +0 -25
- data/docker_cfg.yml +0 -1
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'csv'
         | 
| 2 4 | 
             
            require 'tmpdir'
         | 
| 3 5 |  | 
| @@ -12,42 +14,41 @@ require 'libis/format/type_database' | |
| 12 14 | 
             
            module Libis
         | 
| 13 15 | 
             
              module Format
         | 
| 14 16 | 
             
                module Tool
         | 
| 15 | 
            -
             | 
| 16 | 
            -
                    class IdentificationTool
         | 
| 17 | 
            +
                  class IdentificationTool
         | 
| 17 18 | 
             
                    include Singleton
         | 
| 18 19 | 
             
                    include ::Libis::Tools::Logger
         | 
| 19 20 |  | 
| 20 21 | 
             
                    def self.bad_mimetype(mimetype)
         | 
| 21 | 
            -
                       | 
| 22 | 
            +
                      instance.bad_mimetype(mimetype)
         | 
| 22 23 | 
             
                    end
         | 
| 23 24 |  | 
| 24 | 
            -
                    def self.run(file, recursive = false, options | 
| 25 | 
            -
                      options ||= {}
         | 
| 25 | 
            +
                    def self.run(file, recursive = false, **options)
         | 
| 26 26 | 
             
                      if file.is_a?(Array)
         | 
| 27 | 
            -
                        return run_list file, options
         | 
| 28 | 
            -
                      elsif file.is_a?(String) && File. | 
| 27 | 
            +
                        return run_list file, **options
         | 
| 28 | 
            +
                      elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
         | 
| 29 29 | 
             
                        if File.directory?(file)
         | 
| 30 | 
            -
                          return run_dir(file, recursive, options)
         | 
| 30 | 
            +
                          return run_dir(file, recursive, **options)
         | 
| 31 31 | 
             
                        elsif File.file?(file)
         | 
| 32 | 
            -
                          return  | 
| 32 | 
            +
                          return instance.run(file, **options)
         | 
| 33 33 | 
             
                        end
         | 
| 34 34 | 
             
                      end
         | 
| 35 | 
            +
             | 
| 35 36 | 
             
                      raise ArgumentError,
         | 
| 36 37 | 
             
                            'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
         | 
| 37 38 | 
             
                    end
         | 
| 38 39 |  | 
| 39 | 
            -
                    def self.run_dir(file, recursive = true, options | 
| 40 | 
            -
                       | 
| 40 | 
            +
                    def self.run_dir(file, recursive = true, **options)
         | 
| 41 | 
            +
                      instance.run_dir file, recursive, **options
         | 
| 41 42 | 
             
                    end
         | 
| 42 43 |  | 
| 43 | 
            -
                    def self.run_list(filelist | 
| 44 | 
            -
                       | 
| 44 | 
            +
                    def self.run_list(filelist, **options)
         | 
| 45 | 
            +
                      instance.run_list filelist, **options
         | 
| 45 46 | 
             
                    end
         | 
| 46 47 |  | 
| 47 48 | 
             
                    protected
         | 
| 48 49 |  | 
| 49 50 | 
             
                    def create_list_file(filelist)
         | 
| 50 | 
            -
                      list_file = Tempfile.new(%w | 
| 51 | 
            +
                      list_file = Tempfile.new(%w[file .list])
         | 
| 51 52 | 
             
                      filelist.each do |fname|
         | 
| 52 53 | 
             
                        list_file.write "#{fname}\n"
         | 
| 53 54 | 
             
                      end
         | 
| @@ -83,20 +84,19 @@ module Libis | |
| 83 84 | 
             
                    #   { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
         | 
| 84 85 | 
             
                    #
         | 
| 85 86 | 
             
                    def process_output(output)
         | 
| 86 | 
            -
                      output. | 
| 87 | 
            +
                      output.each_with_object({}) do |x, results|
         | 
| 87 88 | 
             
                        filepath = File.absolute_path(x.delete(:filepath)).freeze
         | 
| 88 89 | 
             
                        results[filepath] ||= []
         | 
| 89 90 | 
             
                        results[filepath] << annotate(x)
         | 
| 90 | 
            -
                        results
         | 
| 91 91 | 
             
                      end
         | 
| 92 92 | 
             
                    end
         | 
| 93 93 |  | 
| 94 94 | 
             
                    # Enhance the output with mimetype and score
         | 
| 95 95 | 
             
                    def annotate(result)
         | 
| 96 96 | 
             
                      # Enhance result with mimetype if needed
         | 
| 97 | 
            -
                       | 
| 97 | 
            +
                      bad_mimetypes.include?(result[:mimetype]) &&
         | 
| 98 | 
            +
                        !bad_puids.include?(result[:puid]) &&
         | 
| 98 99 | 
             
                        result[:mimetype] = get_mimetype(result[:puid])
         | 
| 99 | 
            -
                      end
         | 
| 100 100 |  | 
| 101 101 | 
             
                      # Normalize the mimetype
         | 
| 102 102 | 
             
                      Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
         | 
| @@ -108,55 +108,54 @@ module Libis | |
| 108 108 | 
             
                      result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
         | 
| 109 109 |  | 
| 110 110 | 
             
                      # freeze all strings
         | 
| 111 | 
            -
                      result.each {|_, v| v.freeze if v.is_a?(String)}
         | 
| 111 | 
            +
                      result.each { |_, v| v.freeze if v.is_a?(String) }
         | 
| 112 112 |  | 
| 113 113 | 
             
                      # Adapt score based on matchtype
         | 
| 114 114 | 
             
                      result[:matchtype] = result[:matchtype].to_s.downcase
         | 
| 115 115 | 
             
                      case result[:matchtype]
         | 
| 116 116 |  | 
| 117 117 | 
             
                        # Signature match increases score with 2
         | 
| 118 | 
            -
             | 
| 119 | 
            -
             | 
| 118 | 
            +
                      when 'signature'
         | 
| 119 | 
            +
                        result[:score] += 2
         | 
| 120 120 | 
             
                        # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
         | 
| 121 121 | 
             
                        # ext = File.extname(result[:filename])
         | 
| 122 122 | 
             
                        # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
         | 
| 123 123 |  | 
| 124 124 | 
             
                        # Container match increases score with 4
         | 
| 125 | 
            -
             | 
| 126 | 
            -
             | 
| 125 | 
            +
                      when 'container'
         | 
| 126 | 
            +
                        result[:score] += 4
         | 
| 127 127 | 
             
                        # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
         | 
| 128 128 | 
             
                        # ext = File.extname(result[:filename])
         | 
| 129 129 | 
             
                        # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
         | 
| 130 130 |  | 
| 131 131 | 
             
                        # Extension match is the weakest identification; score is lowered by 2 points
         | 
| 132 | 
            -
             | 
| 133 | 
            -
             | 
| 132 | 
            +
                      when 'extension'
         | 
| 133 | 
            +
                        result[:score] -= 2
         | 
| 134 134 |  | 
| 135 135 | 
             
                        # Magic code (file tool) is to be trused even less
         | 
| 136 | 
            -
             | 
| 137 | 
            -
             | 
| 136 | 
            +
                      when 'magic'
         | 
| 137 | 
            +
                        result[:score] -= 3
         | 
| 138 138 |  | 
| 139 | 
            -
                        # Or no change otherwise
         | 
| 140 | 
            -
                        else
         | 
| 141 | 
            -
                          # do nothing
         | 
| 142 139 | 
             
                      end
         | 
| 143 140 |  | 
| 144 141 | 
             
                      # Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
         | 
| 145 142 | 
             
                      # Office OpenXML, OpenDocument, jar, maff, svx)
         | 
| 146 | 
            -
                      if result[:mimetype] == 'application/zip'
         | 
| 147 | 
            -
                        result[:score] -= 2
         | 
| 148 | 
            -
                      end
         | 
| 143 | 
            +
                      result[:score] -= 2 if result[:mimetype] == 'application/zip'
         | 
| 149 144 |  | 
| 150 145 | 
             
                      # Return result enhanced with mimetype and score fields
         | 
| 151 146 | 
             
                      result
         | 
| 152 147 | 
             
                    end
         | 
| 153 148 |  | 
| 154 149 | 
             
                    def get_mimetype(puid)
         | 
| 155 | 
            -
                      ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first | 
| 150 | 
            +
                      ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
         | 
| 151 | 
            +
                    rescue StandardError
         | 
| 152 | 
            +
                      nil
         | 
| 156 153 | 
             
                    end
         | 
| 157 154 |  | 
| 158 155 | 
             
                    def get_puid(mimetype)
         | 
| 159 | 
            -
                      ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first | 
| 156 | 
            +
                      ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
         | 
| 157 | 
            +
                    rescue StandardError
         | 
| 158 | 
            +
                      nil
         | 
| 160 159 | 
             
                    end
         | 
| 161 160 |  | 
| 162 161 | 
             
                    attr_accessor :bad_mimetypes, :bad_puids
         | 
| @@ -170,7 +169,6 @@ module Libis | |
| 170 169 | 
             
                      @bad_mimetypes << mimetype
         | 
| 171 170 | 
             
                    end
         | 
| 172 171 | 
             
                  end
         | 
| 173 | 
            -
             | 
| 174 172 | 
             
                end
         | 
| 175 173 | 
             
              end
         | 
| 176 174 | 
             
            end
         | 
| @@ -1,15 +1,12 @@ | |
| 1 | 
            -
            #  | 
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 2 |  | 
| 3 3 | 
             
            require 'mapi/msg'
         | 
| 4 4 | 
             
            require 'rfc_2047'
         | 
| 5 5 | 
             
            require 'cgi'
         | 
| 6 6 | 
             
            require 'pdfkit'
         | 
| 7 | 
            -
             | 
| 8 7 | 
             
            require 'time'
         | 
| 9 | 
            -
             | 
| 10 8 | 
             
            require 'fileutils'
         | 
| 11 9 | 
             
            require 'pathname'
         | 
| 12 | 
            -
             | 
| 13 10 | 
             
            require 'libis/format/config'
         | 
| 14 11 |  | 
| 15 12 | 
             
            module Libis
         | 
| @@ -18,23 +15,23 @@ module Libis | |
| 18 15 | 
             
                  class MsgToPdf
         | 
| 19 16 | 
             
                    include ::Libis::Tools::Logger
         | 
| 20 17 |  | 
| 21 | 
            -
                    HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {background: white | 
| 18 | 
            +
                    HEADER_STYLE = '<style>.header-table {margin: 0 0 20 0;padding: 0;font-family: Arial, Helvetica, sans-serif;}.header-name {padding-right: 5px;color: #9E9E9E;text-align: right;vertical-align: top;font-size: 12px;}.header-value {font-size: 12px;}#header_fields {#background: white;#margin: 0;#border: 1px solid #DDD;#border-radius: 3px;#padding: 8px;#width: 100%%;#box-sizing: border-box;#}</style><script type="text/javascript">function timer() {try {parent.postMessage(Math.max(document.body.offsetHeight, document.body.scrollHeight), \'*\');} catch (r) {}setTimeout(timer, 10);};timer();</script>' # rubocop:disable Layout/LineLength
         | 
| 22 19 | 
             
                    HEADER_TABLE_TEMPLATE = '<div class="header-table"><table id="header_fields"><tbody>%s</tbody></table></div>'
         | 
| 23 20 | 
             
                    HEADER_FIELD_TEMPLATE = '<tr><td class="header-name">%s</td><td class="header-value">%s</td></tr>'
         | 
| 24 | 
            -
                    HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>'
         | 
| 21 | 
            +
                    HTML_WRAPPER_TEMPLATE = '<!DOCTYPE html><html><head><style>body {font-size: 0.5cm;}</style><title>title</title></head><body>%s</body></html>' # rubocop:disable Layout/LineLength
         | 
| 25 22 |  | 
| 26 | 
            -
                    IMG_CID_PLAIN_REGEX =  | 
| 27 | 
            -
                    IMG_CID_HTML_REGEX =  | 
| 23 | 
            +
                    IMG_CID_PLAIN_REGEX = /\[cid:(.*?)\]/m
         | 
| 24 | 
            +
                    IMG_CID_HTML_REGEX = /cid:([^"]*)/m
         | 
| 28 25 |  | 
| 29 26 | 
             
                    def self.installed?
         | 
| 30 27 | 
             
                      File.exist?(Libis::Format::Config[:wkhtmltopdf])
         | 
| 31 28 | 
             
                    end
         | 
| 32 29 |  | 
| 33 | 
            -
                    def self.run(source, target, options | 
| 34 | 
            -
                      new.run source, target, options
         | 
| 30 | 
            +
                    def self.run(source, target, **options)
         | 
| 31 | 
            +
                      new.run source, target, **options
         | 
| 35 32 | 
             
                    end
         | 
| 36 33 |  | 
| 37 | 
            -
                    def run(source, target, options | 
| 34 | 
            +
                    def run(source, target, **options)
         | 
| 38 35 | 
             
                      # Preliminary checks
         | 
| 39 36 | 
             
                      # ------------------
         | 
| 40 37 |  | 
| @@ -56,40 +53,15 @@ module Libis | |
| 56 53 | 
             
                    end
         | 
| 57 54 |  | 
| 58 55 | 
             
                    def msg_to_pdf(msg, target, target_format, pdf_options, root_msg: true)
         | 
| 59 | 
            -
             | 
| 60 56 | 
             
                      # Make sure the target directory exists
         | 
| 61 57 | 
             
                      outdir = File.dirname(target)
         | 
| 62 58 | 
             
                      FileUtils.mkdir_p(outdir)
         | 
| 63 59 |  | 
| 64 | 
            -
            # puts "Headers:"
         | 
| 65 | 
            -
            # puts '--------'
         | 
| 66 | 
            -
            # pp msg.headers
         | 
| 67 | 
            -
             | 
| 68 | 
            -
            # puts "Recipients:"
         | 
| 69 | 
            -
            # puts '-----------'
         | 
| 70 | 
            -
            # pp msg.recipients
         | 
| 71 | 
            -
             | 
| 72 | 
            -
            # puts "Body:"
         | 
| 73 | 
            -
            # puts '-----'
         | 
| 74 | 
            -
            # puts msg.properties.body
         | 
| 75 | 
            -
            # puts '-----'
         | 
| 76 | 
            -
            # puts msg.properties.body_rtf
         | 
| 77 | 
            -
            # puts '-----'
         | 
| 78 | 
            -
            # puts msg.properties.body_html
         | 
| 79 | 
            -
             | 
| 80 | 
            -
            # puts "Attachments:"
         | 
| 81 | 
            -
            # puts '------------'
         | 
| 82 | 
            -
            # msg.attachments.each {|a| p "#{a.filename} - #{a.properties.attach_content_id}"}
         | 
| 83 | 
            -
             | 
| 84 | 
            -
            # puts "Converting:"
         | 
| 85 | 
            -
            # puts '-----------'
         | 
| 86 | 
            -
             | 
| 87 60 | 
             
                      # Get the body of the message in HTML
         | 
| 88 61 | 
             
                      body = msg.properties.body_html
         | 
| 89 | 
            -
             | 
| 90 | 
            -
             | 
| 91 | 
            -
             | 
| 92 | 
            -
                      end
         | 
| 62 | 
            +
             | 
| 63 | 
            +
                      # Embed plain body in HTML as a fallback
         | 
| 64 | 
            +
                      body ||= HTML_WRAPPER_TEMPLATE % msg.properties.body
         | 
| 93 65 |  | 
| 94 66 | 
             
                      # Check and fix the character encoding
         | 
| 95 67 | 
             
                      begin
         | 
| @@ -97,8 +69,8 @@ module Libis | |
| 97 69 | 
             
                        body.encode!('UTF-8', universal_newline: true)
         | 
| 98 70 | 
             
                      rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError
         | 
| 99 71 | 
             
                        begin
         | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 72 | 
            +
                          # If it fails, the text may be in Windows' Latin1 (ISO-8859-1)
         | 
| 73 | 
            +
                          body.force_encoding('ISO-8859-1').encode!('UTF-8', universal_newline: true)
         | 
| 102 74 | 
             
                        rescue Encoding::InvalidByteSequenceError, Encoding::UndefinedConversionError => e
         | 
| 103 75 | 
             
                          # If that fails too, log a warning and replace the invalid/unknown with a ? character.
         | 
| 104 76 | 
             
                          @warnings << "#{e.class}: #{e.message}"
         | 
| @@ -111,7 +83,7 @@ module Libis | |
| 111 83 | 
             
                      headers = {}
         | 
| 112 84 | 
             
                      hdr_html = ''
         | 
| 113 85 |  | 
| 114 | 
            -
                      %w | 
| 86 | 
            +
                      %w[From To Cc Subject Date].each do |key|
         | 
| 115 87 | 
             
                        value = find_hdr(msg.headers, key)
         | 
| 116 88 | 
             
                        if value
         | 
| 117 89 | 
             
                          headers[key.downcase.to_sym] = value
         | 
| @@ -121,21 +93,25 @@ module Libis | |
| 121 93 |  | 
| 122 94 | 
             
                      [:date].each do |key|
         | 
| 123 95 | 
             
                        next unless headers[key]
         | 
| 96 | 
            +
             | 
| 124 97 | 
             
                        headers[key] = DateTime.parse(headers[key]).to_time.localtime.iso8601
         | 
| 125 98 | 
             
                      end
         | 
| 126 99 |  | 
| 127 100 | 
             
                      # Add header section to the HTML body
         | 
| 128 101 | 
             
                      unless hdr_html.empty?
         | 
| 129 102 | 
             
                        # Insert header block styles
         | 
| 130 | 
            -
                        if body =~  | 
| 103 | 
            +
                        if body =~ %r{</head>}
         | 
| 131 104 | 
             
                          # if head exists, append the style block
         | 
| 132 | 
            -
                          body.gsub!( | 
| 105 | 
            +
                          body.gsub!(%r{</head>}, "#{HEADER_STYLE}</head>")
         | 
| 106 | 
            +
                        elsif body =~ %r{<head/>}
         | 
| 107 | 
            +
                          # empty head, replace with the style block
         | 
| 108 | 
            +
                          body.gsub!(%r{<head/>}, "<head>#{HEADER_STYLE}</head>")
         | 
| 133 109 | 
             
                        else
         | 
| 134 110 | 
             
                          # otherwise insert a head section before the body tag
         | 
| 135 | 
            -
                          body.gsub!(/<body/,  | 
| 111 | 
            +
                          body.gsub!(/<body/, "<head>#{HEADER_STYLE}</head><body")
         | 
| 136 112 | 
             
                        end
         | 
| 137 113 | 
             
                        # Add the headers html table as first element in the body section
         | 
| 138 | 
            -
                        body.gsub!(/<body[^>]*>/) {|m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}"}
         | 
| 114 | 
            +
                        body.gsub!(/<body[^>]*>/) { |m| "#{m}#{HEADER_TABLE_TEMPLATE % hdr_html}" }
         | 
| 139 115 | 
             
                      end
         | 
| 140 116 |  | 
| 141 117 | 
             
                      # Embed inline images
         | 
| @@ -144,29 +120,23 @@ module Libis | |
| 144 120 | 
             
                      used_files = []
         | 
| 145 121 |  | 
| 146 122 | 
             
                      # First process plaintext cid entries
         | 
| 147 | 
            -
                      body.gsub!(IMG_CID_PLAIN_REGEX) do | | 
| 148 | 
            -
             | 
| 149 | 
            -
                        data = getAttachmentData(attachments, $1)
         | 
| 123 | 
            +
                      body.gsub!(IMG_CID_PLAIN_REGEX) do |_match|
         | 
| 124 | 
            +
                        data = get_attachment_data(attachments, ::Regexp.last_match(1))
         | 
| 150 125 | 
             
                        if data
         | 
| 151 | 
            -
             | 
| 152 | 
            -
                          used_files << $1
         | 
| 126 | 
            +
                          used_files << ::Regexp.last_match(1)
         | 
| 153 127 | 
             
                          "<img src=\"data:#{data[:mime_type]};base64,#{data[:base64]}\"/>"
         | 
| 154 128 | 
             
                        else
         | 
| 155 | 
            -
              # puts "cid #{$1} not found"
         | 
| 156 129 | 
             
                          '<img src=""/>'
         | 
| 157 130 | 
             
                        end
         | 
| 158 131 | 
             
                      end
         | 
| 159 | 
            -
             | 
| 132 | 
            +
             | 
| 160 133 | 
             
                      # Then process HTML img tags with CID entries
         | 
| 161 | 
            -
                      body.gsub!(IMG_CID_HTML_REGEX) do | | 
| 162 | 
            -
             | 
| 163 | 
            -
                        data | 
| 164 | 
            -
             | 
| 165 | 
            -
             | 
| 166 | 
            -
                        used_files << $1
         | 
| 167 | 
            -
                        "data:#{data[:mime_type]};base64,#{data[:base64]}"
         | 
| 134 | 
            +
                      body.gsub!(IMG_CID_HTML_REGEX) do |_match|
         | 
| 135 | 
            +
                        data = get_attachment_data(attachments, ::Regexp.last_match(1))
         | 
| 136 | 
            +
                        if data
         | 
| 137 | 
            +
                          used_files << ::Regexp.last_match(1)
         | 
| 138 | 
            +
                          "data:#{data[:mime_type]};base64,#{data[:base64]}"
         | 
| 168 139 | 
             
                        else
         | 
| 169 | 
            -
            # puts "cid #{$1} not found"
         | 
| 170 140 | 
             
                          ''
         | 
| 171 141 | 
             
                        end
         | 
| 172 142 | 
             
                      end
         | 
| @@ -176,59 +146,52 @@ module Libis | |
| 176 146 | 
             
                      files = []
         | 
| 177 147 |  | 
| 178 148 | 
             
                      if target_format == :PDF
         | 
| 179 | 
            -
             | 
| 149 | 
            +
                        # PDF creation options
         | 
| 180 150 | 
             
                        pdf_options = {
         | 
| 181 151 | 
             
                          page_size: 'A4',
         | 
| 182 152 | 
             
                          margin_top: '10mm',
         | 
| 183 153 | 
             
                          margin_bottom: '10mm',
         | 
| 184 154 | 
             
                          margin_left: '10mm',
         | 
| 185 155 | 
             
                          margin_right: '10mm',
         | 
| 186 | 
            -
                          dpi: 300,
         | 
| 187 156 | 
             
                          # image_quality: 100,
         | 
| 188 157 | 
             
                          # viewport_size: '2480x3508',
         | 
| 158 | 
            +
                          dpi: 300
         | 
| 189 159 | 
             
                        }.merge pdf_options
         | 
| 190 160 |  | 
| 191 | 
            -
            # pp pdf_options
         | 
| 192 | 
            -
            # puts "Final HTML body:"
         | 
| 193 | 
            -
            # pp body
         | 
| 194 161 | 
             
                        subject = find_hdr(msg.headers, 'Subject')
         | 
| 195 162 | 
             
                        kit = PDFKit.new(body, title: (subject || 'message'), **pdf_options)
         | 
| 196 163 | 
             
                        pdf = kit.to_pdf
         | 
| 197 | 
            -
                        File.open(target, 'wb') {|f| f.write(pdf)}
         | 
| 198 | 
            -
            # puts "message #{subject} converted to PDF file '#{target}'"
         | 
| 164 | 
            +
                        File.open(target, 'wb') { |f| f.write(pdf) }
         | 
| 199 165 | 
             
                      else
         | 
| 200 | 
            -
                        File.open(target, 'wb') {|f| f.write(body)}
         | 
| 201 | 
            -
            # puts "message #{subject} converted to HTML file '#{target}'"
         | 
| 166 | 
            +
                        File.open(target, 'wb') { |f| f.write(body) }
         | 
| 202 167 | 
             
                      end
         | 
| 203 168 | 
             
                      files << target if File.exist?(target)
         | 
| 204 169 |  | 
| 205 170 | 
             
                      # Save attachments
         | 
| 206 171 | 
             
                      # ----------------
         | 
| 207 172 | 
             
                      outdir = File.join(outdir, "#{File.basename(target)}.attachments")
         | 
| 208 | 
            -
                      digits = ((attachments.count + 1)/ 10) + 1
         | 
| 173 | 
            +
                      digits = ((attachments.count + 1) / 10) + 1
         | 
| 209 174 | 
             
                      i = 1
         | 
| 210 | 
            -
                      attachments.delete_if {|a| a.properties.attachment_hidden}.each do |a|
         | 
| 211 | 
            -
                        prefix = "#{ | 
| 212 | 
            -
                        if sub_msg = a.instance_variable_get(:@embedded_msg)
         | 
| 213 | 
            -
             | 
| 214 | 
            -
                          subject = a.properties[:display_name] || sub_msg.subject || ""
         | 
| 175 | 
            +
                      attachments.delete_if { |a| a.properties.attachment_hidden }.each do |a|
         | 
| 176 | 
            +
                        prefix = "#{format('%0*d', digits, i)}-"
         | 
| 177 | 
            +
                        if (sub_msg = a.instance_variable_get(:@embedded_msg))
         | 
| 178 | 
            +
                          subject = a.properties[:display_name] || sub_msg.subject || ''
         | 
| 215 179 | 
             
                          file = File.join(outdir, "#{prefix}#{subject}.msg.#{target_format.to_s.downcase}")
         | 
| 216 | 
            -
             | 
| 217 180 | 
             
                          result = msg_to_pdf(sub_msg, file, target_format, pdf_options, root_msg: false)
         | 
| 218 | 
            -
                          if e = result[:error]
         | 
| 219 | 
            -
                            raise 
         | 
| 181 | 
            +
                          if (e = result[:error])
         | 
| 182 | 
            +
                            raise e
         | 
| 220 183 | 
             
                          end
         | 
| 184 | 
            +
             | 
| 221 185 | 
             
                          files += result[:files]
         | 
| 222 186 | 
             
                        elsif a.filename
         | 
| 223 187 | 
             
                          next if used_files.include?(a.filename)
         | 
| 224 | 
            -
                          file = File.join(outdir, "#{prefix}#{a.filename}")
         | 
| 225 188 |  | 
| 189 | 
            +
                          file = File.join(outdir, "#{prefix}#{a.filename}")
         | 
| 226 190 | 
             
                          FileUtils.mkdir_p(File.dirname(file))
         | 
| 227 | 
            -
                          File.open(file, 'wb') {|f| a.save(f)}
         | 
| 191 | 
            +
                          File.open(file, 'wb') { |f| a.save(f) }
         | 
| 228 192 | 
             
                          files << file
         | 
| 229 | 
            -
            # puts "Attachment file '#{file}' created"
         | 
| 230 193 | 
             
                        else
         | 
| 231 | 
            -
                          @warnings << "Attachment #{a.properties[:display_name]} cannot be  | 
| 194 | 
            +
                          @warnings << "Attachment #{a.properties[:display_name]} cannot be extracted"
         | 
| 232 195 | 
             
                          next
         | 
| 233 196 | 
             
                        end
         | 
| 234 197 | 
             
                        i += 1
         | 
| @@ -240,28 +203,26 @@ module Libis | |
| 240 203 | 
             
                          (headers[:attachments] ||= []) << Pathname.new(f).relative_path_from(p).to_s
         | 
| 241 204 | 
             
                        end
         | 
| 242 205 | 
             
                      end
         | 
| 243 | 
            -
             | 
| 206 | 
            +
             | 
| 244 207 | 
             
                      {
         | 
| 245 | 
            -
                        command: {status: 0},
         | 
| 246 | 
            -
                        files | 
| 247 | 
            -
                        headers | 
| 208 | 
            +
                        command: { status: 0 },
         | 
| 209 | 
            +
                        files:,
         | 
| 210 | 
            +
                        headers:,
         | 
| 248 211 | 
             
                        warnings: @warnings
         | 
| 249 212 | 
             
                      }
         | 
| 250 | 
            -
                      
         | 
| 251 213 | 
             
                    rescue Exception => e
         | 
| 252 | 
            -
            # puts "ERROR: Exception #{e.class} raised: #{e.message}"
         | 
| 253 | 
            -
            # e.backtrace.each {|t| puts " - #{t}"}
         | 
| 254 214 | 
             
                      raise unless root_msg
         | 
| 215 | 
            +
             | 
| 255 216 | 
             
                      msg.close
         | 
| 256 | 
            -
                       | 
| 257 | 
            -
                        command: {status: -1},
         | 
| 217 | 
            +
                      {
         | 
| 218 | 
            +
                        command: { status: -1 },
         | 
| 258 219 | 
             
                        files: [],
         | 
| 259 220 | 
             
                        headers: {},
         | 
| 260 221 | 
             
                        errors: [
         | 
| 261 222 | 
             
                          {
         | 
| 262 223 | 
             
                            error: e.message,
         | 
| 263 224 | 
             
                            error_class: e.class.name,
         | 
| 264 | 
            -
                            error_trace: e.backtrace | 
| 225 | 
            +
                            error_trace: e.backtrace
         | 
| 265 226 | 
             
                          }
         | 
| 266 227 | 
             
                        ],
         | 
| 267 228 | 
             
                        warnings: @warnings
         | 
| @@ -270,15 +231,13 @@ module Libis | |
| 270 231 |  | 
| 271 232 | 
             
                    protected
         | 
| 272 233 |  | 
| 273 | 
            -
                    def eml_to_html
         | 
| 274 | 
            -
             | 
| 275 | 
            -
                    end
         | 
| 234 | 
            +
                    def eml_to_html; end
         | 
| 276 235 |  | 
| 277 236 | 
             
                    private
         | 
| 278 237 |  | 
| 279 238 | 
             
                    def find_hdr(list, key)
         | 
| 280 239 | 
             
                      keys = list.keys
         | 
| 281 | 
            -
                      if k = keys.find {|x| x.to_s =~ /^#{key}$/i}
         | 
| 240 | 
            +
                      if (k = keys.find { |x| x.to_s =~ /^#{key}$/i })
         | 
| 282 241 | 
             
                        v = list[k]
         | 
| 283 242 | 
             
                        v = v.first if v.is_a? Array
         | 
| 284 243 | 
             
                        v = Rfc2047.decode(v).strip if v.is_a? String
         | 
| @@ -288,27 +247,23 @@ module Libis | |
| 288 247 | 
             
                    end
         | 
| 289 248 |  | 
| 290 249 | 
             
                    def hdr_html(key, value)
         | 
| 291 | 
            -
                      return HEADER_FIELD_TEMPLATE  | 
| 250 | 
            +
                      return format(HEADER_FIELD_TEMPLATE, key, CGI.escapeHTML(value)) if key.is_a?(String) && value.is_a?(String) && !value.empty?
         | 
| 251 | 
            +
             | 
| 292 252 | 
             
                      ''
         | 
| 293 253 | 
             
                    end
         | 
| 294 254 |  | 
| 295 | 
            -
                    def  | 
| 255 | 
            +
                    def get_attachment_data(attachments, cid)
         | 
| 296 256 | 
             
                      attachments.each do |attachment|
         | 
| 297 | 
            -
                         | 
| 298 | 
            -
             | 
| 299 | 
            -
             | 
| 300 | 
            -
             | 
| 301 | 
            -
             | 
| 302 | 
            -
                           | 
| 303 | 
            -
                         | 
| 257 | 
            +
                        next unless attachment.properties.attach_content_id == cid
         | 
| 258 | 
            +
             | 
| 259 | 
            +
                        attachment.data.rewind
         | 
| 260 | 
            +
                        return {
         | 
| 261 | 
            +
                          mime_type: attachment.properties.attach_mime_tag,
         | 
| 262 | 
            +
                          base64: Base64.encode64(attachment.data.read).gsub(/[\r\n]/, '')
         | 
| 263 | 
            +
                        }
         | 
| 304 264 | 
             
                      end
         | 
| 305 265 | 
             
                      nil
         | 
| 306 266 | 
             
                    end
         | 
| 307 | 
            -
             | 
| 308 | 
            -
                    def read_header(headers_file)
         | 
| 309 | 
            -
                      headers = YAML.load_file(headers_file)
         | 
| 310 | 
            -
                      headers.symbolize_keys
         | 
| 311 | 
            -
                    end
         | 
| 312 267 | 
             
                  end
         | 
| 313 268 | 
             
                end
         | 
| 314 269 | 
             
              end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'fileutils'
         | 
| 2 4 |  | 
| 3 5 | 
             
            require 'libis/tools/extend/string'
         | 
| @@ -9,60 +11,61 @@ require 'libis/format/config' | |
| 9 11 | 
             
            module Libis
         | 
| 10 12 | 
             
              module Format
         | 
| 11 13 | 
             
                module Tool
         | 
| 12 | 
            -
             | 
| 13 14 | 
             
                  class OfficeToPdf
         | 
| 14 15 | 
             
                    include ::Libis::Tools::Logger
         | 
| 15 16 |  | 
| 16 17 | 
             
                    def self.installed?
         | 
| 17 | 
            -
                      result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd],  | 
| 18 | 
            -
                      result[:status] | 
| 18 | 
            +
                      result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
         | 
| 19 | 
            +
                      (result[:status]).zero?
         | 
| 19 20 | 
             
                    end
         | 
| 20 21 |  | 
| 21 | 
            -
                    def self.run(source, target, options | 
| 22 | 
            -
                       | 
| 22 | 
            +
                    def self.run(source, target, **options)
         | 
| 23 | 
            +
                      new.run source, target, **options
         | 
| 23 24 | 
             
                    end
         | 
| 24 25 |  | 
| 25 | 
            -
                    def run(source, target, options | 
| 26 | 
            +
                    def run(source, target, **options)
         | 
| 26 27 | 
             
                      workdir = '/...'
         | 
| 27 28 | 
             
                      workdir = Dir.tmpdir unless Dir.exist? workdir
         | 
| 28 29 |  | 
| 29 | 
            -
                      workdir = File.join(workdir, rand( | 
| 30 | 
            +
                      workdir = File.join(workdir, rand(1_000_000).to_s)
         | 
| 30 31 | 
             
                      FileUtils.mkpath(workdir)
         | 
| 31 32 |  | 
| 32 33 | 
             
                      src_file = File.join(workdir, File.basename(source))
         | 
| 33 34 | 
             
                      FileUtils.symlink source, src_file
         | 
| 34 35 |  | 
| 35 | 
            -
                      tgt_file = File.join(workdir, File.basename(source, '.*') | 
| 36 | 
            +
                      tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
         | 
| 36 37 |  | 
| 37 38 | 
             
                      export_filter = options[:export_filter] || 'pdf'
         | 
| 38 39 |  | 
| 39 40 | 
             
                      timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
         | 
| 40 41 | 
             
                      result = Libis::Tools::Command.run(
         | 
| 41 | 
            -
             | 
| 42 | 
            -
             | 
| 43 | 
            -
             | 
| 44 | 
            -
             | 
| 45 | 
            -
             | 
| 46 | 
            -
             | 
| 42 | 
            +
                        Libis::Format::Config[:soffice_cmd], '--headless',
         | 
| 43 | 
            +
                        "-env:UserInstallation=file://#{workdir}",
         | 
| 44 | 
            +
                        '--convert-to', export_filter,
         | 
| 45 | 
            +
                        '--outdir', workdir, src_file,
         | 
| 46 | 
            +
                        timeout:,
         | 
| 47 | 
            +
                        kill_after: timeout * 2
         | 
| 47 48 | 
             
                      )
         | 
| 48 49 |  | 
| 49 | 
            -
                      raise  | 
| 50 | 
            +
                      raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
         | 
| 51 | 
            +
             | 
| 50 52 | 
             
                      warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
         | 
| 51 | 
            -
                      raise  | 
| 53 | 
            +
                      raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
         | 
| 52 54 |  | 
| 53 55 | 
             
                      FileUtils.copy tgt_file, target, preserve: true
         | 
| 54 56 |  | 
| 55 57 | 
             
                      {
         | 
| 56 58 | 
             
                        command: result,
         | 
| 57 | 
            -
                        files: [ | 
| 59 | 
            +
                        files: [target]
         | 
| 58 60 | 
             
                      }
         | 
| 59 | 
            -
             | 
| 60 61 | 
             
                    ensure
         | 
| 61 | 
            -
                       | 
| 62 | 
            -
             | 
| 62 | 
            +
                      begin
         | 
| 63 | 
            +
                        FileUtils.rmtree workdir
         | 
| 64 | 
            +
                      rescue StandardError
         | 
| 65 | 
            +
                        nil
         | 
| 66 | 
            +
                      end
         | 
| 63 67 | 
             
                    end
         | 
| 64 68 | 
             
                  end
         | 
| 65 | 
            -
             | 
| 66 69 | 
             
                end
         | 
| 67 70 | 
             
              end
         | 
| 68 71 | 
             
            end
         | 
| @@ -1,3 +1,5 @@ | |
| 1 | 
            +
            # frozen_string_literal: true
         | 
| 2 | 
            +
             | 
| 1 3 | 
             
            require 'os'
         | 
| 2 4 |  | 
| 3 5 | 
             
            require 'libis/tools/extend/string'
         | 
| @@ -9,49 +11,46 @@ require 'libis/format/config' | |
| 9 11 | 
             
            module Libis
         | 
| 10 12 | 
             
              module Format
         | 
| 11 13 | 
             
                module Tool
         | 
| 12 | 
            -
             | 
| 13 14 | 
             
                  class PdfSplit
         | 
| 14 15 | 
             
                    include ::Libis::Tools::Logger
         | 
| 15 16 |  | 
| 16 17 | 
             
                    def self.installed?
         | 
| 17 | 
            -
                      result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd],  | 
| 18 | 
            -
                      return false unless result[:status] | 
| 18 | 
            +
                      result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
         | 
| 19 | 
            +
                      return false unless (result[:status]).zero?
         | 
| 20 | 
            +
             | 
| 19 21 | 
             
                      File.exist?(Libis::Format::Config[:pdf_tool])
         | 
| 20 22 | 
             
                    end
         | 
| 21 23 |  | 
| 22 | 
            -
                    def self.run(source, target,  | 
| 23 | 
            -
                       | 
| 24 | 
            +
                    def self.run(source, target, *args)
         | 
| 25 | 
            +
                      new.run source, target, *args
         | 
| 24 26 | 
             
                    end
         | 
| 25 27 |  | 
| 26 | 
            -
                    def run(source, target,  | 
| 27 | 
            -
             | 
| 28 | 
            +
                    def run(source, target, *args)
         | 
| 28 29 | 
             
                      if OS.java?
         | 
| 29 30 | 
             
                        # TODO: import library and execute in current VM. For now do exactly as in MRI.
         | 
| 30 31 | 
             
                      end
         | 
| 31 32 |  | 
| 32 33 | 
             
                      timeout = Libis::Format::Config[:timeouts][:pdf_split]
         | 
| 33 34 | 
             
                      result = Libis::Tools::Command.run(
         | 
| 34 | 
            -
             | 
| 35 | 
            -
             | 
| 36 | 
            -
             | 
| 37 | 
            -
             | 
| 38 | 
            -
             | 
| 39 | 
            -
             | 
| 40 | 
            -
             | 
| 41 | 
            -
             | 
| 35 | 
            +
                        Libis::Format::Config[:java_cmd],
         | 
| 36 | 
            +
                        '-cp', Libis::Format::Config[:pdf_tool],
         | 
| 37 | 
            +
                        'SplitPdf',
         | 
| 38 | 
            +
                        '--file_input', source,
         | 
| 39 | 
            +
                        '--file_output', target,
         | 
| 40 | 
            +
                        *args,
         | 
| 41 | 
            +
                        timeout:,
         | 
| 42 | 
            +
                        kill_after: timeout * 2
         | 
| 42 43 | 
             
                      )
         | 
| 43 44 |  | 
| 44 | 
            -
                      raise  | 
| 45 | 
            -
                      raise  | 
| 45 | 
            +
                      raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
         | 
| 46 | 
            +
                      raise "#{self.class} errors: #{result[:err].join("\n")}" unless (result[:status]).zero? && result[:err].empty?
         | 
| 46 47 |  | 
| 47 48 | 
             
                      {
         | 
| 48 49 | 
             
                        command: result,
         | 
| 49 | 
            -
                        files: [ | 
| 50 | 
            +
                        files: [target] # TODO: collect the files
         | 
| 50 51 | 
             
                      }
         | 
| 51 | 
            -
             | 
| 52 52 | 
             
                    end
         | 
| 53 53 | 
             
                  end
         | 
| 54 | 
            -
             | 
| 55 54 | 
             
                end
         | 
| 56 55 | 
             
              end
         | 
| 57 56 | 
             
            end
         |