libis-format 0.9.1 → 0.9.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -1
- data/.travis.yml +14 -9
- data/README.md +1 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +15 -7
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +29 -25
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +23 -26
- data/lib/libis/format/converter/chain.rb +126 -27
- data/lib/libis/format/converter/image_converter.rb +211 -0
- data/lib/libis/format/converter/office_converter.rb +46 -0
- data/lib/libis/format/converter/pdf_converter.rb +110 -0
- data/lib/libis/format/converter/repository.rb +38 -50
- data/lib/libis/format/droid.rb +15 -8
- data/lib/libis/format/fido.rb +3 -10
- data/lib/libis/format/identifier.rb +18 -14
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +50 -0
- data/lib/libis/format/pdf_to_pdfa.rb +79 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +1 -1
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +9 -0
- data/libis-format.gemspec +2 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +1 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +56 -5
- data/data/ISOcoated.icc +0 -0
- data/tools/fido/argparselocal.pyc +0 -0
| @@ -76,7 +76,7 @@ module Libis | |
| 76 76 |  | 
| 77 77 | 
             
                    options ||= {}
         | 
| 78 78 |  | 
| 79 | 
            -
                    result = {}
         | 
| 79 | 
            +
                    result = { messages: [] }
         | 
| 80 80 |  | 
| 81 81 | 
             
                    # use FIDO
         | 
| 82 82 | 
             
                    # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
         | 
| @@ -96,9 +96,9 @@ module Libis | |
| 96 96 | 
             
                    # Libis::Tools::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
         | 
| 97 97 | 
             
                    result = validate_against_xml_schema(file, result)
         | 
| 98 98 |  | 
| 99 | 
            -
                    result ? | 
| 100 | 
            -
             | 
| 101 | 
            -
             | 
| 99 | 
            +
                    result[:mimetype] ?
         | 
| 100 | 
            +
                        log_msg(result, :info, "Identification of '#{file}': '#{result}'") :
         | 
| 101 | 
            +
                        log_msg(result, :warn, "Could not identify MIME type of '#{file}'")
         | 
| 102 102 | 
             
                  end
         | 
| 103 103 |  | 
| 104 104 | 
             
                  def get_fido_identification(file, result = {}, xtra_formats = nil)
         | 
| @@ -111,14 +111,13 @@ module Libis | |
| 111 111 | 
             
                    result.merge! fido_result
         | 
| 112 112 | 
             
                    result[:method] = 'fido'
         | 
| 113 113 |  | 
| 114 | 
            -
                    debug "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})" | 
| 115 | 
            -
                    result
         | 
| 114 | 
            +
                    log_msg(result, :debug, "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
         | 
| 116 115 | 
             
                  end
         | 
| 117 116 |  | 
| 118 117 | 
             
                  def get_droid_identification(file, result = {})
         | 
| 119 118 | 
             
                    return result if result_ok? result, :DROID
         | 
| 120 119 | 
             
                    droid_output = ::Libis::Format::Droid.run file
         | 
| 121 | 
            -
                    debug "DROID: #{droid_output}"
         | 
| 120 | 
            +
                    result[:messages] << [:debug, "DROID: #{droid_output}"]
         | 
| 122 121 | 
             
                    warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
         | 
| 123 122 | 
             
                    result.clear
         | 
| 124 123 | 
             
                    droid_output = droid_output.first
         | 
| @@ -129,18 +128,16 @@ module Libis | |
| 129 128 | 
             
                    result[:format_version] = droid_output[:format_version]
         | 
| 130 129 | 
             
                    result[:method] = 'droid'
         | 
| 131 130 |  | 
| 132 | 
            -
                    debug "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})" | 
| 133 | 
            -
                    result
         | 
| 131 | 
            +
                    log_msg(result, :debug, "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
         | 
| 134 132 | 
             
                  end
         | 
| 135 133 |  | 
| 136 134 | 
             
                  def get_file_identification(file, result = nil)
         | 
| 137 135 | 
             
                    return result if result_ok? result
         | 
| 138 | 
            -
                    result = {}
         | 
| 139 136 | 
             
                    begin
         | 
| 140 137 | 
             
                      output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")[:err]
         | 
| 141 138 | 
             
                      mimetype = output.strip.split
         | 
| 142 139 | 
             
                      if mimetype
         | 
| 143 | 
            -
                        debug "File result: '#{mimetype}'"
         | 
| 140 | 
            +
                        log_msg(result, :debug, "File result: '#{mimetype}'")
         | 
| 144 141 | 
             
                        result[:mimetype] = mimetype
         | 
| 145 142 | 
             
                        result[:puid] = get_puid(mimetype)
         | 
| 146 143 | 
             
                      end
         | 
| @@ -153,9 +150,8 @@ module Libis | |
| 153 150 |  | 
| 154 151 | 
             
                  def get_extension_identification(file, result = nil)
         | 
| 155 152 | 
             
                    return result if result_ok? result
         | 
| 156 | 
            -
                    result = {}
         | 
| 157 153 | 
             
                    info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
         | 
| 158 | 
            -
                    debug "File extension info: #{info}"
         | 
| 154 | 
            +
                    log_msg result, :debug, "File extension info: #{info}"
         | 
| 159 155 | 
             
                    if info
         | 
| 160 156 | 
             
                      result[:mimetype] = info[:MIME].first rescue nil
         | 
| 161 157 | 
             
                      result[:puid] = info[:PUID].first rescue nil
         | 
| @@ -170,7 +166,7 @@ module Libis | |
| 170 166 | 
             
                    xml_validations.each do |mime, xsd_file|
         | 
| 171 167 | 
             
                      next unless xsd_file
         | 
| 172 168 | 
             
                      if doc.validates_against?(xsd_file)
         | 
| 173 | 
            -
                        debug "XML file validated against XML Schema: #{xsd_file}"
         | 
| 169 | 
            +
                        log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
         | 
| 174 170 | 
             
                        result[:mimetype] = mime
         | 
| 175 171 | 
             
                        result[:puid] = nil
         | 
| 176 172 | 
             
                        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
         | 
| @@ -179,6 +175,14 @@ module Libis | |
| 179 175 | 
             
                    result
         | 
| 180 176 | 
             
                  end
         | 
| 181 177 |  | 
| 178 | 
            +
                  private
         | 
| 179 | 
            +
             | 
| 180 | 
            +
                  # Appends a +[severity, text]+ pair to the +:messages+ list kept
                  # inside +result+, creating that list when it is missing.
                  #
                  # @param result [Hash] identification result being built up
                  # @param severity [Symbol] log level tag stored with the message
                  # @param text [String] message text
                  # @return [Hash] +result+ itself, or a fresh empty Hash when
                  #   +result+ is not a Hash (nothing is recorded in that case)
                  def log_msg(result, severity, text)
                    if result.is_a?(Hash)
                      result[:messages] ||= []
                      result[:messages] << [severity, text]
                      result
                    else
                      {}
                    end
                  end
         | 
| 185 | 
            +
             | 
| 182 186 | 
             
                end
         | 
| 183 187 |  | 
| 184 188 | 
             
              end
         | 
| @@ -0,0 +1,52 @@ | |
| 1 | 
            +
            require 'fileutils'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'libis/tools/extend/string'
         | 
| 4 | 
            +
            require 'libis/tools/logger'
         | 
| 5 | 
            +
            require 'libis/tools/command'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            require 'libis/format/config'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            module Libis
         | 
| 10 | 
            +
              module Format
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                # Converts an office document to PDF (or another export filter) by
                # invoking the executable configured under
                # +Libis::Format::Config[:soffice_path]+ in headless mode.
                class OfficeToPdf
                  include ::Libis::Tools::Logger

                  # Convenience wrapper: builds an instance and delegates to #run.
                  def self.run(source, target, options = {})
                    new.run source, target, options
                  end

                  # Converts +source+ and copies the converted file to +target+.
                  #
                  # @param source [String] path of the document to convert
                  # @param target [String] path the converted file is copied to
                  # @param options [Hash] +:export_filter+ is passed to
                  #   +--convert-to+ (defaults to 'pdf')
                  # @return [Object, false] the command's +:out+ value on success,
                  #   +false+ when the command exits with a non-zero status
                  def run(source, target, options = {})
                    base_dir = '/...'
                    base_dir = Dir.tmpdir unless Dir.exist? base_dir

                    # mktmpdir picks a name that does not exist yet, so concurrent
                    # conversions cannot end up sharing a work directory.
                    workdir = Dir.mktmpdir('office_to_pdf', base_dir)

                    begin
                      # The converter writes its output next to a link of the source
                      # inside the private work directory.
                      src_file = File.join(workdir, File.basename(source))
                      FileUtils.link source, src_file

                      tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

                      export_filter = options[:export_filter] || 'pdf'

                      result = Libis::Tools::Command.run(
                          Libis::Format::Config[:soffice_path], '--headless',
                          '--convert-to', export_filter,
                          '--outdir', workdir, src_file
                      )

                      unless result[:status] == 0
                        warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
                        return false
                      end

                      FileUtils.copy tgt_file, target, preserve: true

                      result[:out]
                    ensure
                      # Runs on success, on the early +return false+ and on exceptions,
                      # so the work directory is never left behind.
                      FileUtils.rmtree workdir
                    end
                  end
                end
         | 
| 50 | 
            +
             | 
| 51 | 
            +
              end
         | 
| 52 | 
            +
            end
         | 
| @@ -0,0 +1,50 @@ | |
| 1 | 
            +
            require 'os'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'libis/tools/extend/string'
         | 
| 4 | 
            +
            require 'libis/tools/logger'
         | 
| 5 | 
            +
            require 'libis/tools/command'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            require 'libis/format/config'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            module Libis
         | 
| 10 | 
            +
              module Format
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                # Runs the 'CopyPdf' command of the PdfTool.jar file found in the
                # 'tools' directory three levels above this file.
                class PdfCopy
                  include ::Libis::Tools::Logger

                  # Convenience wrapper: builds an instance and delegates to #run.
                  def self.run(source, target, options = [])
                    new.run source, target, options
                  end

                  # Copies the PDF +source+ to +target+ through PdfTool.
                  #
                  # @param source [String] path handed to +--file_input+
                  # @param target [String] path handed to +--file_output+
                  # @param options [Array<String>] extra command line arguments,
                  #   appended verbatim after the input/output arguments
                  # @return [Object] the command's +:out+ value; a warning is logged
                  #   when the exit status is non-zero
                  def run(source, target, options = [])
                    tool_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools'))
                    jar_file = File.join(tool_dir, 'PdfTool.jar')

                    # TODO: on JRuby, import the library and execute in the current VM.
                    # Until then the external java command is used on every platform,
                    # so no platform check is needed.
                    result = Libis::Tools::Command.run(
                        Libis::Format::Config[:java_path],
                        '-jar', jar_file,
                        'CopyPdf',
                        '--file_input', source,
                        '--file_output', target,
                        *options
                    )

                    warn "PdfCopy errors: #{result[:err].join("\n")}" unless result[:status] == 0
                    result[:out]
                  end
                end
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              end
         | 
| 50 | 
            +
            end
         | 
| @@ -0,0 +1,79 @@ | |
| 1 | 
            +
            require 'tempfile'
         | 
| 2 | 
            +
            require 'csv'
         | 
| 3 | 
            +
            require 'fileutils'
         | 
| 4 | 
            +
             | 
| 5 | 
            +
            require 'libis/tools/extend/string'
         | 
| 6 | 
            +
            require 'libis/tools/logger'
         | 
| 7 | 
            +
            require 'libis/tools/command'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            require 'libis/format'
         | 
| 10 | 
            +
             | 
| 11 | 
            +
            module Libis
         | 
| 12 | 
            +
              module Format
         | 
| 13 | 
            +
             | 
| 14 | 
            +
                # Converts a PDF into a PDF/A file with the executable configured
                # under +Libis::Format::Config[:ghostscript_path]+ and checks the
                # result with PdfaValidator.
                class PdfToPdfa
                  include ::Libis::Tools::Logger

                  # Convenience wrapper: builds an instance and delegates to #run.
                  def self.run(source, target = nil, options = {})
                    new.run source, target, options
                  end

                  # Converts +source+ to PDF/A.
                  #
                  # @param source [String] path of the PDF to convert
                  # @param target [String, nil] output path; when nil a unique path in
                  #   the system temp directory is generated
                  # @param options [Hash, nil] +:colorspace+ selects the ICC profile
                  #   ('cmyk', anything else means RGB); nil is treated as no options
                  # @return [String, nil] +target+ when the output passes validation,
                  #   +nil+ otherwise
                  def run(source, target = nil, options = nil)
                    # The declared default is nil; without this guard the colorspace
                    # lookup below would raise NoMethodError.
                    options ||= {}

                    # Dir::Tmpname.create only generates the path here (empty block);
                    # it replaces make_tmpname, which newer Ruby versions removed.
                    target ||= Dir::Tmpname.create([File.basename(source, '.*'), '.pdf']) {}

                    icc_info = icc_options(options[:colorspace])

                    icc_file = nil
                    def_filename = nil

                    begin
                      # Private copy of the ICC profile; its path is written into the
                      # PostScript definition file below.
                      icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{random_suffix}.icc")
                      FileUtils.cp(File.join(Libis::Format::DATA_DIR, "#{icc_info[:icc_name]}.icc"), icc_file)

                      def_filename = File.join(Dir.tmpdir, "PDFA_def_#{random_suffix}.ps")
                      File.open(def_filename, 'w') do |f|
                        f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
                                   gsub('[** Fill in ICC profile location **]', icc_file).
                                   gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
                      end

                      result = Libis::Tools::Command.run(
                          Libis::Format::Config[:ghostscript_path],
                          '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
                          '-sColorConversionStrategy=/UseDeviceIndependentColor',
                          "-sProcessColorModel=#{icc_info[:device]}",
                          '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
                          "-sOutputICCProfile=#{icc_file}",
                          '-o', File.absolute_path(target),
                          def_filename,
                          source
                      )
                    ensure
                      # Remove the temporary files even when a step above raised.
                      FileUtils.rm [icc_file, def_filename].compact, force: true
                    end

                    unless result[:status] == 0
                      # '%' is doubled before the text is handed to the logger.
                      warn((['Pdf2PdfA errors:'] + result[:err] + result[:out]).join("\n").gsub('%', '%%'))
                    end

                    unless PdfaValidator.run(target)
                      error "Failed to generate correct PDF/A file from '%s'", source
                      return nil
                    end

                    target
                  end


                  private

                  # Maps a colorspace name to the ICC profile file name (without
                  # extension), the ICC reference name and the process color model.
                  def icc_options(colorspace)
                    case colorspace.to_s.downcase
                      when 'cmyk'
                        {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
                      else
                        {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
                    end
                  end

                  # 24 random hex characters used to make temp file names unique.
                  def random_suffix
                    Random.new.bytes(12).unpack('H*').first
                  end

                end
         | 
| 77 | 
            +
             | 
| 78 | 
            +
              end
         | 
| 79 | 
            +
            end
         | 
| @@ -0,0 +1,61 @@ | |
| 1 | 
            +
            require 'fileutils'
         | 
| 2 | 
            +
             | 
| 3 | 
            +
            require 'libis/tools/extend/string'
         | 
| 4 | 
            +
            require 'libis/tools/logger'
         | 
| 5 | 
            +
            require 'libis/tools/command'
         | 
| 6 | 
            +
             | 
| 7 | 
            +
            require 'libis/format/config'
         | 
| 8 | 
            +
             | 
| 9 | 
            +
            module Libis
         | 
| 10 | 
            +
              module Format
         | 
| 11 | 
            +
             | 
| 12 | 
            +
                # Checks a PDF file against PDF/A-1B constraints, using either the
                # tool configured under +Libis::Format::Config[:pdfa_path]+ or, when
                # that is not set, the bundled preflight-app jar.
                class PdfaValidator
                  include ::Libis::Tools::Logger

                  # Convenience wrapper: builds an instance and delegates to #run.
                  def self.run(source)
                    new.run source
                  end

                  # Validates +source+.
                  #
                  # @param source [String] path of the PDF file to check
                  # @return [Boolean] true when the validator accepts the file; on
                  #   failure a warning with the tool output is logged and false is
                  #   returned
                  def run(source)

                    # Resolved before any directory change so relative paths keep
                    # pointing at the caller's file.
                    src_file = File.absolute_path(source)

                    if (pdfa = Libis::Format::Config[:pdfa_path])
                      # Keep it clean: tool generates fontconfig/ cache dir in current working dir.
                      # The block form restores the previous working directory even
                      # when the command raises.
                      result = Dir.chdir(Dir.tmpdir) do
                        Libis::Tools::Command.run(
                            pdfa,
                            '--noxml',
                            '--level', 'B',
                            '--verb', '0',
                            src_file
                        )
                      end

                      valid = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
                    else
                      # NOTE(review): ROOT_DIR is defined in libis/format.rb, which this
                      # file does not require itself - confirm it is always loaded first.
                      jar = File.join(ROOT_DIR, 'tools', 'pdfbox', 'preflight-app-1.8.10.jar')
                      result = Libis::Tools::Command.run(
                          Libis::Format::Config[:java_path],
                          '-jar', jar,
                          src_file
                      )

                      valid = result[:status] == 0
                    end

                    unless valid
                      warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
                           result[:out].join("\n")
                    end

                    valid
                  end
                end
         | 
| 59 | 
            +
             | 
| 60 | 
            +
              end
         | 
| 61 | 
            +
            end
         | 
    
        data/lib/libis/format/version.rb
    CHANGED
    
    
    
        data/lib/libis/format.rb
    CHANGED
    
    | @@ -2,11 +2,20 @@ require 'libis/format/version' | |
| 2 2 |  | 
| 3 3 | 
             
            module Libis
         | 
| 4 4 | 
             
              module Format
         | 
| 5 | 
            +
                autoload :Config, 'libis/format/config'
         | 
| 5 6 | 
             
                autoload :TypeDatabase, 'libis/format/type_database'
         | 
| 6 7 | 
             
                autoload :Identifier, 'libis/format/identifier'
         | 
| 7 8 | 
             
                autoload :Fido, 'libis/format/fido'
         | 
| 8 9 | 
             
                autoload :Droid, 'libis/format/droid'
         | 
| 10 | 
            +
                autoload :OfficeToPdf, 'libis/format/office_to_pdf'
         | 
| 11 | 
            +
                autoload :PdfCopy, 'libis/format/pdf_copy'
         | 
| 12 | 
            +
                autoload :PdfToPdfa, 'libis/format/pdf_to_pdfa'
         | 
| 13 | 
            +
                autoload :PdfaValidator, 'libis/format/pdfa_validator'
         | 
| 9 14 |  | 
| 10 15 | 
             
                autoload :Converter, 'libis/format/converter'
         | 
| 16 | 
            +
             | 
| 17 | 
            +
                ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
         | 
| 18 | 
            +
                DATA_DIR = File.join(ROOT_DIR, 'data')
         | 
| 19 | 
            +
             | 
| 11 20 | 
             
              end
         | 
| 12 21 | 
             
            end
         | 
    
        data/libis-format.gemspec
    CHANGED
    
    
| @@ -0,0 +1,212 @@ | |
| 1 | 
            +
            # encoding: utf-8
         | 
| 2 | 
            +
            require 'spec_helper'
         | 
| 3 | 
            +
             | 
| 4 | 
            +
            require 'libis/format/converter/image_converter'
         | 
| 5 | 
            +
            require 'libis/format/converter/pdf_converter'
         | 
| 6 | 
            +
            require 'libis/format/converter/office_converter'
         | 
| 7 | 
            +
             | 
| 8 | 
            +
            # Custom matcher: passes when both paths point to files whose contents
            # have the same MD5 digest.
            RSpec::Matchers.define(:be_same_file_as) do |expected_file_path|
              match do |actual_file_path|
                md5_hash(actual_file_path) == md5_hash(expected_file_path)
              end

              # Hex MD5 digest of the file's raw bytes. Digest::MD5.file reads the
              # file in binary mode, which matters for the image/PDF fixtures.
              def md5_hash(file_path)
                Digest::MD5.file(file_path).hexdigest
              end
            end
         | 
| 17 | 
            +
             | 
| 18 | 
            +
            describe 'Converters' do
         | 
| 19 | 
            +
             | 
| 20 | 
            +
              let(:repository) { Libis::Format::Converter::Repository }
         | 
| 21 | 
            +
              let(:file_dir) { File.dirname(__FILE__)}
         | 
| 22 | 
            +
             | 
| 23 | 
            +
              before(:all) {
         | 
| 24 | 
            +
                Libis::Tools::Config[:logger].level = Logger::WARN
         | 
| 25 | 
            +
              }
         | 
| 26 | 
            +
             | 
| 27 | 
            +
              context 'Repository' do
         | 
| 28 | 
            +
             | 
| 29 | 
            +
                it 'loads all converters' do
         | 
| 30 | 
            +
                  expect(repository.get_converters.size).to eq 3
         | 
| 31 | 
            +
                  # noinspection RubyResolve
         | 
| 32 | 
            +
                  expect(repository.get_converters.map(&:to_s)).to include 'Libis::Format::Converter::ImageConverter'
         | 
| 33 | 
            +
                  # noinspection RubyResolve
         | 
| 34 | 
            +
                  expect(repository.get_converters.map(&:to_s)).to include 'Libis::Format::Converter::OfficeConverter'
         | 
| 35 | 
            +
                  # noinspection RubyResolve
         | 
| 36 | 
            +
                  expect(repository.get_converters.map(&:to_s)).to include 'Libis::Format::Converter::PdfConverter'
         | 
| 37 | 
            +
                end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
                it 'creates simple converter chain' do
         | 
| 40 | 
            +
                  chain = repository.get_converter_chain(:TIFF, :PDF)
         | 
| 41 | 
            +
                  expect(chain).to_not be nil
         | 
| 42 | 
            +
                  expect(chain.to_array.size).to eq 1
         | 
| 43 | 
            +
                  expect(chain.to_array).to match [{converter: Libis::Format::Converter::ImageConverter, input: :TIFF, output: :PDF}]
         | 
| 44 | 
            +
                end
         | 
| 45 | 
            +
             | 
| 46 | 
            +
                it 'creates complex chain' do
         | 
| 47 | 
            +
                  chain = repository.get_converter_chain(:TIFF, :PDFA)
         | 
| 48 | 
            +
                  expect(chain).to_not be nil
         | 
| 49 | 
            +
                  expect(chain.to_array.size).to eq 2
         | 
| 50 | 
            +
                  expect(chain.to_array).to match [
         | 
| 51 | 
            +
                                                      {converter: Libis::Format::Converter::ImageConverter, input: :TIFF, output: :PDF},
         | 
| 52 | 
            +
                                                      {converter: Libis::Format::Converter::PdfConverter, input: :PDF, output: :PDFA},
         | 
| 53 | 
            +
                                                  ]
         | 
| 54 | 
            +
                end
         | 
| 55 | 
            +
             | 
| 56 | 
            +
                it 'creates converter chain with options' do
         | 
| 57 | 
            +
                  chain = repository.get_converter_chain(:TIFF, :PDF, {watermark: {}})
         | 
| 58 | 
            +
                  expect(chain).to_not be nil
         | 
| 59 | 
            +
                  expect(chain.to_array.size).to eq 1
         | 
| 60 | 
            +
                  expect(chain.to_array).to match [
         | 
| 61 | 
            +
                                                      {converter: Libis::Format::Converter::ImageConverter, input: :TIFF, output: :PDF, operations: [{method: :watermark, argument: {}}]}
         | 
| 62 | 
            +
                                                  ]
         | 
| 63 | 
            +
                end
         | 
| 64 | 
            +
             | 
| 65 | 
            +
                it 'perfers operations to the end of the chain' do
         | 
| 66 | 
            +
                  chain = repository.get_converter_chain(:TIFF, :PDFA, {watermark: {}})
         | 
| 67 | 
            +
                  expect(chain).to_not be nil
         | 
| 68 | 
            +
                  expect(chain.to_array.size).to eq 2
         | 
| 69 | 
            +
                  expect(chain.to_array).to match [
         | 
| 70 | 
            +
                                                      {converter: Libis::Format::Converter::ImageConverter, input: :TIFF, output: :PDF},
         | 
| 71 | 
            +
                                                      {converter: Libis::Format::Converter::PdfConverter, input: :PDF, output: :PDFA, operations: [{method: :watermark, argument: {}}]}
         | 
| 72 | 
            +
                                                  ]
         | 
| 73 | 
            +
                end
         | 
| 74 | 
            +
             | 
| 75 | 
            +
              end
         | 
| 76 | 
            +
             | 
| 77 | 
            +
              context 'Image Converter' do
         | 
| 78 | 
            +
             | 
| 79 | 
            +
                let(:converter) { Libis::Format::Converter::ImageConverter.new }
         | 
| 80 | 
            +
             | 
| 81 | 
            +
                it 'converts TIFF to JPEG' do
         | 
| 82 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.tif')
         | 
| 83 | 
            +
                  ref_file = File.join(file_dir, 'data', 'test.jpg')
         | 
| 84 | 
            +
                  tgt_file = File.join('', 'tmp', 'test.jpg')
         | 
| 85 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 86 | 
            +
                  result = converter.convert(src_file, tgt_file, :JPG)
         | 
| 87 | 
            +
                  expect(result).to eq tgt_file
         | 
| 88 | 
            +
                  expect(tgt_file).to be_same_file_as ref_file
         | 
| 89 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 90 | 
            +
                end
         | 
| 91 | 
            +
             | 
| 92 | 
            +
                it 'converts TIFF to PNG' do
         | 
| 93 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.tif')
         | 
| 94 | 
            +
                  ref_file = File.join(file_dir, 'data', 'test.png')
         | 
| 95 | 
            +
                  tgt_file = File.join('', 'tmp', 'test.png')
         | 
| 96 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 97 | 
            +
                  result = converter.convert(src_file, tgt_file, :PNG)
         | 
| 98 | 
            +
                  expect(result).to eq tgt_file
         | 
| 99 | 
            +
                  expect(tgt_file).to be_same_file_as ref_file
         | 
| 100 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 101 | 
            +
                end
         | 
| 102 | 
            +
             | 
| 103 | 
            +
                it 'converts PDF to TIFF' do
         | 
| 104 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.pdf')
         | 
| 105 | 
            +
                  ref_file = File.join(file_dir, 'data', 'test.pdf.tif')
         | 
| 106 | 
            +
                  tgt_file = File.join('', 'tmp', 'test.pdf.tif')
         | 
| 107 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 108 | 
            +
                  result = converter.convert(src_file, tgt_file, :TIFF)
         | 
| 109 | 
            +
                  expect(result).to eq tgt_file
         | 
| 110 | 
            +
                  expect(tgt_file).to be_same_file_as ref_file
         | 
| 111 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 112 | 
            +
                end
         | 
| 113 | 
            +
             | 
| 114 | 
            +
                it 'converts TIFF to JPEG with many options' do
         | 
| 115 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.tif')
         | 
| 116 | 
            +
                  ref_file = File.join(file_dir, 'data', 'test-options.jpg')
         | 
| 117 | 
            +
                  tgt_file = File.join('', 'tmp', 'test-options.jpg')
         | 
| 118 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 119 | 
            +
                  converter.watermark(text: 'RSPEC', size: 5, opacity: 0.1, rotation: 15, gap: 0.5, composition: 'modulate')
         | 
| 120 | 
            +
                  result = converter.convert(src_file, tgt_file, :JPG, options: {scale: '150%', quality: '70%'})
         | 
| 121 | 
            +
                  expect(result).to eq tgt_file
         | 
| 122 | 
            +
                  expect(tgt_file).to be_same_file_as ref_file
         | 
| 123 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 124 | 
            +
                end
         | 
| 125 | 
            +
             | 
| 126 | 
            +
              end
         | 
| 127 | 
            +
             | 
| 128 | 
            +
              context 'Pdf Converter' do
         | 
| 129 | 
            +
             | 
| 130 | 
            +
                let(:converter) { Libis::Format::Converter::PdfConverter.new }
         | 
| 131 | 
            +
             | 
| 132 | 
            +
                it 'converts PDF to PDF/A' do
         | 
| 133 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.pdf')
         | 
| 134 | 
            +
                  tgt_file = File.join('', 'tmp', 'test_pdfa.pdf')
         | 
| 135 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 136 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDFA)
         | 
| 137 | 
            +
                  expect(result).to eq tgt_file
         | 
| 138 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 139 | 
            +
                end
         | 
| 140 | 
            +
             | 
| 141 | 
            +
              end
         | 
| 142 | 
            +
             | 
| 143 | 
            +
              context 'Office Converter' do
         | 
| 144 | 
            +
             | 
| 145 | 
            +
                let(:converter) { Libis::Format::Converter::OfficeConverter.new }
         | 
| 146 | 
            +
             | 
| 147 | 
            +
                it 'converts Word document to PDF' do
         | 
| 148 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.doc')
         | 
| 149 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_doc.pdf')
         | 
| 150 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 151 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 152 | 
            +
                  expect(result).to eq tgt_file
         | 
| 153 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 154 | 
            +
                end
         | 
| 155 | 
            +
             | 
| 156 | 
            +
                it 'converts Word 2010 document to PDF' do
         | 
| 157 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.docx')
         | 
| 158 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_docx.pdf')
         | 
| 159 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 160 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 161 | 
            +
                  expect(result).to eq tgt_file
         | 
| 162 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 163 | 
            +
                end
         | 
| 164 | 
            +
             | 
| 165 | 
            +
                it 'converts OpenOffice document to PDF' do
         | 
| 166 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.odt')
         | 
| 167 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_odt.pdf')
         | 
| 168 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 169 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 170 | 
            +
                  expect(result).to eq tgt_file
         | 
| 171 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 172 | 
            +
                end
         | 
| 173 | 
            +
             | 
| 174 | 
            +
                it 'converts RTF document to PDF' do
         | 
| 175 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.rtf')
         | 
| 176 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_rtf.pdf')
         | 
| 177 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 178 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 179 | 
            +
                  expect(result).to eq tgt_file
         | 
| 180 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 181 | 
            +
                end
         | 
| 182 | 
            +
             | 
| 183 | 
            +
                it 'converts TXT document to PDF' do
         | 
| 184 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.txt')
         | 
| 185 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_txt.pdf')
         | 
| 186 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 187 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 188 | 
            +
                  expect(result).to eq tgt_file
         | 
| 189 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 190 | 
            +
                end
         | 
| 191 | 
            +
             | 
| 192 | 
            +
                it 'converts Excel to PDF' do
         | 
| 193 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.xls')
         | 
| 194 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_xls.pdf')
         | 
| 195 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 196 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 197 | 
            +
                  expect(result).to eq tgt_file
         | 
| 198 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 199 | 
            +
                end
         | 
| 200 | 
            +
             | 
| 201 | 
            +
                it 'converts Excel 2011 to PDF' do
         | 
| 202 | 
            +
                  src_file = File.join(file_dir, 'data', 'test.xlsx')
         | 
| 203 | 
            +
                  tgt_file = File.join(file_dir, 'work', 'test_xlsx.pdf')
         | 
| 204 | 
            +
                  FileUtils.mkdir_p File.dirname(tgt_file)
         | 
| 205 | 
            +
                  result = converter.convert(src_file, tgt_file, :PDF)
         | 
| 206 | 
            +
                  expect(result).to eq tgt_file
         | 
| 207 | 
            +
                  FileUtils.rm tgt_file, force: true
         | 
| 208 | 
            +
                end
         | 
| 209 | 
            +
             | 
| 210 | 
            +
              end
         | 
| 211 | 
            +
             | 
| 212 | 
            +
            end
         | 
| Binary file | 
    
        data/spec/data/test.jpg
    ADDED
    
    | Binary file | 
| Binary file | 
    
        data/spec/data/test.png
    CHANGED
    
    | Binary file | 
| Binary file | 
    
        data/spec/identifier_spec.rb
    CHANGED
    
    | @@ -47,6 +47,7 @@ describe 'Identfier' do | |
| 47 47 | 
             
                    'test-lzw.tif' => {mimetype: 'image/tiff', puid: 'fmt/353'},
         | 
| 48 48 | 
             
                    'test.ps' => {mimetype: 'application/postscript', puid: 'x-fmt/408'},
         | 
| 49 49 | 
             
                    'test.png' => {mimetype: 'image/png', puid: 'fmt/11'},
         | 
| 50 | 
            +
                    'test.jpg' => {mimetype: 'image/jpeg', puid: 'fmt/43'},
         | 
| 50 51 | 
             
                    'test.gif' => {mimetype: 'image/gif', puid: 'fmt/4'},
         | 
| 51 52 | 
             
                    'test.xml' => {mimetype: 'application/xml', puid: 'fmt/101'},
         | 
| 52 53 | 
             
                    'test-ead.xml' => {mimetype: 'archive/ead', puid: 'fmt/101'},
         | 
    
        data/tools/PdfTool.jar
    ADDED
    
    | Binary file | 
| Binary file | 
| Binary file |