libis-format 1.0.8 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +1 -0
  3. data/Gemfile +2 -0
  4. data/data/AdobeRGB1998.icc +0 -0
  5. data/data/PDFA_def.ps +3 -3
  6. data/lib/libis/format/config.rb +1 -1
  7. data/lib/libis/format/converter/audio_converter.rb +6 -8
  8. data/lib/libis/format/converter/chain.rb +16 -4
  9. data/lib/libis/format/converter/email_converter.rb +2 -4
  10. data/lib/libis/format/converter/fop_pdf_converter.rb +0 -2
  11. data/lib/libis/format/converter/image_converter.rb +5 -3
  12. data/lib/libis/format/converter/jp2_converter.rb +3 -3
  13. data/lib/libis/format/converter/office_converter.rb +1 -3
  14. data/lib/libis/format/converter/pdf_converter.rb +13 -4
  15. data/lib/libis/format/converter/spreadsheet_converter.rb +1 -3
  16. data/lib/libis/format/converter/video_converter.rb +5 -2
  17. data/lib/libis/format/converter/xslt_converter.rb +15 -14
  18. data/lib/libis/format/tool/email_to_pdf.rb +52 -17
  19. data/lib/libis/format/tool/{ffmpeg.rb → ff_mpeg.rb} +10 -1
  20. data/lib/libis/format/tool/fop_pdf.rb +12 -0
  21. data/lib/libis/format/tool/office_to_pdf.rb +10 -1
  22. data/lib/libis/format/tool/pdf_copy.rb +11 -1
  23. data/lib/libis/format/tool/pdf_merge.rb +11 -1
  24. data/lib/libis/format/tool/pdf_optimizer.rb +11 -2
  25. data/lib/libis/format/tool/pdf_split.rb +11 -1
  26. data/lib/libis/format/tool/pdf_to_pdfa.rb +59 -43
  27. data/lib/libis/format/tool/pdfa_validator.rb +28 -35
  28. data/lib/libis/format/tool/spreadsheet_to_ods.rb +10 -1
  29. data/lib/libis/format/tool.rb +1 -1
  30. data/lib/libis/format/version.rb +1 -1
  31. data/libis-format.gemspec +2 -0
  32. data/tools/emailconverter.jar +0 -0
  33. data/tools/pdf2pdfa +395 -0
  34. metadata +20 -4
  35. data/data/eciRGB_v2.icc +0 -0
@@ -13,6 +13,12 @@ module Libis
13
13
  class PdfSplit
14
14
  include ::Libis::Tools::Logger
15
15
 
16
# Reports whether this tool's prerequisites are available: the configured
# Java command must exit with status 0 when invoked with "-version", and the
# file configured under :pdf_tool must exist.
#
# @return [Boolean]
def self.installed?
  java_probe = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], "-version")
  java_probe[:status] == 0 && File.exist?(Libis::Format::Config[:pdf_tool])
end
21
+
16
22
  def self.run(source, target, options = [])
17
23
  self.new.run source, target, options
18
24
  end
@@ -38,7 +44,11 @@ module Libis
38
44
  raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
45
  raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
46
 
41
- result
47
+ {
48
+ command: result,
49
+ files: [ target ] # TODO: collect the files
50
+ }
51
+
42
52
  end
43
53
  end
44
54
 
@@ -1,83 +1,99 @@
1
- require 'tempfile'
2
- require 'csv'
3
- require 'fileutils'
1
+ require "tempfile"
2
+ require "csv"
3
+ require "fileutils"
4
+ require 'pdfinfo'
4
5
 
5
- require 'libis/tools/extend/string'
6
- require 'libis/tools/logger'
7
- require 'libis/tools/command'
8
- require 'libis/tools/temp_file'
6
+ require "libis/tools/extend/string"
7
+ require "libis/tools/logger"
8
+ require "libis/tools/command"
9
+ require "libis/tools/temp_file"
9
10
 
10
- require 'libis/format'
11
+ require "libis/format"
11
12
 
12
13
  module Libis
13
14
  module Format
14
15
  module Tool
15
-
16
16
  class PdfToPdfa
17
17
  include ::Libis::Tools::Logger
18
18
 
19
# Reports whether the configured Ghostscript command can be executed.
#
# Command.run returns a result hash (its :status entry is what the rest of
# this code inspects), so the exit status has to be read from the hash;
# comparing the hash itself with 0 can never be true.
#
# Ghostscript is invoked with "--version" so that it prints its version and
# exits instead of being started without any arguments.
#
# @return [Boolean] true when the command exits with status 0
def self.installed?
  result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], "--version")
  result[:status] == 0
end
23
+
19
24
  def self.run(source, target = nil, options = {})
20
- self.new.run source, target, options
25
+ new.run source, target, options
21
26
  end
22
27
 
23
28
  def run(source, target = nil, options = nil)
24
-
25
- tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
29
+ tmp_target = Tools::TempFile.name(File.basename(source, ".*"), ".pdf")
26
30
  target ||= tmp_target
27
31
 
32
+ metadata = get_metadata(source)
33
+
28
34
  icc_info = icc_options(options[:colorspace])
29
35
 
30
- icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
36
+ icc_file = Tools::TempFile.name(icc_info[:icc_name], ".icc")
31
37
  FileUtils.cp(File.join(Libis::Format::DATA_DIR, "#{icc_info[:icc_name]}.icc"), icc_file)
32
38
 
33
- def_filename = Tools::TempFile.name('PDFA_def', '.ps')
34
- File.open(def_filename, 'w') do |f|
35
- f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
36
- gsub('[** Fill in ICC profile location **]', icc_file).
37
- gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
39
+ def_filename = Tools::TempFile.name("PDFA_def", ".ps")
40
+ File.open(def_filename, "w") do |f|
41
+ f.puts File.read(File.join(Libis::Format::DATA_DIR, "PDFA_def.ps"))
42
+ .gsub("[**ICC profile**]", icc_file)
43
+ .gsub("[**ICC reference**]", icc_info[:icc_ref])
44
+ .gsub("[**METADATA**]", metadata)
38
45
  end
39
46
 
40
47
  timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
41
48
  result = Libis::Tools::Command.run(
42
- Libis::Format::Config[:ghostscript_cmd],
43
- '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
44
- '-sColorConversionStrategy=/UseDeviceIndependentColor',
45
- "-sProcessColorModel=#{icc_info[:device]}",
46
- '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
47
- "-sOutputICCProfile=#{icc_file}",
48
- '-o', File.absolute_path(target),
49
- def_filename,
50
- source,
51
- timeout: timeout,
52
- kill_after: timeout * 2
49
+ Libis::Format::Config[:ghostscript_cmd],
50
+ "-q",
51
+ "-dBATCH", "-dNOPAUSE", "-dNOOUTERSAVE", "-dNOSAFER",
52
+ # "-dNOPLATFONTS", "-dUseCIEColor=true",
53
+ # "-sColorConversionStrategy=/UseDeviceIndependentColor",
54
+ "-sProcessColorModel=#{icc_info[:device]}",
55
+ "-sOutputICCProfile=#{icc_file}",
56
+ "-dCompatibilityLevel=1.4",
57
+ "-sDEVICE=pdfwrite", "-dPDFA=1", "-dPDFACompatibilityPolicy=1",
58
+ "-o", File.absolute_path(target),
59
+ def_filename,
60
+ source,
61
+ timeout: timeout,
62
+ kill_after: timeout * 2
53
63
  )
54
64
 
55
- raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
65
+ raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
66
+ raise "#{self.class} failed with error #{result[:status]}: \n#{(result[:out] + result[:err]).join("\n")}" if result[:status] != 0
56
67
 
57
68
  FileUtils.rm [icc_file, def_filename].compact, force: true
58
69
 
59
- unless Format::Tool::PdfaValidator.run(target)
60
- result[:status] = -999
61
- result[:err] << 'Failed to validate generated PDF/A file.'
62
- end
63
-
64
- result
70
+ {
71
+ command: result,
72
+ files: [target]
73
+ }
65
74
  end
66
75
 
67
-
68
76
  private
69
77
 
78
# Builds the metadata entries that are substituted into the PostScript
# definition file: a /Title entry is always emitted, and /Author, /Subject,
# /Keywords and /Creator are emitted only when Pdfinfo reports a value.
# Entries are separated by a newline followed by one space.
#
# @param source [String] path of the PDF whose document info is read
# @return [String] the metadata entries, one per line
def get_metadata(source)
  info = Pdfinfo.new(source)
  entries = ["/Title (#{ps_escape(info.title)})"]
  {
    "Author" => info.author,
    "Subject" => info.subject,
    "Keywords" => info.keywords,
    "Creator" => info.creator
  }.each do |key, value|
    entries << "/#{key} (#{ps_escape(value)})" if value
  end
  entries.join("\n ")
end

# Escapes the characters that are special inside a PostScript "( ... )"
# string literal (backslash and both parentheses) so that a value such as
# "Q1 (draft" cannot terminate or unbalance the literal. nil becomes "".
def ps_escape(text)
  text.to_s.gsub(/[\\()]/) { |char| "\\#{char}" }
end
87
+
70
88
  def icc_options(colorspace)
71
89
  case colorspace.to_s.downcase
72
- when 'cmyk'
73
- {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
74
- else
75
- {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
90
+ when "cmyk"
91
+ {icc_name: "ISOcoated_v2_eci", icc_ref: "FOGRA39L", device: "DeviceCMYK"}
92
+ else
93
+ {icc_name: "AdobeRGB1998", icc_ref: "sRGB", device: "DeviceRGB"}
76
94
  end
77
95
  end
78
-
79
96
  end
80
-
81
97
  end
82
98
  end
83
99
  end
@@ -1,24 +1,22 @@
1
- require 'fileutils'
1
+ require "fileutils"
2
2
 
3
- require 'libis/tools/extend/string'
4
- require 'libis/tools/logger'
5
- require 'libis/tools/command'
3
+ require "libis/tools/extend/string"
4
+ require "libis/tools/logger"
5
+ require "libis/tools/command"
6
6
 
7
- require 'libis/format/config'
7
+ require "libis/format/config"
8
8
 
9
9
  module Libis
10
10
  module Format
11
11
  module Tool
12
-
13
12
  class PdfaValidator
14
13
  include ::Libis::Tools::Logger
15
14
 
16
15
  def self.run(source)
17
- self.new.run source
16
+ new.run source
18
17
  end
19
18
 
20
19
  def run(source)
21
-
22
20
  src_file = File.absolute_path(source)
23
21
 
24
22
  timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
@@ -28,46 +26,41 @@ module Libis
28
26
  Dir.chdir(Dir.tmpdir)
29
27
 
30
28
  result = Libis::Tools::Command.run(
31
- pdfa,
32
- '--noxml',
33
- '--level', 'B',
34
- '--verb', '0',
35
- src_file,
36
- timeout: timeout,
37
- kill_after: timeout * 2
29
+ pdfa,
30
+ "--noxml",
31
+ "--level", "B",
32
+ "--verb", "0",
33
+ src_file,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
38
36
  )
39
37
 
40
- raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
41
- raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
38
+ raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
42
40
 
43
41
  Dir.chdir(previous_wd)
44
42
 
45
- unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
46
- warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
47
- result[:out].join("\n")
48
- return false
49
- end
43
+ out, err = result[:out].partition { |line| line =~ /^VLD-\[PASS\]/ }
44
+ result[:out] = out
45
+ result[:err] += err
46
+
47
+ result
50
48
  else
51
49
  jar = Libis::Format::Config[:preflight_jar]
52
50
  result = Libis::Tools::Command.run(
53
- Libis::Format::Config[:java_cmd],
54
- '-jar', jar,
55
- src_file,
56
- timeout: timeout,
57
- kill_after: timeout * 2
51
+ Libis::Format::Config[:java_cmd],
52
+ "-jar", jar,
53
+ src_file,
54
+ timeout: timeout,
55
+ kill_after: timeout * 2
58
56
  )
59
- raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
60
57
 
61
- unless result[:status] == 0
62
- warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
63
- result[:out].join("\n")
64
- return false
65
- end
58
+ raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
59
+
60
+ result
66
61
  end
67
- true
68
62
  end
69
63
  end
70
-
71
64
  end
72
65
  end
73
66
  end
@@ -13,6 +13,11 @@ module Libis
13
13
  class SpreadsheetToOds
14
14
  include ::Libis::Tools::Logger
15
15
 
16
# Reports whether the configured soffice command can be executed.
#
# Command.run returns a result hash (its :status entry is what the rest of
# this code inspects), so the exit status has to be read from the hash;
# comparing the hash itself with 0 can never be true.
#
# @return [Boolean] true when "<soffice_cmd> --version" exits with status 0
def self.installed?
  result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], "--version")
  result[:status] == 0
end
20
+
16
21
  def self.run(source, target, options = {})
17
22
  self.new.run source, target, options
18
23
  end
@@ -48,10 +53,14 @@ module Libis
48
53
 
49
54
  FileUtils.copy tgt_file, target, preserve: true
50
55
 
56
+ {
57
+ command: result,
58
+ files: [ target ]
59
+ }
60
+
51
61
  ensure
52
62
  FileUtils.rmtree workdir rescue nil
53
63
 
54
- result[:out]
55
64
  end
56
65
  end
57
66
 
@@ -10,7 +10,7 @@ module Libis
10
10
  autoload :FileTool, 'libis/format/tool/file_tool'
11
11
 
12
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
13
- autoload :FFMpeg, 'libis/format/tool/ffmpeg'
13
+ autoload :FFMpeg, 'libis/format/tool/ff_mpeg'
14
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
15
15
  autoload :PdfCopy, 'libis/format/tool/pdf_copy'
16
16
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
@@ -1,5 +1,5 @@
1
1
  module Libis
2
2
  module Format
3
- VERSION = '1.0.8'
3
+ VERSION = '1.2.0'
4
4
  end
5
5
  end
data/libis-format.gemspec CHANGED
@@ -39,4 +39,6 @@ Gem::Specification.new do |spec|
39
39
  spec.add_runtime_dependency 'deep_dive', '~> 0.3'
40
40
  spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
41
41
  spec.add_runtime_dependency 'naturally', '~> 2.1'
42
+ spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
43
+
42
44
  end
Binary file