libis-format 1.3.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +45 -250
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +19 -25
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +33 -6
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +86 -128
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'csv'
4
2
  require 'tmpdir'
5
3
 
@@ -9,46 +7,47 @@ require 'libis/tools/logger'
9
7
  require 'libis/tools/command'
10
8
 
11
9
  require 'libis/format/config'
12
- require 'libis/format/type_database'
10
+ require 'libis/format/library'
13
11
 
14
12
  module Libis
15
13
  module Format
16
14
  module Tool
17
- class IdentificationTool
15
+
16
+ class IdentificationTool
18
17
  include Singleton
19
18
  include ::Libis::Tools::Logger
20
19
 
21
20
  def self.bad_mimetype(mimetype)
22
- instance.bad_mimetype(mimetype)
21
+ self.instance.bad_mimetype(mimetype)
23
22
  end
24
23
 
25
- def self.run(file, recursive = false, **options)
24
+ def self.run(file, recursive = false, options = {})
25
+ options ||= {}
26
26
  if file.is_a?(Array)
27
- return run_list file, **options
28
- elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
27
+ return run_list file, options
28
+ elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
29
29
  if File.directory?(file)
30
- return run_dir(file, recursive, **options)
30
+ return run_dir(file, recursive, options)
31
31
  elsif File.file?(file)
32
- return instance.run(file, **options)
32
+ return self.instance.run(file, options)
33
33
  end
34
34
  end
35
-
36
35
  raise ArgumentError,
37
36
  'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
38
37
  end
39
38
 
40
- def self.run_dir(file, recursive = true, **options)
41
- instance.run_dir file, recursive, **options
39
+ def self.run_dir(file, recursive = true, options = {})
40
+ self.instance.run_dir file, recursive, options
42
41
  end
43
42
 
44
- def self.run_list(filelist, **options)
45
- instance.run_list filelist, **options
43
+ def self.run_list(filelist , options = {})
44
+ self.instance.run_list filelist, options
46
45
  end
47
46
 
48
47
  protected
49
48
 
50
49
  def create_list_file(filelist)
51
- list_file = Tempfile.new(%w[file .list])
50
+ list_file = Tempfile.new(%w'file .list')
52
51
  filelist.each do |fname|
53
52
  list_file.write "#{fname}\n"
54
53
  end
@@ -84,22 +83,23 @@ module Libis
84
83
  # { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
85
84
  #
86
85
  def process_output(output)
87
- output.each_with_object({}) do |x, results|
86
+ output.reduce({}) do |results, x|
88
87
  filepath = File.absolute_path(x.delete(:filepath)).freeze
89
88
  results[filepath] ||= []
90
89
  results[filepath] << annotate(x)
90
+ results
91
91
  end
92
92
  end
93
93
 
94
94
  # Enhance the output with mimetype and score
95
95
  def annotate(result)
96
96
  # Enhance result with mimetype if needed
97
- bad_mimetypes.include?(result[:mimetype]) &&
98
- !bad_puids.include?(result[:puid]) &&
97
+ if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
99
98
  result[:mimetype] = get_mimetype(result[:puid])
99
+ end
100
100
 
101
101
  # Normalize the mimetype
102
- Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
102
+ Libis::Format::Library.normalize(result)
103
103
 
104
104
  # Default score is 5
105
105
  result[:score] = 5
@@ -108,54 +108,55 @@ module Libis
108
108
  result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
109
109
 
110
110
  # freeze all strings
111
- result.each { |_, v| v.freeze if v.is_a?(String) }
111
+ result.each {|_, v| v.freeze if v.is_a?(String)}
112
112
 
113
113
  # Adapt score based on matchtype
114
114
  result[:matchtype] = result[:matchtype].to_s.downcase
115
115
  case result[:matchtype]
116
116
 
117
117
  # Signature match increases score by 2
118
- when 'signature'
119
- result[:score] += 2
120
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
118
+ when 'signature'
119
+ result[:score] += 2
120
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
121
121
  # ext = File.extname(result[:filename])
122
122
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
123
123
 
124
124
  # Container match increases score by 4
125
- when 'container'
126
- result[:score] += 4
127
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
125
+ when 'container'
126
+ result[:score] += 4
127
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
128
128
  # ext = File.extname(result[:filename])
129
129
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
130
130
 
131
131
  # Extension match is the weakest identification; score is lowered by 2 points
132
- when 'extension'
133
- result[:score] -= 2
132
+ when 'extension'
133
+ result[:score] -= 2
134
134
 
135
135
  # Magic code (file tool) is to be trusted even less
136
- when 'magic'
137
- result[:score] -= 3
136
+ when 'magic'
137
+ result[:score] -= 3
138
138
 
139
+ # Or no change otherwise
140
+ else
141
+ # do nothing
139
142
  end
140
143
 
141
144
  # Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
142
145
  # Office OpenXML, OpenDocument, jar, maff, svx)
143
- result[:score] -= 2 if result[:mimetype] == 'application/zip'
146
+ if result[:mimetype] == 'application/zip'
147
+ result[:score] -= 2
148
+ end
144
149
 
145
150
  # Return result enhanced with mimetype and score fields
146
151
  result
147
152
  end
148
153
 
149
154
  def get_mimetype(puid)
150
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
151
- rescue StandardError
152
- nil
155
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
153
156
  end
154
157
 
155
158
  def get_puid(mimetype)
156
- ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
157
- rescue StandardError
158
- nil
159
+ ::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
159
160
  end
160
161
 
161
162
  attr_accessor :bad_mimetypes, :bad_puids
@@ -169,6 +170,7 @@ module Libis
169
170
  @bad_mimetypes << mimetype
170
171
  end
171
172
  end
173
+
172
174
  end
173
175
  end
174
176
  end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'fileutils'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,61 +9,51 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class OfficeToPdf
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
- (result[:status]).zero?
20
- end
21
-
22
- def self.run(source, target, **options)
23
- new.run source, target, **options
16
+ def self.run(source, target, options = {})
17
+ self.new.run source, target, options
24
18
  end
25
19
 
26
- def run(source, target, **options)
20
+ def run(source, target, options = {})
27
21
  workdir = '/...'
28
22
  workdir = Dir.tmpdir unless Dir.exist? workdir
29
23
 
30
- workdir = File.join(workdir, rand(1_000_000).to_s)
24
+ workdir = File.join(workdir, rand(1000000).to_s)
31
25
  FileUtils.mkpath(workdir)
32
26
 
33
27
  src_file = File.join(workdir, File.basename(source))
34
28
  FileUtils.symlink source, src_file
35
29
 
36
- tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
30
+ tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
37
31
 
38
32
  export_filter = options[:export_filter] || 'pdf'
39
33
 
40
34
  timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
41
35
  result = Libis::Tools::Command.run(
42
- Libis::Format::Config[:soffice_cmd], '--headless',
43
- "-env:UserInstallation=file://#{workdir}",
44
- '--convert-to', export_filter,
45
- '--outdir', workdir, src_file,
46
- timeout:,
47
- kill_after: timeout * 2
36
+ Libis::Format::Config[:soffice_cmd], '--headless',
37
+ "-env:UserInstallation=file://#{workdir}",
38
+ '--convert-to', export_filter,
39
+ '--outdir', workdir, src_file,
40
+ timeout: timeout,
41
+ kill_after: timeout * 2
48
42
  )
49
43
 
50
- raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
51
-
44
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
52
45
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
53
- raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
46
+ raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
54
47
 
55
48
  FileUtils.copy tgt_file, target, preserve: true
56
49
 
57
- {
58
- command: result,
59
- files: [target]
60
- }
61
50
  ensure
62
- begin
63
- FileUtils.rmtree workdir
64
- rescue StandardError
65
- nil
66
- end
51
+ FileUtils.rmtree workdir rescue nil
52
+
53
+ result[:out]
67
54
  end
68
55
  end
56
+
69
57
  end
70
58
  end
71
59
  end
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfCopy
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
18
+ end
19
+
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_copy]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-cp', Libis::Format::Config[:pdf_tool],
30
+ 'CopyPdf',
31
+ '--file_input', source,
32
+ '--file_output', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'os'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,44 +9,40 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfMerge
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
19
- return false unless (result[:status]).zero?
20
-
21
- File.exist?(Libis::Format::Config[:pdf_tool])
22
- end
23
-
24
- def self.run(source, target, *options)
25
- new.run source, target, options
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
26
18
  end
27
19
 
28
- def run(source, target, *options)
20
+ def run(source, target, options = [])
29
21
  source = [source] unless source.is_a?(Array)
30
22
 
31
23
  if OS.java?
32
24
  # TODO: import library and execute in current VM. For now do exactly as in MRI.
33
25
  end
34
26
 
35
- timeout = Libis::Format::Config[:timeouts][:pdf_tool]
36
- args = [
37
- Libis::Format::Config[:java_cmd],
38
- '-jar', Libis::Format::Config[:pdf_tool],
39
- 'merge',
40
- '-o', target,
41
- options,
42
- source
43
- ].flatten
44
-
45
- result = Libis::Tools::Command.run(*args, timeout: , kill_after: timeout * 2)
46
-
47
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
27
+ timeout = Libis::Format::Config[:timeouts][:pdf_merge]
28
+ result = Libis::Tools::Command.run(
29
+ Libis::Format::Config[:java_cmd],
30
+ '-cp', Libis::Format::Config[:pdf_tool],
31
+ 'MergePdf',
32
+ '--file_output', target,
33
+ *options,
34
+ *source,
35
+ timeout: timeout,
36
+ kill_after: timeout * 2
37
+ )
38
+
39
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
40
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
48
41
 
49
42
  result
50
43
  end
51
44
  end
45
+
52
46
  end
53
47
  end
54
48
  end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'os'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,38 +9,37 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfOptimizer
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
19
- (result[:status]).zero?
20
- end
21
-
22
16
  def self.run(source, target, quality)
23
- new.run source, target, quality
17
+ self.new.run source, target, quality
24
18
  end
25
19
 
26
20
  def run(source, target, quality)
27
- timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
28
- args = [
29
- Libis::Format::Config[:ghostscript_cmd],
30
- '-sDEVICE=pdfwrite',
31
- '-dCompatibilityLevel=1.4',
32
- "-dPDFSETTINGS=/#{quality}",
33
- '-dNOPAUSE',
34
- '-dBATCH',
35
- "-sOutputFile=#{target}",
36
- source.to_s
37
- ]
38
-
39
- result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
40
21
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
22
+ timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
23
+ result = Libis::Tools::Command.run(
24
+ 'gs',
25
+ '-sDEVICE=pdfwrite',
26
+ '-dCompatibilityLevel=1.4',
27
+ "-dPDFSETTINGS=/#{quality}",
28
+ '-dNOPAUSE',
29
+ '-dBATCH',
30
+ "-sOutputFile=#{target}",
31
+ "#{source}",
32
+ timeout: timeout,
33
+ kill_after: timeout * 2
34
+ )
35
+
36
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
37
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
42
38
 
43
39
  result
44
40
  end
45
41
  end
42
+
46
43
  end
47
44
  end
48
45
  end
@@ -1,20 +1,47 @@
1
- # frozen_string_literal: true
1
+ require 'os'
2
2
 
3
- require 'libis/format/tool/pdf_tool'
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
4
8
 
5
9
  module Libis
6
10
  module Format
7
11
  module Tool
12
+
8
13
  class PdfSplit
14
+ include ::Libis::Tools::Logger
9
15
 
10
- def self.run(source, target, *options)
11
- PdfTool.run('split', source, target, *options)
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
12
18
  end
13
19
 
14
- def run(source, target, *options)
15
- PdfTool.run('split', source, target, *options)
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_split]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-cp', Libis::Format::Config[:pdf_tool],
30
+ 'SplitPdf',
31
+ '--file_input', source,
32
+ '--file_output', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
16
42
  end
17
43
  end
44
+
18
45
  end
19
46
  end
20
47
  end
@@ -1,9 +1,6 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'tempfile'
4
2
  require 'csv'
5
3
  require 'fileutils'
6
- require 'pdfinfo'
7
4
 
8
5
  require 'libis/tools/extend/string'
9
6
  require 'libis/tools/logger'
@@ -15,24 +12,19 @@ require 'libis/format'
15
12
  module Libis
16
13
  module Format
17
14
  module Tool
15
+
18
16
  class PdfToPdfa
19
17
  include ::Libis::Tools::Logger
20
18
 
21
- def self.installed?
22
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd])
23
- result.zero?
19
+ def self.run(source, target = nil, options = {})
20
+ self.new.run source, target, options
24
21
  end
25
22
 
26
- def self.run(source, target = nil, **options)
27
- new.run source, target, **options
28
- end
23
+ def run(source, target = nil, options = nil)
29
24
 
30
- def run(source, target = nil, **options)
31
25
  tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
32
26
  target ||= tmp_target
33
27
 
34
- metadata = get_metadata(source)
35
-
36
28
  icc_info = icc_options(options[:colorspace])
37
29
 
38
30
  icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
@@ -40,58 +32,52 @@ module Libis
40
32
 
41
33
  def_filename = Tools::TempFile.name('PDFA_def', '.ps')
42
34
  File.open(def_filename, 'w') do |f|
43
- f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
44
- .gsub('[**ICC profile**]', icc_file)
45
- .gsub('[**ICC reference**]', icc_info[:icc_ref])
46
- .gsub('[**METADATA**]', metadata)
35
+ f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
36
+ gsub('[** Fill in ICC profile location **]', icc_file).
37
+ gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
47
38
  end
48
39
 
49
40
  timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
50
41
  result = Libis::Tools::Command.run(
51
- Libis::Format::Config[:ghostscript_cmd],
52
- '-q',
53
- '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dNOSAFER',
54
- # "-dNOPLATFONTS", "-dUseCIEColor=true",
55
- # "-sColorConversionStrategy=/UseDeviceIndependentColor",
56
- "-sProcessColorModel=#{icc_info[:device]}",
57
- "-sOutputICCProfile=#{icc_file}",
58
- '-dCompatibilityLevel=1.4',
59
- '-sDEVICE=pdfwrite', '-dPDFA=1', '-dPDFACompatibilityPolicy=1',
60
- '-o', File.absolute_path(target),
61
- def_filename,
62
- source,
63
- timeout:,
64
- kill_after: timeout * 2
42
+ Libis::Format::Config[:ghostscript_cmd],
43
+ '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
44
+ '-sColorConversionStrategy=/UseDeviceIndependentColor',
45
+ "-sProcessColorModel=#{icc_info[:device]}",
46
+ '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
47
+ "-sOutputICCProfile=#{icc_file}",
48
+ '-o', File.absolute_path(target),
49
+ def_filename,
50
+ source,
51
+ timeout: timeout,
52
+ kill_after: timeout * 2
65
53
  )
66
54
 
67
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
55
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
68
56
 
69
57
  FileUtils.rm [icc_file, def_filename].compact, force: true
70
58
 
59
+ unless Format::Tool::PdfaValidator.run(target)
60
+ result[:status] = -999
61
+ result[:err] << 'Failed to validate generated PDF/A file.'
62
+ end
63
+
71
64
  result
72
65
  end
73
66
 
74
- private
75
67
 
76
- def get_metadata(source)
77
- info = Pdfinfo.new(source)
78
- metadata = "/Title (#{info.title})"
79
- metadata += "\n /Author (#{info.author})" if info.author
80
- metadata += "\n /Subject (#{info.subject})" if info.subject
81
- metadata += "\n /Keywords (#{info.keywords})" if info.keywords
82
- metadata += "\n /Creator (#{info.creator})" if info.creator
83
- metadata
84
- end
68
+ private
85
69
 
86
70
  def icc_options(colorspace)
87
71
  case colorspace.to_s.downcase
88
- when 'cmyk'
89
- { icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK' }
90
- else
91
- { icc_name: 'AdobeRGB1998', icc_ref: 'sRGB', device: 'DeviceRGB' }
72
+ when 'cmyk'
73
+ {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
74
+ else
75
+ {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
92
76
  end
93
77
  end
78
+
94
79
  end
80
+
95
81
  end
96
82
  end
97
83
  end