libis-format 1.3.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +45 -250
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +19 -25
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +33 -6
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +86 -128
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'csv'
4
2
  require 'tmpdir'
5
3
 
@@ -9,46 +7,47 @@ require 'libis/tools/logger'
9
7
  require 'libis/tools/command'
10
8
 
11
9
  require 'libis/format/config'
12
- require 'libis/format/type_database'
10
+ require 'libis/format/library'
13
11
 
14
12
  module Libis
15
13
  module Format
16
14
  module Tool
17
- class IdentificationTool
15
+
16
+ class IdentificationTool
18
17
  include Singleton
19
18
  include ::Libis::Tools::Logger
20
19
 
21
20
  def self.bad_mimetype(mimetype)
22
- instance.bad_mimetype(mimetype)
21
+ self.instance.bad_mimetype(mimetype)
23
22
  end
24
23
 
25
- def self.run(file, recursive = false, **options)
24
+ def self.run(file, recursive = false, options = {})
25
+ options ||= {}
26
26
  if file.is_a?(Array)
27
- return run_list file, **options
28
- elsif file.is_a?(String) && File.exist?(file) && File.readable?(file)
27
+ return run_list file, options
28
+ elsif file.is_a?(String) && File.exists?(file) && File.readable?(file)
29
29
  if File.directory?(file)
30
- return run_dir(file, recursive, **options)
30
+ return run_dir(file, recursive, options)
31
31
  elsif File.file?(file)
32
- return instance.run(file, **options)
32
+ return self.instance.run(file, options)
33
33
  end
34
34
  end
35
-
36
35
  raise ArgumentError,
37
36
  'IdentificationTool: file argument should be a path to an existing file or directory or a list of those'
38
37
  end
39
38
 
40
- def self.run_dir(file, recursive = true, **options)
41
- instance.run_dir file, recursive, **options
39
+ def self.run_dir(file, recursive = true, options = {})
40
+ self.instance.run_dir file, recursive, options
42
41
  end
43
42
 
44
- def self.run_list(filelist, **options)
45
- instance.run_list filelist, **options
43
+ def self.run_list(filelist , options = {})
44
+ self.instance.run_list filelist, options
46
45
  end
47
46
 
48
47
  protected
49
48
 
50
49
  def create_list_file(filelist)
51
- list_file = Tempfile.new(%w[file .list])
50
+ list_file = Tempfile.new(%w'file .list')
52
51
  filelist.each do |fname|
53
52
  list_file.write "#{fname}\n"
54
53
  end
@@ -84,22 +83,23 @@ module Libis
84
83
  # { mimetype: <mimetype>, puid: <puid>, matchtype: <matchtype>, score: <score>, ...}
85
84
  #
86
85
  def process_output(output)
87
- output.each_with_object({}) do |x, results|
86
+ output.reduce({}) do |results, x|
88
87
  filepath = File.absolute_path(x.delete(:filepath)).freeze
89
88
  results[filepath] ||= []
90
89
  results[filepath] << annotate(x)
90
+ results
91
91
  end
92
92
  end
93
93
 
94
94
  # Enhance the output with mimetype and score
95
95
  def annotate(result)
96
96
  # Enhance result with mimetype if needed
97
- bad_mimetypes.include?(result[:mimetype]) &&
98
- !bad_puids.include?(result[:puid]) &&
97
+ if bad_mimetypes.include?(result[:mimetype]) && !bad_puids.include?(result[:puid])
99
98
  result[:mimetype] = get_mimetype(result[:puid])
99
+ end
100
100
 
101
101
  # Normalize the mimetype
102
- Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
102
+ Libis::Format::Library.normalize(result)
103
103
 
104
104
  # Default score is 5
105
105
  result[:score] = 5
@@ -108,54 +108,55 @@ module Libis
108
108
  result[:score] = 1 if bad_mimetypes.include? result[:mimetype]
109
109
 
110
110
  # freeze all strings
111
- result.each { |_, v| v.freeze if v.is_a?(String) }
111
+ result.each {|_, v| v.freeze if v.is_a?(String)}
112
112
 
113
113
  # Adapt score based on matchtype
114
114
  result[:matchtype] = result[:matchtype].to_s.downcase
115
115
  case result[:matchtype]
116
116
 
117
117
  # Signature match increases score with 2
118
- when 'signature'
119
- result[:score] += 2
120
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
118
+ when 'signature'
119
+ result[:score] += 2
120
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
121
121
  # ext = File.extname(result[:filename])
122
122
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
123
123
 
124
124
  # Container match increases score with 4
125
- when 'container'
126
- result[:score] += 4
127
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
125
+ when 'container'
126
+ result[:score] += 4
127
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
128
128
  # ext = File.extname(result[:filename])
129
129
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
130
130
 
131
131
  # Extension match is the weakest identification; score is lowered by 2 points
132
- when 'extension'
133
- result[:score] -= 2
132
+ when 'extension'
133
+ result[:score] -= 2
134
134
 
135
135
  # Magic code (file tool) is to be trusted even less
136
- when 'magic'
137
- result[:score] -= 3
136
+ when 'magic'
137
+ result[:score] -= 3
138
138
 
139
+ # Or no change otherwise
140
+ else
141
+ # do nothing
139
142
  end
140
143
 
141
144
  # Detecting a zip file should decrease the score as it may hide one of the many zip-based formats (e.g. epub,
142
145
  # Office OpenXML, OpenDocument, jar, maff, svx)
143
- result[:score] -= 2 if result[:mimetype] == 'application/zip'
146
+ if result[:mimetype] == 'application/zip'
147
+ result[:score] -= 2
148
+ end
144
149
 
145
150
  # Return result enhanced with mimetype and score fields
146
151
  result
147
152
  end
148
153
 
149
154
  def get_mimetype(puid)
150
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first
151
- rescue StandardError
152
- nil
155
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
153
156
  end
154
157
 
155
158
  def get_puid(mimetype)
156
- ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
157
- rescue StandardError
158
- nil
159
+ ::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
159
160
  end
160
161
 
161
162
  attr_accessor :bad_mimetypes, :bad_puids
@@ -169,6 +170,7 @@ module Libis
169
170
  @bad_mimetypes << mimetype
170
171
  end
171
172
  end
173
+
172
174
  end
173
175
  end
174
176
  end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'fileutils'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,61 +9,51 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class OfficeToPdf
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
- (result[:status]).zero?
20
- end
21
-
22
- def self.run(source, target, **options)
23
- new.run source, target, **options
16
+ def self.run(source, target, options = {})
17
+ self.new.run source, target, options
24
18
  end
25
19
 
26
- def run(source, target, **options)
20
+ def run(source, target, options = {})
27
21
  workdir = '/...'
28
22
  workdir = Dir.tmpdir unless Dir.exist? workdir
29
23
 
30
- workdir = File.join(workdir, rand(1_000_000).to_s)
24
+ workdir = File.join(workdir, rand(1000000).to_s)
31
25
  FileUtils.mkpath(workdir)
32
26
 
33
27
  src_file = File.join(workdir, File.basename(source))
34
28
  FileUtils.symlink source, src_file
35
29
 
36
- tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.pdf")
30
+ tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')
37
31
 
38
32
  export_filter = options[:export_filter] || 'pdf'
39
33
 
40
34
  timeout = Libis::Format::Config[:timeouts][:office_to_pdf]
41
35
  result = Libis::Tools::Command.run(
42
- Libis::Format::Config[:soffice_cmd], '--headless',
43
- "-env:UserInstallation=file://#{workdir}",
44
- '--convert-to', export_filter,
45
- '--outdir', workdir, src_file,
46
- timeout:,
47
- kill_after: timeout * 2
36
+ Libis::Format::Config[:soffice_cmd], '--headless',
37
+ "-env:UserInstallation=file://#{workdir}",
38
+ '--convert-to', export_filter,
39
+ '--outdir', workdir, src_file,
40
+ timeout: timeout,
41
+ kill_after: timeout * 2
48
42
  )
49
43
 
50
- raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
51
-
44
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
52
45
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
53
- raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
46
+ raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
54
47
 
55
48
  FileUtils.copy tgt_file, target, preserve: true
56
49
 
57
- {
58
- command: result,
59
- files: [target]
60
- }
61
50
  ensure
62
- begin
63
- FileUtils.rmtree workdir
64
- rescue StandardError
65
- nil
66
- end
51
+ FileUtils.rmtree workdir rescue nil
52
+
53
+ result[:out]
67
54
  end
68
55
  end
56
+
69
57
  end
70
58
  end
71
59
  end
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfCopy
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
18
+ end
19
+
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_copy]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-cp', Libis::Format::Config[:pdf_tool],
30
+ 'CopyPdf',
31
+ '--file_input', source,
32
+ '--file_output', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'os'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,44 +9,40 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfMerge
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
19
- return false unless (result[:status]).zero?
20
-
21
- File.exist?(Libis::Format::Config[:pdf_tool])
22
- end
23
-
24
- def self.run(source, target, *options)
25
- new.run source, target, options
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
26
18
  end
27
19
 
28
- def run(source, target, *options)
20
+ def run(source, target, options = [])
29
21
  source = [source] unless source.is_a?(Array)
30
22
 
31
23
  if OS.java?
32
24
  # TODO: import library and execute in current VM. For now do exactly as in MRI.
33
25
  end
34
26
 
35
- timeout = Libis::Format::Config[:timeouts][:pdf_tool]
36
- args = [
37
- Libis::Format::Config[:java_cmd],
38
- '-jar', Libis::Format::Config[:pdf_tool],
39
- 'merge',
40
- '-o', target,
41
- options,
42
- source
43
- ].flatten
44
-
45
- result = Libis::Tools::Command.run(*args, timeout: , kill_after: timeout * 2)
46
-
47
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
27
+ timeout = Libis::Format::Config[:timeouts][:pdf_merge]
28
+ result = Libis::Tools::Command.run(
29
+ Libis::Format::Config[:java_cmd],
30
+ '-cp', Libis::Format::Config[:pdf_tool],
31
+ 'MergePdf',
32
+ '--file_output', target,
33
+ *options,
34
+ *source,
35
+ timeout: timeout,
36
+ kill_after: timeout * 2
37
+ )
38
+
39
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
40
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
48
41
 
49
42
  result
50
43
  end
51
44
  end
45
+
52
46
  end
53
47
  end
54
48
  end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'os'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,38 +9,37 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfOptimizer
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
19
- (result[:status]).zero?
20
- end
21
-
22
16
  def self.run(source, target, quality)
23
- new.run source, target, quality
17
+ self.new.run source, target, quality
24
18
  end
25
19
 
26
20
  def run(source, target, quality)
27
- timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
28
- args = [
29
- Libis::Format::Config[:ghostscript_cmd],
30
- '-sDEVICE=pdfwrite',
31
- '-dCompatibilityLevel=1.4',
32
- "-dPDFSETTINGS=/#{quality}",
33
- '-dNOPAUSE',
34
- '-dBATCH',
35
- "-sOutputFile=#{target}",
36
- source.to_s
37
- ]
38
-
39
- result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
40
21
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
22
+ timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
23
+ result = Libis::Tools::Command.run(
24
+ 'gs',
25
+ '-sDEVICE=pdfwrite',
26
+ '-dCompatibilityLevel=1.4',
27
+ "-dPDFSETTINGS=/#{quality}",
28
+ '-dNOPAUSE',
29
+ '-dBATCH',
30
+ "-sOutputFile=#{target}",
31
+ "#{source}",
32
+ timeout: timeout,
33
+ kill_after: timeout * 2
34
+ )
35
+
36
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
37
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
42
38
 
43
39
  result
44
40
  end
45
41
  end
42
+
46
43
  end
47
44
  end
48
45
  end
@@ -1,20 +1,47 @@
1
- # frozen_string_literal: true
1
+ require 'os'
2
2
 
3
- require 'libis/format/tool/pdf_tool'
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
4
8
 
5
9
  module Libis
6
10
  module Format
7
11
  module Tool
12
+
8
13
  class PdfSplit
14
+ include ::Libis::Tools::Logger
9
15
 
10
- def self.run(source, target, *options)
11
- PdfTool.run('split', source, target, *options)
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
12
18
  end
13
19
 
14
- def run(source, target, *options)
15
- PdfTool.run('split', source, target, *options)
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_split]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-cp', Libis::Format::Config[:pdf_tool],
30
+ 'SplitPdf',
31
+ '--file_input', source,
32
+ '--file_output', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
16
42
  end
17
43
  end
44
+
18
45
  end
19
46
  end
20
47
  end
@@ -1,9 +1,6 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'tempfile'
4
2
  require 'csv'
5
3
  require 'fileutils'
6
- require 'pdfinfo'
7
4
 
8
5
  require 'libis/tools/extend/string'
9
6
  require 'libis/tools/logger'
@@ -15,24 +12,19 @@ require 'libis/format'
15
12
  module Libis
16
13
  module Format
17
14
  module Tool
15
+
18
16
  class PdfToPdfa
19
17
  include ::Libis::Tools::Logger
20
18
 
21
- def self.installed?
22
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd])
23
- result.zero?
19
+ def self.run(source, target = nil, options = {})
20
+ self.new.run source, target, options
24
21
  end
25
22
 
26
- def self.run(source, target = nil, **options)
27
- new.run source, target, **options
28
- end
23
+ def run(source, target = nil, options = nil)
29
24
 
30
- def run(source, target = nil, **options)
31
25
  tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
32
26
  target ||= tmp_target
33
27
 
34
- metadata = get_metadata(source)
35
-
36
28
  icc_info = icc_options(options[:colorspace])
37
29
 
38
30
  icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
@@ -40,58 +32,52 @@ module Libis
40
32
 
41
33
  def_filename = Tools::TempFile.name('PDFA_def', '.ps')
42
34
  File.open(def_filename, 'w') do |f|
43
- f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
44
- .gsub('[**ICC profile**]', icc_file)
45
- .gsub('[**ICC reference**]', icc_info[:icc_ref])
46
- .gsub('[**METADATA**]', metadata)
35
+ f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
36
+ gsub('[** Fill in ICC profile location **]', icc_file).
37
+ gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
47
38
  end
48
39
 
49
40
  timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
50
41
  result = Libis::Tools::Command.run(
51
- Libis::Format::Config[:ghostscript_cmd],
52
- '-q',
53
- '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dNOSAFER',
54
- # "-dNOPLATFONTS", "-dUseCIEColor=true",
55
- # "-sColorConversionStrategy=/UseDeviceIndependentColor",
56
- "-sProcessColorModel=#{icc_info[:device]}",
57
- "-sOutputICCProfile=#{icc_file}",
58
- '-dCompatibilityLevel=1.4',
59
- '-sDEVICE=pdfwrite', '-dPDFA=1', '-dPDFACompatibilityPolicy=1',
60
- '-o', File.absolute_path(target),
61
- def_filename,
62
- source,
63
- timeout:,
64
- kill_after: timeout * 2
42
+ Libis::Format::Config[:ghostscript_cmd],
43
+ '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
44
+ '-sColorConversionStrategy=/UseDeviceIndependentColor',
45
+ "-sProcessColorModel=#{icc_info[:device]}",
46
+ '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
47
+ "-sOutputICCProfile=#{icc_file}",
48
+ '-o', File.absolute_path(target),
49
+ def_filename,
50
+ source,
51
+ timeout: timeout,
52
+ kill_after: timeout * 2
65
53
  )
66
54
 
67
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
55
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
68
56
 
69
57
  FileUtils.rm [icc_file, def_filename].compact, force: true
70
58
 
59
+ unless Format::Tool::PdfaValidator.run(target)
60
+ result[:status] = -999
61
+ result[:err] << 'Failed to validate generated PDF/A file.'
62
+ end
63
+
71
64
  result
72
65
  end
73
66
 
74
- private
75
67
 
76
- def get_metadata(source)
77
- info = Pdfinfo.new(source)
78
- metadata = "/Title (#{info.title})"
79
- metadata += "\n /Author (#{info.author})" if info.author
80
- metadata += "\n /Subject (#{info.subject})" if info.subject
81
- metadata += "\n /Keywords (#{info.keywords})" if info.keywords
82
- metadata += "\n /Creator (#{info.creator})" if info.creator
83
- metadata
84
- end
68
+ private
85
69
 
86
70
  def icc_options(colorspace)
87
71
  case colorspace.to_s.downcase
88
- when 'cmyk'
89
- { icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK' }
90
- else
91
- { icc_name: 'AdobeRGB1998', icc_ref: 'sRGB', device: 'DeviceRGB' }
72
+ when 'cmyk'
73
+ {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
74
+ else
75
+ {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
92
76
  end
93
77
  end
78
+
94
79
  end
80
+
95
81
  end
96
82
  end
97
83
  end