libis-format 0.9.1 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/.travis.yml +14 -9
  4. data/README.md +1 -0
  5. data/bin/pdf_copy +13 -0
  6. data/data/ISOcoated_v2_eci.icc +0 -0
  7. data/data/PDFA_def.ps +15 -7
  8. data/data/eciRGB_v2.icc +0 -0
  9. data/data/types.yml +29 -25
  10. data/lib/libis/format/config.rb +35 -0
  11. data/lib/libis/format/converter/base.rb +23 -26
  12. data/lib/libis/format/converter/chain.rb +126 -27
  13. data/lib/libis/format/converter/image_converter.rb +211 -0
  14. data/lib/libis/format/converter/office_converter.rb +46 -0
  15. data/lib/libis/format/converter/pdf_converter.rb +110 -0
  16. data/lib/libis/format/converter/repository.rb +38 -50
  17. data/lib/libis/format/droid.rb +15 -8
  18. data/lib/libis/format/fido.rb +3 -10
  19. data/lib/libis/format/identifier.rb +18 -14
  20. data/lib/libis/format/office_to_pdf.rb +52 -0
  21. data/lib/libis/format/pdf_copy.rb +50 -0
  22. data/lib/libis/format/pdf_to_pdfa.rb +79 -0
  23. data/lib/libis/format/pdfa_validator.rb +61 -0
  24. data/lib/libis/format/type_database.rb +1 -1
  25. data/lib/libis/format/version.rb +1 -1
  26. data/lib/libis/format.rb +9 -0
  27. data/libis-format.gemspec +2 -0
  28. data/spec/converter_spec.rb +212 -0
  29. data/spec/data/test-options.jpg +0 -0
  30. data/spec/data/test.jpg +0 -0
  31. data/spec/data/test.pdf.tif +0 -0
  32. data/spec/data/test.png +0 -0
  33. data/spec/data/test_pdfa.pdf +0 -0
  34. data/spec/identifier_spec.rb +1 -0
  35. data/tools/PdfTool.jar +0 -0
  36. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  37. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  38. metadata +56 -5
  39. data/data/ISOcoated.icc +0 -0
  40. data/tools/fido/argparselocal.pyc +0 -0
@@ -0,0 +1,211 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+ require 'libis/format/identifier'
5
+
6
+ require 'mini_magick'
7
+
8
+ MiniMagick.configure do |config|
9
+ config.debug = false
10
+ end
11
+
12
+ module Libis
13
+ module Format
14
+ module Converter
15
+
16
# Image converter built on ImageMagick through the MiniMagick gem.
#
# The setter-style methods (#scale, #resize, #quality, ...) only record a value in @options or
# @flags; the recorded entries are replayed as ImageMagick command line options when #convert runs.
# NOTE(review): @options and @flags are never initialised in this class - presumably the Base
# class constructor creates them; confirm against base.rb.
class ImageConverter < Libis::Format::Converter::Base

  # @return [Array<Symbol>] format types this converter accepts as input
  def self.input_types(_ = nil)
    [:TIFF, :JPG, :PNG, :BMP, :GIF, :JP2, :PDF]
  end

  # @return [Array<Symbol>] format types this converter can produce
  def self.output_types(_ = nil)
    [:TIFF, :JPG, :PNG, :BMP, :GIF, :JP2, :PDF]
  end

  def initialize
    @wm_image = nil
    super
  end

  # No-op operation: requesting it only serves to force usage of this converter.
  def imaginate(_)
    #force usage of this converter
  end

  # @param [Object] percent value passed to the ImageMagick 'scale' option
  def scale(percent)
    @options[:scale] = percent
  end

  # @param [Object] geometry value passed to the ImageMagick 'resize' option
  def resize(geometry)
    @options[:resize] = geometry
  end

  # @param [Object] value value passed to the ImageMagick 'quality' option
  def quality(value)
    @options[:quality] = value
  end

  # @param [Object] value value passed to the ImageMagick 'density' option
  def dpi(value)
    @options[:density] = value
  end

  # NOTE(review): this stores the value under :density exactly like #dpi, so no 'resample'
  # option is ever emitted - confirm that this is intended.
  #
  # @param [Object] value value passed to the ImageMagick 'density' option
  def resample(value)
    @options[:density] = value
  end

  # Request that the image layers are flattened.
  def flatten
    @flags[:flatten] = true
  end

  # @param [Object] value value passed to the ImageMagick 'colorspace' option
  def colorspace(value)
    @options[:colorspace] = value
  end

  # Create or use a watermark image.
  #
  # The watermark options are:
  #     - file: watermark image to use
  #     - text: text to create a watermark from - default '© LIBIS'
  #     - rotation: rotation of the watermark text (counter clockwise in degrees; integer number) - default 30
  #     - size: size of the watermark (integer > 0, 1/n of image size) - default 4
  #     - opacity: opacity of the watermark (fraction 0.0 - 1.0) - default 0.1
  #     - gap: size of the gap between watermark instances. Fractions as percentage of width/height. - default 0.2
  #     - composition: ImageMagick compose method used to apply the watermark - default 'modulate'
  # If a file is given, it will be used as-is if it exists and is a valid image file. Otherwise the
  # file will be created or overwritten with a newly created watermark image.
  # Without a file, the watermark is rendered into a temporary PNG file that is deleted again as soon
  # as it has been loaded.
  #
  # The created watermark has a transparent background and contains the supplied text, rotated by the
  # requested number of degrees counter-clockwise.
  #
  # @param [Hash] options Hash of options for watermark creation.
  def watermark(options = {})
    text = options[:text] || '© LIBIS'
    image = options[:file] || (Dir::Tmpname.create(%w(wm_image .png)) { |_| })
    # #to_i instead of #to_int: String does not implement #to_int, so a String size raised NoMethodError.
    @wm_size = (options[:size] || 4).to_i
    @wm_opacity = ((options[:opacity] || 0.1).to_f * 100).to_i
    @wm_composition = options[:composition] || 'modulate'
    gap = ((options[:gap] || 0.2).to_f * 100).to_i
    # ImageMagick rotates clockwise; convert the counter-clockwise angle.
    rotation = 360 - (options[:rotation] || 30).to_i
    @wm_image = MiniMagick::Image.new(image)
    unless @wm_image.valid?
      # noinspection RubyResolve
      MiniMagick::Tool::Convert.new do |convert|
        # noinspection RubyLiteralArrayInspection
        convert.background 'transparent'
        convert.size('2000x2000')
        convert.gravity 'Center'
        convert.font('Helvetica').fill('black').pointsize(72) #.stroke('black').strokewidth(1)
        convert << "label:#{text}"
        convert.rotate rotation
        convert.trim.repage.+
        convert.bordercolor('transparent').border("#{gap}%")
        convert << image
      end
      if options[:file]
        @wm_image = MiniMagick::Image.new(image)
      else
        # Image.open works on a copy, so the temporary file can be removed right away.
        @wm_image = MiniMagick::Image.open(image)
        File.delete(image)
      end
      # noinspection RubyResolve
      unless @wm_image.valid?
        error "Problem creating watermark image '#{image}'."
        @wm_image = nil
      end
    end
  end

  # Convert an image, a list of images or a directory of images into the target file.
  #
  # Multiple sources (Array, directory or multi-page source file) are converted page by page and
  # then assembled into a single target file.
  #
  # @param [String, Array<String>] source image file, list of image files or directory with image files
  # @param [String] target path of the file to create
  # @param [Symbol] format type of the target file
  # @param [Hash] opts passed on unchanged to the Base implementation
  # @return [String, nil] the target path, or nil if the format cannot hold multiple images
  def convert(source, target, format, opts = {})
    super

    if source.is_a? Array

      unless [:PDF, :TIFF, :GIF, :PBM, :PGM, :PPM].include? format
        error 'Can ony assemble multiple images into multi-page/layer format'
        return nil
      end

      assemble_and_convert(source, target, format)

    elsif File.directory?(source)
      # Sort explicitly: Dir[] does not guarantee an order on Ruby versions before 3.0 and the
      # order of the files determines the order of the pages.
      sources = Dir[File.join(source, '**', '*')].reject { |p| File.directory? p }.sort

      unless [:TIFF, :PDF].include? format
        error 'Can ony assemble multiple images into multi-page/layer format'
        return nil
      end

      assemble_and_convert(sources, target, format)

    else

      image = MiniMagick::Image.new(source)

      if image.pages.size > 1
        assemble_and_convert(image.pages.map { |page| page.path }, target, format)
      else
        convert_image(source, target, format)
      end
    end

    target

  end

  # Convert each source image into a temporary file and join the results into the target file.
  #
  # The temporary files are removed again, also when one of the conversions fails.
  #
  # @param [Array<String>] sources paths of the images to convert, in page order
  # @param [String] target path of the file to create
  # @param [Symbol] format type of the target file
  # @return [Array<Tempfile>] the (already removed) temporary page files
  def assemble_and_convert(sources, target, format)
    extension = Libis::Format::TypeDatabase.type_extentions(format).first
    converted_pages = []
    begin
      sources.each do |path|
        converted = Tempfile.new(['page-', ".#{extension}"])
        # Register the temp file before converting so that the ensure clause always cleans it up.
        converted_pages << converted
        convert_image(path, converted.path, format)
      end
      MiniMagick::Tool::Convert.new do |b|
        converted_pages.each { |page| b << page.path }
        b << target
      end
      converted_pages
    ensure
      converted_pages.each do |temp_file|
        temp_file.close
        temp_file.unlink
      end
    end
  end

  protected

  # Convert a single image, applying the watermark (if any) and all recorded options and flags.
  #
  # @param [String] source path of the image to convert
  # @param [String] target path of the file to create
  # @param [Symbol] format type of the target file
  # @return [String] the target path
  def convert_image(source, target, format)

    image = MiniMagick::Image.new(source)

    MiniMagick::Tool::Convert.new do |convert|
      if @wm_image
        # Scale the watermark to 1/@wm_size of the source image and park it in the 'mpr:watermark'
        # register; the trailing '.delete.+' emits '+delete' to drop it from the image list again.
        convert << @wm_image.path
        convert.filter('Lagrange')
        convert.resize("#{image.width / @wm_size}x#{image.height / @wm_size}").write('mpr:watermark').delete.+
      end

      convert << source
      convert.flatten if format == :JPG
      if @wm_image
        # Build a transparent layer of the same size as the image, tiled with the watermark, and
        # compose it with the image.
        # noinspection RubyResolve
        convert.stack do |stack|
          stack.size("#{image.width}x#{image.height}")
          stack << 'xc:transparent'
          stack.tile('mpr:watermark')
          stack.draw "rectangle 0,0,#{image.width},#{image.height}"
        end
        convert.compose(@wm_composition).define("compose:args=#{@wm_opacity}").composite
      end

      # Each option becomes '-<name> <value>'; a flag becomes '-<name>', or '+<name>' when set to false.
      @options.each { |o, v| convert.send(o, v) }
      @flags.each { |f, v| v.is_a?(FalseClass) ? convert.send(f).+ : convert.send(f) }

      convert.format(format)
      convert << target
    end

    target

  end

end
208
+
209
+ end
210
+ end
211
+ end
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/office_to_pdf'
6
+ require 'libis/format/type_database'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
# Converter that turns office-type documents into PDF by delegating to OfficeToPdf.
class OfficeConverter < Libis::Format::Converter::Base

  # @return [Array<Symbol>] format types this converter accepts as input
  def self.input_types(_ = nil)
    [:TXT, :RTF, :HTML, :MSDOC, :MSXLS, :MSPPT, :MSDOCX, :MSXLSX, :MSPPTX, :WORDPERFECT]
  end

  # @return [Array<Symbol>] format types this converter can produce
  def self.output_types(_ = nil)
    [:PDF]
  end

  # Convert an office document to PDF.
  #
  # @param [String] source path of the document to convert
  # @param [String] target path of the PDF file to create
  # @param [Symbol] format type of the target file
  # @param [Hash] opts passed on unchanged to the Base implementation
  # @return [String, nil] the target path, or nil when OfficeToPdf.run returns a falsy value
  def convert(source, target, format, opts = {})
    super

    OfficeToPdf.run(source, target) ? target : nil
  end

end
43
+
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,110 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/pdf_copy'
6
+ require 'libis/format/pdf_to_pdfa'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
# Converter for PDF to PDF operations (metadata, page ranges, watermark) and PDF to PDF/A.
#
# The operation methods only record values in @options; #convert hands them to PdfCopy as
# '--<key> <value>' command line arguments.
# NOTE(review): @options and #using_temp are not defined in this class - presumably they are
# provided by the Base class; confirm against base.rb.
class PdfConverter < Libis::Format::Converter::Base

  # @return [Array<Symbol>] format types this converter accepts as input
  def self.input_types(_ = nil)
    [:PDF]
  end

  # @return [Array<Symbol>] format types this converter can produce
  def self.output_types(_ = nil)
    [:PDF, :PDFA]
  end

  # Set metadata for Pdf file
  #
  # valid metadata keys are:
  #    - title
  #    - author
  #    - creator
  #    - keywords
  #    - subject
  # Entries with any other key are ignored.
  #
  # @param [Hash] values list of metadata values to set
  def metadata(values = {})
    values.each do |k, v|
      next unless [:title, :author, :creator, :keywords, :subject].include?(k.to_s.to_sym)
      @options["md_#{k}"] = v
    end
  end

  # Select a partial list of pages
  # @param [String] selection as described in com.itextpdf.text.pdf.SequenceList: [!][o][odd][e][even]start-end
  def range(selection)
    @options[:ranges] = selection
  end

  # Configure a watermark, either from an existing image file or from a text.
  #
  # The watermark options are (use symbols):
  #     - text: text to create a watermark from - default '© LIBIS'
  #     - file: watermark image to use
  #     - rotation: rotation of the watermark text (in degrees; integer number)
  #     - size: font size of the watermark text
  #     - opacity: opacity of the watermark (fraction 0.0 - 1.0)
  #     - gap: size of the gap between watermark instances. Integer value is absolute size in points (1/72 inch). Fractions are percentage of width/height.
  # If a file is given and it exists, it is used as the watermark image and the text, rotation and
  # size options are ignored. Otherwise a text watermark is configured.
  #
  # @param [Hash] options Hash of options for watermark creation.
  def watermark(options = {})
    if options[:file] && File.exist?(options[:file])
      @options['wm_image'] = options[:file]
    else
      # NOTE(review): the single quoted '\n' splits on a literal backslash followed by 'n', not on
      # a newline character - confirm that this is the intended line separator.
      @options['wm_text'] = (options[:text] || '© LIBIS').split('\n')
      @options['wm_text_rotation'] = options[:rotation] if options[:rotation]
      @options['wm_font_size'] = options[:size] if options[:size]
    end
    # May be nil; nil values are left out of the command line by #convert_pdf.
    @options['wm_opacity'] = options[:opacity]
    gap = options[:gap].to_s
    # \A and \z anchor the whole value; ^ and $ would accept any matching line of a multi-line string.
    @options['wm_gap_ratio'] = options[:gap] if gap =~ /\A\s*(0+\.\d+|1\.0+)\s*\z/
    @options['wm_gap_size'] = options[:gap] if gap =~ /\A\s*\d+\s*\z/
  end

  # Apply the recorded options and, if requested, convert the result to PDF/A.
  #
  # @param [String] source path of the PDF file to convert
  # @param [String] target path of the file to create
  # @param [Symbol] format type of the target file
  # @param [Hash] opts passed on unchanged to the Base implementation
  # @return [Object, nil] result of the last conversion step; nil if a step failed or if there was
  #   nothing to do (no options recorded and format is not :PDFA)
  def convert(source, target, format, opts = {})
    super

    result = nil

    unless @options.empty?
      result = convert_pdf(source, target)
      return nil unless result
      # The PDF/A step, if any, continues from the output of this step.
      source = result
    end

    if format == :PDFA && source
      result = pdf_to_pdfa(source, target)
    end

    result

  end

  # Run PdfCopy with the recorded options.
  #
  # Options without a value are left out and Array values are expanded into one argument per
  # element; previously every value was converted with #to_s, which passed an Array as its inspect
  # string (e.g. '["text"]') and nil as an empty argument.
  #
  # @param [String] source path of the PDF file to convert
  # @param [String] target path of the file to create
  # @return [Object] whatever #using_temp returns
  def convert_pdf(source, target)

    arguments = @options.reject { |_, v| v.nil? }.map do |k, v|
      ["--#{k}", v.is_a?(Array) ? v.map { |item| item.to_s } : v.to_s]
    end.flatten

    using_temp(target) { |tmpname| Libis::Format::PdfCopy.run source, tmpname, arguments }

  end

  # Run PdfToPdfa on the source file.
  #
  # @param [String] source path of the PDF file to convert
  # @param [String] target path of the file to create
  # @return [Object] whatever #using_temp returns
  def pdf_to_pdfa(source, target)

    using_temp(target) { |tmpname| Libis::Format::PdfToPdfa.run source, tmpname }

  end

end
107
+
108
+ end
109
+ end
110
+ end
@@ -4,6 +4,7 @@ require 'set'
4
4
  require 'singleton'
5
5
 
6
6
  require 'libis/tools/logger'
7
+ require 'libis/format/config'
7
8
 
8
9
  require_relative 'chain'
9
10
 
@@ -16,11 +17,11 @@ module Libis
16
17
  include ::Libis::Tools::Logger
17
18
 
18
19
  attr_reader :converters
19
- attr_writer :converters_glob
20
+ attr_accessor :converters_glob
20
21
 
21
22
  def initialize
22
23
  @converters = Set.new
23
- @converters_glob = File.join(File.basename(__FILE__), '*_converter.rb')
24
+ @converters_glob = File.join(File.dirname(__FILE__), '*_converter.rb')
24
25
  end
25
26
 
26
27
  def Repository.register(converter_class)
@@ -28,18 +29,26 @@ module Libis
28
29
  end
29
30
 
30
31
  def Repository.get_converters
31
- if instance.converters.empty?
32
- Dir.glob(instance.converters_glob).each do |filename|
32
+ instance.get_converters
33
+ end
34
+
35
+ def get_converters
36
+ if converters.empty?
37
+ Dir.glob(converters_glob).each do |filename|
33
38
  # noinspection RubyResolve
34
39
  require File.expand_path(filename)
35
40
  end
36
41
  end
37
- instance.converters
42
+ converters
43
+ end
44
+
45
+ def Repository.get_converter_chain(src_type, tgt_type, operations = {})
46
+ instance.get_converter_chain src_type, tgt_type, operations
38
47
  end
39
48
 
40
- def Repository.get_converter_chain(src_type, tgt_type, operations = [])
49
+ def get_converter_chain(src_type, tgt_type, operations = {})
41
50
  msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
42
- chain_list = recursive_chain src_type, tgt_type, operations
51
+ chain_list = find_chains src_type, tgt_type, operations
43
52
  if chain_list.length > 1
44
53
  warn "Found more than one conversion chain for #{msg}. Picking the first one."
45
54
  end
@@ -48,59 +57,38 @@ module Libis
48
57
  return nil
49
58
  end
50
59
  chain_list.each do |chain|
51
- msg = "Base chain: #{src_type.to_s}"
52
- chain.each do |node|
53
- msg += "->#{node[:converter].name}:#{node[:target].to_s}"
54
- end
55
- debug msg
60
+ debug "Matched chain: #{chain}"
56
61
  end
57
- ::Libis::Format::Converters::Chain.new(chain_list[0])
62
+ chain_list[0]
58
63
  end
59
64
 
60
65
  private
61
66
 
62
- def Repository.recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
63
- return chains_found unless current_chain.length < 8 # upper limit of converter chain we want to consider
64
-
65
- get_converters.each do |converter|
66
- if converter.conversion? src_type, tgt_type and !current_chain.any? { |c|
67
- c[:converter] == converter and c[:target] == tgt_type }
68
- node = Hash.new
69
- node[:converter] = converter
70
- node[:target] = tgt_type
71
- sequence = current_chain.dup
72
- sequence << node
73
- # check if the chain supports all the operations
74
- success = true
75
- operations.each do |op, _|
76
- success = false unless sequence.any? do |n|
77
- n[:converter].new.respond_to? op.to_s.downcase.to_sym
78
- end
79
- end
80
- if success
81
- # we only want to remember the shortest converter chains
82
- if !chains_found.empty? and sequence.length < chains_found[0].length
83
- chains_found.clear
84
- end
85
- chains_found << sequence if chains_found.empty? or sequence.length == chains_found[0].length
86
- end
87
- end
88
- end
67
+ def find_chains(src_type, tgt_type, operations)
68
+ chain = Libis::Format::Converter::Chain.new(src_type, tgt_type, operations)
69
+ build_chains(chain)
70
+ end
71
+
72
+ def build_chains(chain)
89
73
 
90
- return chains_found unless chains_found.empty? or current_chain.length + 1 < chains_found[0].length
74
+ found = []
75
+ chains = [chain]
91
76
 
92
- get_converters.each do |converter|
93
- next unless converter.input_type? src_type
94
- converter.output_types(src_type).each do |tmp_type|
95
- # would like to enable the following for optimalization, but some operation may require such a step
96
- # next if tmp_type == src_type
97
- # next if current_chain.any? { |c| c[:target] == tmp_type}
98
- recursive_chain(tmp_type, tgt_type, operations, chains_found,
99
- current_chain.dup << {:converter => converter, :target => tmp_type})
77
+ # Avoid chains that are too long
78
+ Libis::Format::Config[:converter_chain_max_level].times do
79
+ new_chains = []
80
+ get_converters.each do |converter|
81
+ new_chains += chains.map { |c| c.append(converter) }.flatten
100
82
  end
83
+
84
+ found = new_chains.select { |c| c.valid?}
85
+ return found unless found.empty?
86
+
87
+ chains = new_chains
101
88
  end
102
89
 
103
- chains_found
90
+ found
91
+
104
92
  end
105
93
 
106
94
  end
@@ -1,31 +1,38 @@
1
- require 'os'
2
1
  require 'tempfile'
3
2
  require 'csv'
4
- require 'singleton'
5
3
 
6
4
  require 'libis/tools/extend/string'
7
5
  require 'libis/tools/logger'
8
6
  require 'libis/tools/command'
9
7
 
8
+ require 'libis/format/config'
9
+
10
10
  module Libis
11
11
  module Format
12
12
 
13
13
  class Droid
14
14
  include ::Libis::Tools::Logger
15
- include Singleton
16
15
 
17
16
  def self.run(file)
18
- instance.run file
17
+ self.new.run file
19
18
  end
20
19
 
21
20
  def run(file)
22
- droid_dir = File.join(File.dirname(__FILE__), '..','..','..','tools','droid')
23
- droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')
24
21
  profile = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
25
22
  report = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
26
- result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
23
+ result = Libis::Tools::Command.run(
24
+ Libis::Format::Config[:droid_path],
25
+ '-a', file.escape_for_string,
26
+ '-p', profile,
27
+ '-q',
28
+ )
27
29
  warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
28
- result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
30
+ result = Libis::Tools::Command.run(
31
+ Libis::Format::Config[:droid_path],
32
+ '-e', report,
33
+ '-p', profile,
34
+ '-q'
35
+ )
29
36
  warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
30
37
  File.delete profile
31
38
  result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
@@ -1,11 +1,10 @@
1
- require 'os'
2
1
  require 'csv'
3
- require 'singleton'
4
2
 
5
3
  require 'libis/tools/extend/string'
6
4
  require 'libis/tools/logger'
7
5
  require 'libis/tools/command'
8
6
 
7
+ require 'libis/format/config'
9
8
  require 'libis/format/type_database'
10
9
 
11
10
  module Libis
@@ -13,12 +12,11 @@ module Libis
13
12
 
14
13
  class Fido
15
14
  include ::Libis::Tools::Logger
16
- include Singleton
17
15
 
18
16
  BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']
19
17
 
20
18
  def self.run(file, formats = nil)
21
- instance.run file, formats
19
+ self.new.run file, formats
22
20
  end
23
21
 
24
22
  def run(file, xtra_formats = nil)
@@ -35,17 +33,14 @@ module Libis
35
33
  # do nothing
36
34
  end
37
35
 
38
- bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
39
- cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
40
36
  args = []
41
37
  args << '-loadformats' << "#{fmt_list.join(',')}" unless fmt_list.empty?
42
38
  args << "#{file.escape_for_string}"
43
- fido = ::Libis::Tools::Command.run(cmd, *args)
39
+ fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
44
40
  warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
45
41
 
46
42
  keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
47
43
  fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
48
- debug "Fido output: #{fido_output}"
49
44
 
50
45
  fido_output.each do |x|
51
46
  if x[:status] == 'OK'
@@ -72,8 +67,6 @@ module Libis
72
67
  result
73
68
  end
74
69
 
75
- debug "Fido results: #{fido_results}"
76
-
77
70
  max_score = fido_results.keys.max
78
71
 
79
72
  # Only if we find a single hit of type 'signature' or 'container', we are confident enough to return a result