libis-format 0.9.1 → 0.9.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (40) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/.travis.yml +14 -9
  4. data/README.md +1 -0
  5. data/bin/pdf_copy +13 -0
  6. data/data/ISOcoated_v2_eci.icc +0 -0
  7. data/data/PDFA_def.ps +15 -7
  8. data/data/eciRGB_v2.icc +0 -0
  9. data/data/types.yml +29 -25
  10. data/lib/libis/format/config.rb +35 -0
  11. data/lib/libis/format/converter/base.rb +23 -26
  12. data/lib/libis/format/converter/chain.rb +126 -27
  13. data/lib/libis/format/converter/image_converter.rb +211 -0
  14. data/lib/libis/format/converter/office_converter.rb +46 -0
  15. data/lib/libis/format/converter/pdf_converter.rb +110 -0
  16. data/lib/libis/format/converter/repository.rb +38 -50
  17. data/lib/libis/format/droid.rb +15 -8
  18. data/lib/libis/format/fido.rb +3 -10
  19. data/lib/libis/format/identifier.rb +18 -14
  20. data/lib/libis/format/office_to_pdf.rb +52 -0
  21. data/lib/libis/format/pdf_copy.rb +50 -0
  22. data/lib/libis/format/pdf_to_pdfa.rb +79 -0
  23. data/lib/libis/format/pdfa_validator.rb +61 -0
  24. data/lib/libis/format/type_database.rb +1 -1
  25. data/lib/libis/format/version.rb +1 -1
  26. data/lib/libis/format.rb +9 -0
  27. data/libis-format.gemspec +2 -0
  28. data/spec/converter_spec.rb +212 -0
  29. data/spec/data/test-options.jpg +0 -0
  30. data/spec/data/test.jpg +0 -0
  31. data/spec/data/test.pdf.tif +0 -0
  32. data/spec/data/test.png +0 -0
  33. data/spec/data/test_pdfa.pdf +0 -0
  34. data/spec/identifier_spec.rb +1 -0
  35. data/tools/PdfTool.jar +0 -0
  36. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  37. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  38. metadata +56 -5
  39. data/data/ISOcoated.icc +0 -0
  40. data/tools/fido/argparselocal.pyc +0 -0
@@ -0,0 +1,211 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+ require 'libis/format/identifier'
5
+
6
+ require 'mini_magick'
7
+
8
+ MiniMagick.configure do |config|
9
+ config.debug = false
10
+ end
11
+
12
+ module Libis
13
+ module Format
14
+ module Converter
15
+
16
+ class ImageConverter < Libis::Format::Converter::Base
17
+
18
# Lists the format types this converter accepts as input.
#
# @param [Object] format not used by this implementation
# @return [Array<Symbol>] supported input type identifiers
def self.input_types(format = nil)
  %i(TIFF JPG PNG BMP GIF JP2 PDF)
end
21
+
22
# Lists the format types this converter can produce.
#
# @param [Object] _ not used by this implementation
# @return [Array<Symbol>] supported output type identifiers
def self.output_types(_ = nil)
  %i(TIFF JPG PNG BMP GIF JP2 PDF)
end
25
+
26
# Starts without a watermark image and then runs the parent class
# initializer.
def initialize
  @wm_image = nil
  super()
end
30
+
31
# Intentional no-op: the method only exists so that requesting it forces the
# usage of this converter. Nothing is configured.
#
# @param [Object] _ ignored
# @return [nil]
def imaginate(_)
  nil
end
34
+
35
# Records the :scale option for the next conversion.
#
# @param [Object] percent value handed to the convert command's scale option
# @return [Object] the value that was stored
def scale(percent)
  @options.store(:scale, percent)
end
38
+
39
# Records the :resize option for the next conversion.
#
# @param [Object] geometry value handed to the convert command's resize option
# @return [Object] the value that was stored
def resize(geometry)
  @options.store(:resize, geometry)
end
42
+
43
# Records the :quality option for the next conversion.
#
# @param [Object] value value handed to the convert command's quality option
# @return [Object] the value that was stored
def quality(value)
  @options.store(:quality, value)
end
46
+
47
# Records the requested resolution as the :density option for the next
# conversion.
#
# @param [Object] value value handed to the convert command's density option
# @return [Object] the value that was stored
def dpi(value)
  @options.store(:density, value)
end
50
+
51
# Records the :resample option for the next conversion.
#
# This used to write :density, which made the method an exact duplicate of
# #dpi: the resolution was only relabelled and the image never resampled.
#
# @param [Object] value value handed to the convert command's resample option
# @return [Object] the value that was stored
def resample(value)
  @options[:resample] = value
end
54
+
55
# Switches on the :flatten flag for the next conversion.
#
# @return [true]
def flatten
  @flags.store(:flatten, true)
end
58
+
59
# Records the :colorspace option for the next conversion.
#
# @param [Object] value value handed to the convert command's colorspace option
# @return [Object] the value that was stored
def colorspace(value)
  @options.store(:colorspace, value)
end
62
+
63
# Create or use a watermark image.
#
# The watermark options are:
#     - file: watermark image to use
#     - text: text to create a watermark from - default '© LIBIS'
#     - rotation: rotation of the watermark text (counter clockwise in degrees; integer number) - default 30
#     - size: size of the watermark (integer > 0, 1/n of image size) - default 4
#     - opacity: opacity of the watermark (fraction 0.0 - 1.0) - default 0.1
#     - composition: compose method used when the watermark is applied - default 'modulate'
#     - gap: size of the gap between watermark instances, as a fraction of width/height - default 0.2
#
# If :file refers to a valid image file it is used as-is. Otherwise a watermark image is generated from the text
# and written to :file, or to a temporary file (deleted again after loading) when no :file is given.
#
# The generated watermark is an image with transparent background containing the text in black 72pt Helvetica,
# rotated counter-clockwise by the requested number of degrees and surrounded by a transparent border of
# gap percent.
#
# @param [Hash] options Hash of options for watermark creation.
# @return [nil]
def watermark(options = {})
  text = options[:text] || '© LIBIS'
  image = options[:file] || (Dir::Tmpname.create(%w(wm_image .png)) { |_| })
  # to_i works for Integer and String alike; the former '4'.to_int raised NoMethodError whenever the default
  # size was used, because String does not implement to_int.
  @wm_size = (options[:size] || 4).to_i
  # round instead of truncating so floating point noise (0.29 * 100 => 28.999...) does not lose a percent
  @wm_opacity = ((options[:opacity] || 0.1).to_f * 100).round
  @wm_composition = options[:composition] || 'modulate'
  gap = ((options[:gap] || 0.2).to_f * 100).round
  # the rotate option turns clockwise; subtract from 360 to get a counter-clockwise slant
  rotation = 360 - (options[:rotation] || 30).to_i

  @wm_image = MiniMagick::Image.new(image)
  # an existing, valid image is used as-is
  return nil if @wm_image.valid?

  # noinspection RubyResolve
  MiniMagick::Tool::Convert.new do |convert|
    convert.background 'transparent'
    convert.size('2000x2000')
    convert.gravity 'Center'
    convert.font('Helvetica').fill('black').pointsize(72)
    convert << "label:#{text}"
    convert.rotate rotation
    convert.trim.repage.+
    convert.bordercolor('transparent').border("#{gap}%")
    convert << image
  end

  if options[:file]
    @wm_image = MiniMagick::Image.new(image)
  else
    # load the generated image before removing the temporary file it was written to
    @wm_image = MiniMagick::Image.open(image)
    File.delete(image)
  end

  # noinspection RubyResolve
  unless @wm_image.valid?
    error "Problem creating watermark image '#{image}'."
    @wm_image = nil
  end
  nil
end
115
+
116
# Converts an image, a list of images or a directory of images into +target+.
#
# - Array source: the entries are converted and assembled into one target; only allowed for the
#   :PDF, :TIFF, :GIF, :PBM, :PGM and :PPM formats.
# - Directory source: every non-directory entry below the directory (recursively) is assembled, sorted by
#   path; only allowed for the :TIFF and :PDF formats.
# - Any other source is treated as a single image file; when it reports more than one page, the pages are
#   converted individually and assembled again.
#
# @param [String, Array<String>] source image file, directory, or list of image files
# @param [String] target path of the file to create
# @param [Symbol] format target format type
# @param [Hash] opts forwarded to the parent implementation through +super+
# @return [String, nil] +target+, or nil when multiple images cannot be assembled into +format+
def convert(source, target, format, opts = {})
  super

  if source.is_a?(Array)
    unless [:PDF, :TIFF, :GIF, :PBM, :PGM, :PPM].include?(format)
      error 'Can only assemble multiple images into multi-page/layer format'
      return nil
    end

    assemble_and_convert(source, target, format)

  elsif File.directory?(source)
    unless [:TIFF, :PDF].include?(format)
      error 'Can only assemble multiple images into multi-page/layer format'
      return nil
    end

    # Dir[] gives no ordering guarantee on older Rubies; sort so the page order is reproducible
    sources = Dir[File.join(source, '**', '*')].reject { |path| File.directory?(path) }.sort
    assemble_and_convert(sources, target, format)

  else
    image = MiniMagick::Image.new(source)

    if image.pages.size > 1
      assemble_and_convert(image.pages.map { |page| page.path }, target, format)
    else
      convert_image(source, target, format)
    end
  end

  target
end
153
+
154
# Converts every source image to +format+ in its own temporary file and then joins the results into +target+.
#
# The temporary page files are always closed and removed, also when one of the conversions or the final join
# raises.
#
# @param [Array<String>] sources paths of the images to combine, in page order
# @param [String] target path of the combined file to write
# @param [Symbol] format target format type; also determines the extension of the temporary files
# @return [Array<Tempfile>] the temporary page files (closed and unlinked by the time this returns)
def assemble_and_convert(sources, target, format)
  extension = ".#{Libis::Format::TypeDatabase.type_extentions(format).first}"
  converted_pages = []
  begin
    sources.each do |path|
      page = Tempfile.new(['page-', extension])
      # register the file before converting so it is cleaned up even if the conversion fails
      converted_pages << page
      convert_image(path, page.path, format)
    end
    MiniMagick::Tool::Convert.new do |b|
      converted_pages.each { |page| b << page.path }
      b << target
    end
    converted_pages
  ensure
    converted_pages.each do |temp_file|
      temp_file.close
      temp_file.unlink
    end
  end
end
169
+
170
+ protected
171
+
172
# Builds and runs one convert command that turns +source+ into +target+.
#
# When a watermark image is set, it is first resized to 1/@wm_size of the source dimensions and kept as
# 'mpr:watermark'; after the source is added, a transparent canvas of the source size is tiled with it and
# composed onto the image using @wm_composition and @wm_opacity. The collected options and flags are applied
# last; a flag whose value is false is emitted in its '+' form.
#
# @param [String] source path of the image to convert
# @param [String] target path of the file to write
# @param [Symbol] format target format type; :JPG additionally flattens the image
# @return [String] +target+
def convert_image(source, target, format)
  image = MiniMagick::Image.new(source)

  MiniMagick::Tool::Convert.new do |cmd|
    if @wm_image
      cmd << @wm_image.path
      cmd.filter('Lagrange')
      cmd.resize("#{image.width / @wm_size}x#{image.height / @wm_size}")
      cmd.write('mpr:watermark')
      cmd.delete.+
    end

    cmd << source
    cmd.flatten if format == :JPG

    if @wm_image
      # noinspection RubyResolve
      cmd.stack do |canvas|
        canvas.size("#{image.width}x#{image.height}")
        canvas << 'xc:transparent'
        canvas.tile('mpr:watermark')
        canvas.draw "rectangle 0,0,#{image.width},#{image.height}"
      end
      cmd.compose(@wm_composition)
      cmd.define("compose:args=#{@wm_opacity}")
      cmd.composite
    end

    @options.each do |name, value|
      cmd.send(name, value)
    end
    @flags.each do |name, value|
      if value.is_a?(FalseClass)
        cmd.send(name).+
      else
        cmd.send(name)
      end
    end

    cmd.format(format)
    cmd << target
  end

  target
end
206
+
207
+ end
208
+
209
+ end
210
+ end
211
+ end
@@ -0,0 +1,46 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/office_to_pdf'
6
+ require 'libis/format/type_database'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
+ class OfficeConverter < Libis::Format::Converter::Base
13
+
14
# Lists the document format types this converter accepts as input.
#
# @param [Object] _ not used by this implementation
# @return [Array<Symbol>] supported input type identifiers
def self.input_types(_ = nil)
  %i(TXT RTF HTML MSDOC MSXLS MSPPT MSDOCX MSXLSX MSPPTX WORDPERFECT)
end
28
+
29
# Lists the format types this converter can produce: PDF only.
#
# @param [Object] _ not used by this implementation
# @return [Array<Symbol>] supported output type identifiers
def self.output_types(_ = nil)
  %i(PDF)
end
32
+
33
# Converts an office document to PDF by delegating to OfficeToPdf.
#
# @param [String] source path of the document to convert
# @param [String] target path of the PDF to write
# @param [Symbol] format target format type (only forwarded to the parent implementation)
# @param [Hash] opts forwarded to the parent implementation through +super+
# @return [String, nil] +target+ when OfficeToPdf.run reports success, nil otherwise
def convert(source, target, format, opts = {})
  super

  OfficeToPdf.run(source, target) ? target : nil
end
41
+
42
+ end
43
+
44
+ end
45
+ end
46
+ end
@@ -0,0 +1,110 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/pdf_copy'
6
+ require 'libis/format/pdf_to_pdfa'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
+ class PdfConverter < Libis::Format::Converter::Base
13
+
14
# Lists the format types this converter accepts as input: PDF only.
#
# @param [Object] _ not used by this implementation
# @return [Array<Symbol>] supported input type identifiers
def self.input_types(_ = nil)
  %i(PDF)
end
17
+
18
# Lists the format types this converter can produce.
#
# @param [Object] _ not used by this implementation
# @return [Array<Symbol>] supported output type identifiers
def self.output_types(_ = nil)
  %i(PDF PDFA)
end
21
+
22
# Set metadata for the Pdf file.
#
# Valid metadata keys are:
#     - title
#     - author
#     - creator
#     - keywords
#     - subject
# Keys may be given as Symbol or String; entries with any other key are skipped. Each accepted value is
# recorded as option "md_<key>".
#
# @param [Hash] values list of metadata values to set
# @return [Hash] the +values+ hash that was passed in
def metadata(values = {})
  accepted = %w(title author creator keywords subject)
  values.each do |key, value|
    @options["md_#{key}"] = value if accepted.include?(key.to_s)
  end
end
38
+
39
# Select a partial list of pages; the selection is recorded as the :ranges option.
#
# @param [String] selection as described in com.itextpdf.text.pdf.SequenceList: [!][o][odd][e][even]start-end
# @return [String] the selection that was stored
def range(selection)
  @options.store(:ranges, selection)
end
44
+
45
# Configure the watermark to apply to the PDF.
#
# The watermark options are (use symbols):
#     - file: watermark image to use; only honoured when the file exists
#     - text: text to create a watermark from - default '© LIBIS'. Used when no existing :file is given.
#       The text is split into lines on newline characters and on the literal two-character sequence \n.
#     - rotation: rotation of the watermark text (in degrees; integer number)
#     - size: font size of the watermark text
#     - opacity: opacity of the watermark (fraction 0.0 - 1.0)
#     - gap: size of the gap between watermark instances. Integer value is absolute size in points (1/72 inch).
#       Fractions (0.x or 1.0) are a ratio of width/height.
#
# This method only records the values as 'wm_*' entries in the converter options; no image is created here.
# Options that are not supplied are not recorded.
#
# @param [Hash] options Hash of options for watermark creation.
def watermark(options = {})
  if options[:file] && File.exist?(options[:file])
    @options['wm_image'] = options[:file]
  else
    # split('\n') in single quotes only matched a backslash followed by 'n'; accept real newlines as well
    @options['wm_text'] = (options[:text] || '© LIBIS').split(/\n|\\n/)
    @options['wm_text_rotation'] = options[:rotation] if options[:rotation]
    @options['wm_font_size'] = options[:size] if options[:size]
  end
  # do not record a nil opacity: it would end up as an empty command line argument
  @options['wm_opacity'] = options[:opacity] if options[:opacity]
  gap = options[:gap].to_s
  @options['wm_gap_ratio'] = options[:gap] if gap =~ /\A\s*(0+\.\d+|1\.0+)\s*\z/
  @options['wm_gap_size'] = options[:gap] if gap =~ /\A\s*\d+\s*\z/
end
73
+
74
# Applies the collected options to the PDF and/or converts it to PDF/A.
#
# When any options are set, the source is first processed by #convert_pdf; a failure there ends the
# conversion. When +format+ is :PDFA, the (possibly processed) file is then passed to #pdf_to_pdfa.
# With no options and a format other than :PDFA, nothing is done and nil is returned.
#
# @param [String] source path of the PDF to convert
# @param [String] target path of the file to write
# @param [Symbol] format target format type
# @param [Hash] opts forwarded to the parent implementation through +super+
# @return [Object, nil] result of the last step that ran, or nil if a step failed or none ran
def convert(source, target, format, opts = {})
  super

  outcome = nil

  unless @options.empty?
    outcome = convert_pdf(source, target)
    return nil unless outcome
    # continue from the file produced by the copy step
    source = outcome
  end

  outcome = pdf_to_pdfa(source, target) if format == :PDFA && source

  outcome
end
92
+
93
+
94
# Runs PdfCopy on +source+, passing the collected options as command line style arguments.
#
# Every option becomes "--<name>" followed by its value(s). Options whose value is nil are left out, and an
# Array value contributes one argument per element (previously the Array was passed as its inspect string,
# e.g. '["line 1", "line 2"]', and nil as an empty argument).
#
# @param [String] source path of the PDF to process
# @param [String] target path handed to using_temp
# @return [Object] whatever using_temp returns
def convert_pdf(source, target)
  arguments = @options.reject { |_, value| value.nil? }.flat_map do |name, value|
    values = value.is_a?(Array) ? value : [value]
    ["--#{name}"] + values.map(&:to_s)
  end

  using_temp(target) { |tmpname| Libis::Format::PdfCopy.run source, tmpname, arguments }
end
99
+
100
# Runs PdfToPdfa on +source+, writing to the temporary file name supplied by using_temp.
#
# @param [String] source path of the PDF to convert
# @param [String] target path handed to using_temp
# @return [Object] whatever using_temp returns
def pdf_to_pdfa(source, target)
  using_temp(target) do |tmpname|
    Libis::Format::PdfToPdfa.run(source, tmpname)
  end
end
105
+
106
+ end
107
+
108
+ end
109
+ end
110
+ end
@@ -4,6 +4,7 @@ require 'set'
4
4
  require 'singleton'
5
5
 
6
6
  require 'libis/tools/logger'
7
+ require 'libis/format/config'
7
8
 
8
9
  require_relative 'chain'
9
10
 
@@ -16,11 +17,11 @@ module Libis
16
17
  include ::Libis::Tools::Logger
17
18
 
18
19
  attr_reader :converters
19
- attr_writer :converters_glob
20
+ attr_accessor :converters_glob
20
21
 
21
22
  def initialize
22
23
  @converters = Set.new
23
- @converters_glob = File.join(File.basename(__FILE__), '*_converter.rb')
24
+ @converters_glob = File.join(File.dirname(__FILE__), '*_converter.rb')
24
25
  end
25
26
 
26
27
  def Repository.register(converter_class)
@@ -28,18 +29,26 @@ module Libis
28
29
  end
29
30
 
30
31
  def Repository.get_converters
31
- if instance.converters.empty?
32
- Dir.glob(instance.converters_glob).each do |filename|
32
+ instance.get_converters
33
+ end
34
+
35
+ def get_converters
36
+ if converters.empty?
37
+ Dir.glob(converters_glob).each do |filename|
33
38
  # noinspection RubyResolve
34
39
  require File.expand_path(filename)
35
40
  end
36
41
  end
37
- instance.converters
42
+ converters
43
+ end
44
+
45
+ def Repository.get_converter_chain(src_type, tgt_type, operations = {})
46
+ instance.get_converter_chain src_type, tgt_type, operations
38
47
  end
39
48
 
40
- def Repository.get_converter_chain(src_type, tgt_type, operations = [])
49
+ def get_converter_chain(src_type, tgt_type, operations = {})
41
50
  msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
42
- chain_list = recursive_chain src_type, tgt_type, operations
51
+ chain_list = find_chains src_type, tgt_type, operations
43
52
  if chain_list.length > 1
44
53
  warn "Found more than one conversion chain for #{msg}. Picking the first one."
45
54
  end
@@ -48,59 +57,38 @@ module Libis
48
57
  return nil
49
58
  end
50
59
  chain_list.each do |chain|
51
- msg = "Base chain: #{src_type.to_s}"
52
- chain.each do |node|
53
- msg += "->#{node[:converter].name}:#{node[:target].to_s}"
54
- end
55
- debug msg
60
+ debug "Matched chain: #{chain}"
56
61
  end
57
- ::Libis::Format::Converters::Chain.new(chain_list[0])
62
+ chain_list[0]
58
63
  end
59
64
 
60
65
  private
61
66
 
62
- def Repository.recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
63
- return chains_found unless current_chain.length < 8 # upper limit of converter chain we want to consider
64
-
65
- get_converters.each do |converter|
66
- if converter.conversion? src_type, tgt_type and !current_chain.any? { |c|
67
- c[:converter] == converter and c[:target] == tgt_type }
68
- node = Hash.new
69
- node[:converter] = converter
70
- node[:target] = tgt_type
71
- sequence = current_chain.dup
72
- sequence << node
73
- # check if the chain supports all the operations
74
- success = true
75
- operations.each do |op, _|
76
- success = false unless sequence.any? do |n|
77
- n[:converter].new.respond_to? op.to_s.downcase.to_sym
78
- end
79
- end
80
- if success
81
- # we only want to remember the shortest converter chains
82
- if !chains_found.empty? and sequence.length < chains_found[0].length
83
- chains_found.clear
84
- end
85
- chains_found << sequence if chains_found.empty? or sequence.length == chains_found[0].length
86
- end
87
- end
88
- end
67
+ def find_chains(src_type, tgt_type, operations)
68
+ chain = Libis::Format::Converter::Chain.new(src_type, tgt_type, operations)
69
+ build_chains(chain)
70
+ end
71
+
72
+ def build_chains(chain)
89
73
 
90
- return chains_found unless chains_found.empty? or current_chain.length + 1 < chains_found[0].length
74
+ found = []
75
+ chains = [chain]
91
76
 
92
- get_converters.each do |converter|
93
- next unless converter.input_type? src_type
94
- converter.output_types(src_type).each do |tmp_type|
95
- # would like to enable the following for optimalization, but some operation may require such a step
96
- # next if tmp_type == src_type
97
- # next if current_chain.any? { |c| c[:target] == tmp_type}
98
- recursive_chain(tmp_type, tgt_type, operations, chains_found,
99
- current_chain.dup << {:converter => converter, :target => tmp_type})
77
+ # Avoid chains that are too long
78
+ Libis::Format::Config[:converter_chain_max_level].times do
79
+ new_chains = []
80
+ get_converters.each do |converter|
81
+ new_chains += chains.map { |c| c.append(converter) }.flatten
100
82
  end
83
+
84
+ found = new_chains.select { |c| c.valid?}
85
+ return found unless found.empty?
86
+
87
+ chains = new_chains
101
88
  end
102
89
 
103
- chains_found
90
+ found
91
+
104
92
  end
105
93
 
106
94
  end
@@ -1,31 +1,38 @@
1
- require 'os'
2
1
  require 'tempfile'
3
2
  require 'csv'
4
- require 'singleton'
5
3
 
6
4
  require 'libis/tools/extend/string'
7
5
  require 'libis/tools/logger'
8
6
  require 'libis/tools/command'
9
7
 
8
+ require 'libis/format/config'
9
+
10
10
  module Libis
11
11
  module Format
12
12
 
13
13
  class Droid
14
14
  include ::Libis::Tools::Logger
15
- include Singleton
16
15
 
17
16
  def self.run(file)
18
- instance.run file
17
+ self.new.run file
19
18
  end
20
19
 
21
20
  def run(file)
22
- droid_dir = File.join(File.dirname(__FILE__), '..','..','..','tools','droid')
23
- droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')
24
21
  profile = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
25
22
  report = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
26
- result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
23
+ result = Libis::Tools::Command.run(
24
+ Libis::Format::Config[:droid_path],
25
+ '-a', file.escape_for_string,
26
+ '-p', profile,
27
+ '-q',
28
+ )
27
29
  warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
28
- result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
30
+ result = Libis::Tools::Command.run(
31
+ Libis::Format::Config[:droid_path],
32
+ '-e', report,
33
+ '-p', profile,
34
+ '-q'
35
+ )
29
36
  warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
30
37
  File.delete profile
31
38
  result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
@@ -1,11 +1,10 @@
1
- require 'os'
2
1
  require 'csv'
3
- require 'singleton'
4
2
 
5
3
  require 'libis/tools/extend/string'
6
4
  require 'libis/tools/logger'
7
5
  require 'libis/tools/command'
8
6
 
7
+ require 'libis/format/config'
9
8
  require 'libis/format/type_database'
10
9
 
11
10
  module Libis
@@ -13,12 +12,11 @@ module Libis
13
12
 
14
13
  class Fido
15
14
  include ::Libis::Tools::Logger
16
- include Singleton
17
15
 
18
16
  BAD_MIMETYPES = [nil, '', 'None', 'application/octet-stream']
19
17
 
20
18
  def self.run(file, formats = nil)
21
- instance.run file, formats
19
+ self.new.run file, formats
22
20
  end
23
21
 
24
22
  def run(file, xtra_formats = nil)
@@ -35,17 +33,14 @@ module Libis
35
33
  # do nothing
36
34
  end
37
35
 
38
- bin_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido'))
39
- cmd = File.join(bin_dir, OS.windows? ? 'fido.bat' : 'fido.sh')
40
36
  args = []
41
37
  args << '-loadformats' << "#{fmt_list.join(',')}" unless fmt_list.empty?
42
38
  args << "#{file.escape_for_string}"
43
- fido = ::Libis::Tools::Command.run(cmd, *args)
39
+ fido = ::Libis::Tools::Command.run(Libis::Format::Config[:fido_path], *args)
44
40
  warn "Fido errors: #{fido[:err].join("\n")}" unless fido[:err].empty?
45
41
 
46
42
  keys = [:status, :time, :puid, :format_name, :signature_name, :filesize, :filename, :mimetype, :matchtype]
47
43
  fido_output = CSV.parse(fido[:out].join("\n")).map { |a| Hash[keys.zip(a)] }
48
- debug "Fido output: #{fido_output}"
49
44
 
50
45
  fido_output.each do |x|
51
46
  if x[:status] == 'OK'
@@ -72,8 +67,6 @@ module Libis
72
67
  result
73
68
  end
74
69
 
75
- debug "Fido results: #{fido_results}"
76
-
77
70
  max_score = fido_results.keys.max
78
71
 
79
72
  # Only if we find a single hit of type 'signature' or 'container', we are confident enough to return a result