libis-format 1.0.7 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43) hide show
  1. checksums.yaml +4 -4
  2. data/.travis.yml +32 -24
  3. data/README.md +2 -2
  4. data/base/Dockerfile +5 -3
  5. data/base/rework_path +5 -10
  6. data/lib/libis/format.rb +5 -2
  7. data/lib/libis/format/cli/convert.rb +4 -4
  8. data/lib/libis/format/config.rb +3 -1
  9. data/lib/libis/format/converter/audio_converter.rb +2 -36
  10. data/lib/libis/format/converter/base.rb +21 -8
  11. data/lib/libis/format/converter/chain.rb +3 -3
  12. data/lib/libis/format/converter/image_assembler.rb +82 -0
  13. data/lib/libis/format/converter/image_converter.rb +20 -138
  14. data/lib/libis/format/converter/image_splitter.rb +80 -0
  15. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  16. data/lib/libis/format/converter/jp2_converter.rb +1 -1
  17. data/lib/libis/format/converter/office_converter.rb +2 -2
  18. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  19. data/lib/libis/format/converter/pdf_converter.rb +27 -85
  20. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  21. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  22. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  23. data/lib/libis/format/converter/spreadsheet_converter.rb +2 -2
  24. data/lib/libis/format/converter/video_converter.rb +1 -1
  25. data/lib/libis/format/identifier.rb +3 -3
  26. data/lib/libis/format/info.rb +27 -0
  27. data/lib/libis/format/library.rb +147 -0
  28. data/lib/libis/format/tool/extension_identification.rb +4 -4
  29. data/lib/libis/format/tool/identification_tool.rb +6 -6
  30. data/lib/libis/format/tool/spreadsheet_to_ods.rb +1 -0
  31. data/lib/libis/format/version.rb +1 -1
  32. data/lib/libis/format/yaml_loader.rb +71 -0
  33. data/libis-format.gemspec +2 -1
  34. data/tools/fop/fop.bat +75 -75
  35. data/tools/fop/fop.cmd +31 -31
  36. data/tools/fop/fop.js +341 -341
  37. data/tools/fop/lib/avalon-framework.NOTICE.TXT +11 -11
  38. data/tools/fop/lib/xml-apis.LICENSE-SAX.html +17 -17
  39. data/tools/fop/lib/xml-apis.LICENSE.DOM-documentation.html +74 -74
  40. data/tools/fop/lib/xml-apis.LICENSE.DOM-software.html +66 -66
  41. metadata +13 -6
  42. data/lib/libis/format/type_database.rb +0 -133
  43. data/lib/libis/format/type_database_impl.rb +0 -120
@@ -0,0 +1,70 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/tools/extend/hash'
6
+ require 'libis/format/tool/pdf_copy'
7
+ require 'libis/format/tool/pdf_to_pdfa'
8
+ require 'libis/format/tool/pdf_optimizer'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
class PdfOptimizer < Libis::Format::Converter::Base

  # Formats this converter accepts as input.
  #
  # @return [Array<Symbol>]
  def self.input_types
    [:PDF]
  end

  # Formats this converter can produce.
  #
  # @param [Symbol, nil] format optional source format to check against {input_types}
  # @return [Array<Symbol>] empty when +format+ is given but is not a supported input
  def self.output_types(format = nil)
    return [] if format && !input_types.include?(format)

    [:PDF]
  end

  # Option hook with an empty body; the argument is ignored.
  def pdf_optimize(_)
    # force usage of this converter
  end

  # Select the optimization quality.
  #
  # Optimizing reduces the graphics quality in order to limit the file size. This option relies on the
  # presence of ghostscript and takes one argument, the quality level:
  #
  # - 0 : lowest quality (Acrobat Distiller 'Screen Optimized' equivalent)
  # - 1 : medium quality (Acrobat Distiller 'eBook' equivalent)
  # - 2 : good quality
  # - 3 : high quality (Acrobat Distiller 'Print Optimized' equivalent)
  # - 4 : highest quality (Acrobat Distiller 'Prepress Optimized' equivalent)
  #
  # Note that the optimization is intended to be used with PDFs containing high-resolution images.
  # A value outside 0..4 leaves any earlier selection untouched.
  #
  # @param [Integer] setting quality setting. [0-4]
  # @return [String, nil] the selected preset name, or nil when +setting+ is out of range
  def quality(setting = 1)
    return unless (0..4).cover?(setting)

    presets = %w(screen ebook default printer prepress)
    @quality = presets[setting]
  end

  # Run the base-class conversion step, then optimize +source+ into +target+ with the
  # preset chosen through #quality ('ebook' when none was chosen).
  def convert(source, target, format, opts = {})
    super

    preset = @quality || 'ebook'
    optimize_pdf(source, target, preset)
  end

  # Run the optimizer tool on +source+, writing to a temporary file supplied by using_temp.
  #
  # The block yields the temporary file name when the tool exits with status 0; otherwise it
  # reports the collected error and standard output and yields nil.
  def optimize_pdf(source, target, quality)
    using_temp(target) do |temp_file|
      outcome = Libis::Format::Tool::PdfOptimizer.run(source, temp_file, quality)
      if outcome[:status] == 0
        temp_file
      else
        error("Pdf optimization encountered errors:\n%s", (outcome[:err] + outcome[:out]).join("\n"))
        nil
      end
    end
  end

end
67
+
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,65 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_split'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ # noinspection DuplicatedCode
12
class PdfSplitter < Libis::Format::Converter::Base

  # Formats this converter accepts as input.
  #
  # @return [Array<Symbol>]
  def self.input_types
    [:PDF]
  end

  # Formats this converter can produce.
  #
  # @param [Symbol, nil] format optional source format to check against {input_types}
  # @return [Array<Symbol>] empty when +format+ is given but is not a supported input
  def self.output_types(format = nil)
    return [] if format && !input_types.include?(format)

    [:PDF]
  end

  # Option hook with an empty body; the argument is ignored.
  def pdf_split(_)
    # force usage of this converter
  end

  # @return [Symbol] the converter category, always :splitter
  def self.category
    :splitter
  end

  # Split at given page. If this option is never set (or is given a blank value),
  # the source PDF will be split at every page.
  #
  # @param [Object] v page to split at; blank values are ignored
  def page(v)
    # was `v.blank`, which is not a method and raised NoMethodError on every call
    @page = v unless v.blank?
  end

  # Run the base-class conversion step, then split +source+.
  #
  # @return [Object, nil] whatever the split step produced, or nil when it failed
  def convert(source, target, format, opts = {})
    super

    result = split(source, target)
    return nil unless result

    result
  end

  private

  # Invoke the PdfSplit tool, writing to a temporary file supplied by using_temp.
  #
  # The block yields the temporary file name on success; when the tool reports anything
  # in its :err output, the messages are logged and the block yields nil.
  def split(source, target)
    options = @page ? ['--page', @page] : ['--every_page']
    using_temp(target) do |tmpname|
      result = Libis::Format::Tool::PdfSplit.run(source, tmpname, *options)
      unless result[:err].empty?
        # was `join(join("\n"))`: the inner bare `join` is undefined, so the error path itself raised
        error("Pdf split encountered errors:\n%s", result[:err].join("\n"))
        next nil
      end
      tmpname
    end
  end

end
62
+
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,110 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/tools/extend/hash'
6
+ require 'libis/format/tool/pdf_copy'
7
+ require 'libis/format/tool/pdf_to_pdfa'
8
+ require 'libis/format/tool/pdf_optimizer'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
class PdfWatermarker < Libis::Format::Converter::Base

  # Formats this converter accepts as input.
  #
  # @return [Array<Symbol>]
  def self.input_types
    [:PDF]
  end

  # Formats this converter can produce.
  #
  # @param [Symbol, nil] format optional source format to check against {input_types}
  # @return [Array<Symbol>] empty when +format+ is given but is not a supported input
  def self.output_types(format = nil)
    return [] if format && !input_types.include?(format)

    [:PDF]
  end

  # Option hook with an empty body; the argument is ignored.
  def pdf_watermark(_)
    # force usage of this converter
  end

  # Set the default watermark text and opacity.
  # NOTE(review): assumes the base-class initializer creates @options - confirm.
  def initialize
    super
    @options[:text] = '© LIBIS'
    @options[:opacity] = '0.3'
  end

  # Watermark image file; a blank value clears the option.
  def file(v)
    @options[:file] = v.blank? ? nil : v
  end

  # Watermark text (stored as given, including nil or blank values).
  def text(v)
    @options[:text] = v
  end

  # Text rotation; blank values are ignored.
  def rotation(v)
    @options[:rotation] = v unless v.blank?
  end

  # Font size; blank values are ignored.
  def size(v)
    @options[:size] = v unless v.blank?
  end

  # Watermark opacity; blank values are ignored.
  def opacity(v)
    @options[:opacity] = v unless v.blank?
  end

  # Gap size between watermark repetitions (stored as given).
  def gap_size(v)
    @options[:gap_size] = v
  end

  # Gap ratio between watermark repetitions (stored as given).
  def gap_ratio(v)
    @options[:gap_ratio] = v
  end

  # Run the base-class conversion step, then apply the watermark.
  #
  # @return [Object, nil] whatever the watermark step produced, or nil when it failed
  def convert(source, target, format, opts = {})
    super

    result = convert_pdf(source, target)
    return nil unless result

    result
  end

  # Maps converter option names to the option names passed to the PdfCopy tool.
  # Options without an entry are passed under their own name.
  OPTIONS_TABLE = {
    file: 'wm_image',
    text: 'wm_text',
    rotation: 'wm_text_rotation',
    size: 'wm_font_size',
    opacity: 'wm_opacity',
    gap_size: 'wm_gap_size',
    gap_ratio: 'wm_gap_ratio'
  }.freeze

  # Invoke the PdfCopy tool with the collected watermark options, writing to a temporary
  # file supplied by using_temp.
  #
  # Options whose value is nil are dropped; every other option becomes a "--name value"
  # pair. The block yields the temporary file name on success; when the tool reports
  # anything in its :err output, the messages are logged and the block yields nil.
  #
  # noinspection DuplicatedCode
  def convert_pdf(source, target)
    using_temp(target) do |tmpname|
      arguments = @options.map do |key, value|
        next if value.nil?

        # NOTE(review): '\n' is single-quoted, so this splits on a literal backslash
        # followed by 'n', not on a newline character - confirm this is intended.
        value = value.split('\n') if key == :text && !value.blank?
        ["--#{OPTIONS_TABLE[key] || key}", (value.is_a?(Array) ? value : value.to_s)]
      end.compact.flatten

      result = Libis::Format::Tool::PdfCopy.run(source, tmpname, arguments)
      unless result[:err].empty?
        # was `join(join("\n"))`: the inner bare `join` is undefined, so the error path itself raised
        error("Pdf conversion encountered errors:\n%s", result[:err].join("\n"))
        next nil
      end
      tmpname
    end
  end

end
107
+
108
+ end
109
+ end
110
+ end
@@ -3,7 +3,7 @@
3
3
  require_relative 'base'
4
4
 
5
5
  require 'libis/format/tool/spreadsheet_to_ods'
6
- require 'libis/format/type_database'
6
+ require 'libis/format/library'
7
7
 
8
8
  module Libis
9
9
  module Format
@@ -20,7 +20,7 @@ module Libis
20
20
  end
21
21
 
22
22
  def self.output_types(format = nil)
23
- return [] unless input_types.include?(format)
23
+ return [] unless input_types.include?(format) if format
24
24
  [:OO_CALC]
25
25
  end
26
26
 
@@ -14,7 +14,7 @@ module Libis
14
14
  end
15
15
 
16
16
  def self.output_types(format = nil)
17
- return [] unless input_types.include?(format)
17
+ return [] unless input_types.include?(format) if format
18
18
  [:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
19
19
  end
20
20
 
@@ -9,7 +9,7 @@ require 'libis/tools/extend/string'
9
9
  require 'libis/tools/extend/empty'
10
10
  require 'nori/core_ext/object'
11
11
 
12
- require 'libis/format/type_database'
12
+ require 'libis/format/library'
13
13
 
14
14
  require_relative 'config'
15
15
  require_relative 'tool/fido'
@@ -132,7 +132,7 @@ module Libis
132
132
  if doc.validates_against?(xsd_file)
133
133
  log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
134
134
  info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
135
- file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
135
+ file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
136
136
  end
137
137
  rescue => e
138
138
  # Do nothing - probably Nokogiri crashed during validation. Could have many causes
@@ -208,7 +208,7 @@ module Libis
208
208
  end
209
209
 
210
210
  def get_mimetype(puid)
211
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
211
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
212
212
  end
213
213
 
214
214
  def get_best_result(results)
@@ -0,0 +1,27 @@
1
+ module Libis
2
+ module Format
3
# Value holder describing one format: its name, category, description and the
# PUIDs, MIME types and file extensions associated with it.
class Info
  attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

  # @param name format name
  # @param category format category
  # @param description free-text description (defaults to an empty string)
  # @param puids list of PUIDs (defaults to an empty list)
  # @param mimetypes list of MIME types (defaults to an empty list)
  # @param extensions list of file extensions (defaults to an empty list)
  def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
    @name, @category, @description = name, category, description
    @puids, @mimetypes, @extensions = puids, mimetypes, extensions
  end

  # Export the attributes as a Hash. The name and category are passed through as they are;
  # the description and the three lists are shallow copies.
  #
  # @return [Hash{Symbol => Object}]
  def to_hash
    %i[name description category puids mimetypes extensions].each_with_object({}) do |field, exported|
      value = public_send(field)
      exported[field] = %i[name category].include?(field) ? value : value.dup
    end
  end
end
26
+ end
27
+ end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Format
7
+
8
# Singleton front end for the format library. Every lookup is delegated to an
# implementation object that answers +query(key, value)+ with a list of objects
# responding to +to_hash+.
class Library
  include Singleton

  # Class-level shortcuts: each one forwards to the singleton instance.
  class << self
    def implementation=(impl)
      instance.implementation = impl
    end

    def get_info(format)
      instance.get_info(format)
    end

    def get_info_by(key, value)
      instance.get_info_by(key, value)
    end

    def get_infos_by(key, value)
      instance.get_infos_by(key, value)
    end

    def get_field(format, field)
      instance.get_field(format, field)
    end

    def get_field_by(key, value, field)
      instance.get_field_by(key, value, field)
    end

    def get_fields_by(key, value, field)
      instance.get_fields_by(key, value, field)
    end

    def known?(key, value)
      instance.known?(key, value)
    end

    def enrich(info, map_keys = {})
      instance.enrich(info, map_keys)
    end

    def normalize(info, map_keys = {})
      instance.normalize(info, map_keys)
    end
  end

  # Replace the object that answers the queries.
  attr_writer :implementation

  # Field of the first format found by name.
  def get_field(format, field)
    get_field_by(:name, format, field)
  end

  # Field of the first format matching +key+ == +value+.
  #
  # The singular fields :mimetype, :puid and :extension return the first entry of the
  # corresponding list; any other field is read from the info hash as is.
  #
  # @return [Object, nil] nil when no format matches
  def get_field_by(key, value, field)
    info = get_info_by(key, value)
    return nil unless info

    case field
    when :mimetype
      info[:mimetypes]&.first
    when :puid
      info[:puids]&.first
    when :extension
      info[:extensions]&.first
    else
      info[field]
    end
  end

  # The given field of every format matching +key+ == +value+, nil entries removed.
  #
  # @return [Array] empty when nothing matches
  def get_fields_by(key, value, field)
    # `&.map { }.compact` did not protect the trailing `.compact`; get_infos_by now
    # always returns an Array, so no nil guard is needed here.
    get_infos_by(key, value).map { |info| info[field] }.compact
  end

  # Info hash of the first format found by name.
  def get_info(format)
    get_info_by(:name, format)
  end

  # Info hash of the first format matching +key+ == +value+, or nil.
  def get_info_by(key, value)
    get_infos_by(key, value).first
  end

  # Info hashes of all formats matching +key+ == +value+.
  #
  # @return [Array<Hash>] empty when the implementation returns nil or no matches
  def get_infos_by(key, value)
    result = @implementation.query(key, value)
    (result || []).map(&:to_hash)
  end

  # Whether at least one format matches +key+ == +value+.
  def known?(key, value)
    !get_info_by(key, value).nil?
  end

  # Normalize +info+ and then copy library values into it.
  #
  # +map_keys+ translates library field names to the keys used in +info+; fields that are
  # not listed map to themselves. Only the fields present in +map_keys+ (plus :name) are
  # copied, and only when the library has a value for them. The hash passed in is modified
  # in place and returned; a non-Hash +info+ yields an empty Hash.
  #
  # @param [Hash] info
  # @param [Hash] map_keys
  # @return [Hash]
  def enrich(info, map_keys = {})
    info = normalize(info, map_keys)
    mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
    format = info[mapper[:name]]
    return info if format.nil?

    lib_info = get_info(format)
    # A format name the library does not know used to raise NoMethodError on nil here.
    return info unless lib_info

    mapper.keys.each do |key|
      value =
        case key
        when :mimetype then lib_info[:mimetypes]&.first
        when :puid then lib_info[:puids]&.first
        when :extension then lib_info[:extensions]&.first
        else lib_info[key]
        end
      info[mapper[key]] = value if value
    end
    info
  end

  # Derive name from the available info
  #
  # The name is looked up by PUID first and by MIME type second, and is only filled in when
  # it is not already set. Once a name is present, the MIME type and category entries are
  # overwritten with the library's values for that name (nil when the library has none).
  # The hash passed in is modified in place and returned; a non-Hash +info+ yields an
  # empty Hash.
  def normalize(info, map_keys = {})
    return {} unless info.is_a? Hash

    mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
    # fill format from looking up by puid
    unless (puid = info[mapper[:puid]]).blank?
      info[mapper[:name]] ||= get_field_by(:puid, puid, :name)
    end
    # fill format from looking up by mimetype
    unless (mime = info[mapper[:mimetype]]).blank?
      info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name)
    end
    # finally complete the information from looking up by format name
    unless (format = info[mapper[:name]]).nil?
      info[mapper[:mimetype]] = get_field(format, :mimetype)
      info[mapper[:category]] = get_field(format, :category)
    end
    info
  end

  private

  # Build the default implementation from the configuration.
  #
  # SECURITY NOTE(review): the configured string is passed to eval, so whoever controls
  # :format_library_implementation can execute arbitrary Ruby. Keep that setting out of
  # reach of untrusted input.
  def initialize
    @implementation = eval(Libis::Format::Config[:format_library_implementation])
  end

end
145
+
146
+ end
147
+ end