libis-format 1.0.5 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -1
  3. data/.travis.yml +32 -24
  4. data/README.md +2 -2
  5. data/base/Dockerfile +24 -2
  6. data/base/Dockerfile.alpine +20 -0
  7. data/base/Dockerfile.rvm +56 -0
  8. data/base/rework_path +20 -0
  9. data/docker_cfg.yml +1 -0
  10. data/lib/libis/format.rb +12 -3
  11. data/lib/libis/format/cli/convert.rb +4 -4
  12. data/lib/libis/format/config.rb +16 -12
  13. data/lib/libis/format/converter/audio_converter.rb +2 -36
  14. data/lib/libis/format/converter/base.rb +22 -8
  15. data/lib/libis/format/converter/chain.rb +3 -3
  16. data/lib/libis/format/converter/image_assembler.rb +82 -0
  17. data/lib/libis/format/converter/image_converter.rb +20 -138
  18. data/lib/libis/format/converter/image_splitter.rb +84 -0
  19. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  20. data/lib/libis/format/converter/jp2_converter.rb +1 -1
  21. data/lib/libis/format/converter/office_converter.rb +2 -2
  22. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +6 -132
  24. data/lib/libis/format/converter/pdf_metadata.rb +82 -0
  25. data/lib/libis/format/converter/pdf_optimizer.rb +67 -0
  26. data/lib/libis/format/converter/pdf_protecter.rb +147 -0
  27. data/lib/libis/format/converter/pdf_selecter.rb +83 -0
  28. data/lib/libis/format/converter/pdf_splitter.rb +70 -0
  29. data/lib/libis/format/converter/pdf_watermarker_header.rb +71 -0
  30. data/lib/libis/format/converter/pdf_watermarker_image.rb +76 -0
  31. data/lib/libis/format/converter/pdf_watermarker_text.rb +93 -0
  32. data/lib/libis/format/converter/spreadsheet_converter.rb +2 -2
  33. data/lib/libis/format/converter/video_converter.rb +1 -1
  34. data/lib/libis/format/identifier.rb +3 -3
  35. data/lib/libis/format/info.rb +27 -0
  36. data/lib/libis/format/library.rb +147 -0
  37. data/lib/libis/format/tool.rb +4 -1
  38. data/lib/libis/format/tool/extension_identification.rb +4 -4
  39. data/lib/libis/format/tool/identification_tool.rb +6 -6
  40. data/lib/libis/format/tool/pdf_merge.rb +3 -3
  41. data/lib/libis/format/tool/{pdf_copy.rb → pdf_metadata.rb} +5 -5
  42. data/lib/libis/format/tool/pdf_protect.rb +47 -0
  43. data/lib/libis/format/tool/pdf_select.rb +47 -0
  44. data/lib/libis/format/tool/pdf_split.rb +4 -4
  45. data/lib/libis/format/tool/pdf_watermark.rb +47 -0
  46. data/lib/libis/format/tool/spreadsheet_to_ods.rb +1 -0
  47. data/lib/libis/format/version.rb +1 -1
  48. data/lib/libis/format/yaml_loader.rb +71 -0
  49. data/libis-format.gemspec +3 -2
  50. data/tools/PdfTool.jar +0 -0
  51. data/tools/bcpkix-jdk15on-167.jar +0 -0
  52. data/tools/bcprov-jdk15on-167.jar +0 -0
  53. metadata +32 -13
  54. data/lib/libis/format/type_database.rb +0 -134
  55. data/lib/libis/format/type_database_impl.rb +0 -120
  56. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  57. data/tools/bcprov-jdk15on-1.49.jar +0 -0
@@ -0,0 +1,83 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_select'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ # noinspection DuplicatedCode
12
+ class PdfSelecter < Libis::Format::Converter::Base
13
+
14
+ def self.input_types
15
+ [:PDF]
16
+ end
17
+
18
+ def self.output_types(format = nil)
19
+ return [] unless input_types.include?(format) if format
20
+ [:PDF]
21
+ end
22
+
23
+ def pdf_select(_)
24
+ #force usage of this converter
25
+ end
26
+
27
+ def initialize
28
+ super
29
+ @options[:ranges] = []
30
+ end
31
+
32
+ # Select a partial list of pages
33
+ # @param [String] selection as described in com.itextpdf.text.pdf.SequenceList: [!][o][odd][e][even]start-end
34
+ def range(selection)
35
+ @options[:ranges] += selection.split(/\s*,\s*/) unless selection.blank?
36
+ end
37
+
38
+ # Select a partial list of pages
39
+ # @param [String|Array<String>] selection as described in com.itextpdf.text.pdf.SequenceList: [!][o][odd][e][even]start-end
40
+ def ranges(selection)
41
+ case selection
42
+ when Array
43
+ @options[:ranges] += selection unless selection.empty?
44
+ when String
45
+ range(selection)
46
+ else
47
+ # nothing
48
+ end
49
+ end
50
+
51
+ def convert(source, target, format, opts = {})
52
+ super
53
+
54
+ result = nil
55
+
56
+ unless @options.empty?
57
+ result = convert_pdf(source, target)
58
+ return nil unless result
59
+ end
60
+
61
+ result
62
+
63
+ end
64
+
65
+ def convert_pdf(source, target)
66
+
67
+ using_temp(target) do |tmpname|
68
+ opts = @options[:ranges].map { |range| ["-r", range] }.compact.flatten
69
+ result = Libis::Format::Tool::PdfSelect.run(source, tmpname, opts)
70
+ unless result[:err].empty?
71
+ error("Pdf selection encountered errors:\n%s", result[:err].join(join("\n")))
72
+ next nil
73
+ end
74
+ tmpname
75
+ end
76
+
77
+ end
78
+
79
+ end
80
+
81
+ end
82
+ end
83
+ end
@@ -0,0 +1,70 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_split'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ # noinspection DuplicatedCode
12
+ class PdfSplitter < Libis::Format::Converter::Base
13
+
14
+ def self.input_types
15
+ [:PDF]
16
+ end
17
+
18
+ def self.output_types(format = nil)
19
+ return [] unless input_types.include?(format) if format
20
+ [:PDFA]
21
+ end
22
+
23
+ def self.category
24
+ :splitter
25
+ end
26
+
27
+ def initialize
28
+ super
29
+ end
30
+
31
+ # Split at given page. If omitted, nil or 0, the source PDF will be split at every page
32
+ def page(v)
33
+ @options[:page] = v unless v.blank?
34
+ end
35
+
36
+ def convert(source, target, format, opts = {})
37
+ super
38
+
39
+ result = split(source, target)
40
+ return nil unless result
41
+
42
+ result
43
+ end
44
+
45
+ private
46
+
47
+ def split(source, target)
48
+
49
+ result = Libis::Format::Tool::PdfSplit.run(
50
+ source, target,
51
+ @options.map { |k, v|
52
+ if v.nil?
53
+ nil
54
+ else
55
+ ["--#{k}", v]
56
+ end }.compact.flatten
57
+ )
58
+ unless result[:err].empty?
59
+ error("Pdf split encountered errors:\n%s", result[:err].join(join("\n")))
60
+ return nil
61
+ end
62
+ result[:out]
63
+
64
+ end
65
+
66
+ end
67
+
68
+ end
69
+ end
70
+ end
@@ -0,0 +1,71 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_watermark'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ class PdfWatermarkerHeader < Libis::Format::Converter::Base
12
+
13
+ def self.input_types
14
+ [:PDF]
15
+ end
16
+
17
+ def self.output_types(format = nil)
18
+ return [] unless input_types.include?(format) if format
19
+ [:PDF]
20
+ end
21
+
22
+ def pdf_watermark_header(_)
23
+ #force usage of this converter
24
+ end
25
+
26
+ def initialize
27
+ super
28
+ end
29
+
30
+ def text(v)
31
+ @options[:text] = v.blank? ? nil : v
32
+ end
33
+
34
+ def convert(source, target, format, opts = {})
35
+ super
36
+
37
+ result = convert_pdf(source, target)
38
+ return nil unless result
39
+
40
+ result
41
+
42
+ end
43
+
44
+ # noinspection DuplicatedCode
45
+ def convert_pdf(source, target)
46
+
47
+ using_temp(target) do |tmpname|
48
+ result = Libis::Format::Tool::PdfWatermark.run(
49
+ source, tmpname, 'header',
50
+ @options.map {|k, v|
51
+ if v.nil?
52
+ nil
53
+ else
54
+ ["--#{k}", v]
55
+ end
56
+ }.compact.flatten
57
+ )
58
+ unless result[:err].empty?
59
+ error("Pdf conversion encountered errors:\n%s", result[:err].join(join("\n")))
60
+ next nil
61
+ end
62
+ tmpname
63
+ end
64
+
65
+ end
66
+
67
+ end
68
+
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,76 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_watermark'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ class PdfWatermarkerImage < Libis::Format::Converter::Base
12
+
13
+ def self.input_types
14
+ [:PDF]
15
+ end
16
+
17
+ def self.output_types(format = nil)
18
+ return [] unless input_types.include?(format) if format
19
+ [:PDF]
20
+ end
21
+
22
+ def pdf_watermark_image(_)
23
+ #force usage of this converter
24
+ end
25
+
26
+ def initialize
27
+ super
28
+ @options[:opacity] = '0.3'
29
+ end
30
+
31
+ def file(v)
32
+ @file = v
33
+ end
34
+
35
+ def opacity(v)
36
+ @options[:opacity] = v unless v.blank?
37
+ end
38
+
39
+ def convert(source, target, format, opts = {})
40
+ super
41
+
42
+ result = convert_pdf(source, target)
43
+ return nil unless result
44
+
45
+ result
46
+
47
+ end
48
+
49
+ # noinspection DuplicatedCode
50
+ def convert_pdf(source, target)
51
+
52
+ using_temp(target) do |tmpname|
53
+ result = Libis::Format::Tool::PdfWatermark.run(
54
+ source, tmpname, 'image',
55
+ @options.map {|k, v|
56
+ if v.nil?
57
+ nil
58
+ else
59
+ ["--#{k}", v]
60
+ end
61
+ }.compact.flatten + [@file]
62
+ )
63
+ unless result[:err].empty?
64
+ error("Pdf conversion encountered errors:\n%s", result[:err].join(join("\n")))
65
+ next nil
66
+ end
67
+ tmpname
68
+ end
69
+
70
+ end
71
+
72
+ end
73
+
74
+ end
75
+ end
76
+ end
@@ -0,0 +1,93 @@
1
+ # encoding: utf-8
2
+
3
+ require_relative 'base'
4
+
5
+ require 'libis/format/tool/pdf_watermark'
6
+
7
+ module Libis
8
+ module Format
9
+ module Converter
10
+
11
+ class PdfWatermarkerText < Libis::Format::Converter::Base
12
+
13
+ def self.input_types
14
+ [:PDF]
15
+ end
16
+
17
+ def self.output_types(format = nil)
18
+ return [] unless input_types.include?(format) if format
19
+ [:PDF]
20
+ end
21
+
22
+ def pdf_watermark_text(_)
23
+ #force usage of this converter
24
+ end
25
+
26
+ def initialize
27
+ super
28
+ @text = []
29
+ @options[:opacity] = '0.3'
30
+ end
31
+
32
+ def text(v)
33
+ @text += v.split("\n")
34
+ end
35
+
36
+ def rotation(v)
37
+ @options[:rotation] = v unless v.blank?
38
+ end
39
+
40
+ def size(v)
41
+ @options[:size] = v unless v.blank?
42
+ end
43
+
44
+ def opacity(v)
45
+ @options[:opacity] = v unless v.blank?
46
+ end
47
+
48
+ def gap(v)
49
+ @options[:gap] = v
50
+ end
51
+
52
+ def padding(v)
53
+ @options[:padding] = v
54
+ end
55
+
56
+ def convert(source, target, format, opts = {})
57
+ super
58
+
59
+ result = convert_pdf(source, target)
60
+ return nil unless result
61
+
62
+ result
63
+
64
+ end
65
+
66
+ # noinspection DuplicatedCode
67
+ def convert_pdf(source, target)
68
+
69
+ using_temp(target) do |tmpname|
70
+ result = Libis::Format::Tool::PdfWatermark.run(
71
+ source, tmpname, 'text',
72
+ @options.map {|k, v|
73
+ if v.nil?
74
+ nil
75
+ else
76
+ ["--#{k}", v]
77
+ end
78
+ }.compact.flatten + @text
79
+ )
80
+ unless result[:err].empty?
81
+ error("Pdf conversion encountered errors:\n%s", result[:err].join(join("\n")))
82
+ next nil
83
+ end
84
+ tmpname
85
+ end
86
+
87
+ end
88
+
89
+ end
90
+
91
+ end
92
+ end
93
+ end
@@ -3,7 +3,7 @@
3
3
  require_relative 'base'
4
4
 
5
5
  require 'libis/format/tool/spreadsheet_to_ods'
6
- require 'libis/format/type_database'
6
+ require 'libis/format/library'
7
7
 
8
8
  module Libis
9
9
  module Format
@@ -20,7 +20,7 @@ module Libis
20
20
  end
21
21
 
22
22
  def self.output_types(format = nil)
23
- return [] unless input_types.include?(format)
23
+ return [] unless input_types.include?(format) if format
24
24
  [:OO_CALC]
25
25
  end
26
26
 
@@ -14,7 +14,7 @@ module Libis
14
14
  end
15
15
 
16
16
  def self.output_types(format = nil)
17
- return [] unless input_types.include?(format)
17
+ return [] unless input_types.include?(format) if format
18
18
  [:GIF, :WEBM, :MP4, :MPG, :MKV, :MJP2, :QTFF, :AVI, :OGGV, :WMV, :DV, :FLV, :SWF]
19
19
  end
20
20
 
@@ -9,7 +9,7 @@ require 'libis/tools/extend/string'
9
9
  require 'libis/tools/extend/empty'
10
10
  require 'nori/core_ext/object'
11
11
 
12
- require 'libis/format/type_database'
12
+ require 'libis/format/library'
13
13
 
14
14
  require_relative 'config'
15
15
  require_relative 'tool/fido'
@@ -132,7 +132,7 @@ module Libis
132
132
  if doc.validates_against?(xsd_file)
133
133
  log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
134
134
  info = {mimetype: mime, tool_raw: file_result[:tool], tool: :xsd_validation, match_type: 'xsd_validation', format_version: '', }
135
- file_result.merge! Libis::Format::TypeDatabase.enrich(info, PUID: :puid, MIME: :mimetype, NAME: :format_name)
135
+ file_result.merge! Libis::Format::Library.enrich(info, puid: :puid, mimetype: :mimetype, name: :format_name)
136
136
  end
137
137
  rescue => e
138
138
  # Do nothing - probably Nokogiri crashed during validation. Could have many causes
@@ -208,7 +208,7 @@ module Libis
208
208
  end
209
209
 
210
210
  def get_mimetype(puid)
211
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
211
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetypes)
212
212
  end
213
213
 
214
214
  def get_best_result(results)