libis-format 1.0.5 → 2.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -1
  3. data/.travis.yml +32 -24
  4. data/README.md +2 -2
  5. data/base/Dockerfile +24 -2
  6. data/base/Dockerfile.alpine +20 -0
  7. data/base/Dockerfile.rvm +56 -0
  8. data/base/rework_path +20 -0
  9. data/docker_cfg.yml +1 -0
  10. data/lib/libis/format.rb +12 -3
  11. data/lib/libis/format/cli/convert.rb +4 -4
  12. data/lib/libis/format/config.rb +16 -12
  13. data/lib/libis/format/converter/audio_converter.rb +2 -36
  14. data/lib/libis/format/converter/base.rb +22 -8
  15. data/lib/libis/format/converter/chain.rb +3 -3
  16. data/lib/libis/format/converter/image_assembler.rb +82 -0
  17. data/lib/libis/format/converter/image_converter.rb +20 -138
  18. data/lib/libis/format/converter/image_splitter.rb +84 -0
  19. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  20. data/lib/libis/format/converter/jp2_converter.rb +1 -1
  21. data/lib/libis/format/converter/office_converter.rb +2 -2
  22. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +6 -132
  24. data/lib/libis/format/converter/pdf_metadata.rb +82 -0
  25. data/lib/libis/format/converter/pdf_optimizer.rb +67 -0
  26. data/lib/libis/format/converter/pdf_protecter.rb +147 -0
  27. data/lib/libis/format/converter/pdf_selecter.rb +83 -0
  28. data/lib/libis/format/converter/pdf_splitter.rb +70 -0
  29. data/lib/libis/format/converter/pdf_watermarker_header.rb +71 -0
  30. data/lib/libis/format/converter/pdf_watermarker_image.rb +76 -0
  31. data/lib/libis/format/converter/pdf_watermarker_text.rb +93 -0
  32. data/lib/libis/format/converter/spreadsheet_converter.rb +2 -2
  33. data/lib/libis/format/converter/video_converter.rb +1 -1
  34. data/lib/libis/format/identifier.rb +3 -3
  35. data/lib/libis/format/info.rb +27 -0
  36. data/lib/libis/format/library.rb +147 -0
  37. data/lib/libis/format/tool.rb +4 -1
  38. data/lib/libis/format/tool/extension_identification.rb +4 -4
  39. data/lib/libis/format/tool/identification_tool.rb +6 -6
  40. data/lib/libis/format/tool/pdf_merge.rb +3 -3
  41. data/lib/libis/format/tool/{pdf_copy.rb → pdf_metadata.rb} +5 -5
  42. data/lib/libis/format/tool/pdf_protect.rb +47 -0
  43. data/lib/libis/format/tool/pdf_select.rb +47 -0
  44. data/lib/libis/format/tool/pdf_split.rb +4 -4
  45. data/lib/libis/format/tool/pdf_watermark.rb +47 -0
  46. data/lib/libis/format/tool/spreadsheet_to_ods.rb +1 -0
  47. data/lib/libis/format/version.rb +1 -1
  48. data/lib/libis/format/yaml_loader.rb +71 -0
  49. data/libis-format.gemspec +3 -2
  50. data/tools/PdfTool.jar +0 -0
  51. data/tools/bcpkix-jdk15on-167.jar +0 -0
  52. data/tools/bcprov-jdk15on-167.jar +0 -0
  53. metadata +32 -13
  54. data/lib/libis/format/type_database.rb +0 -134
  55. data/lib/libis/format/type_database_impl.rb +0 -120
  56. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  57. data/tools/bcprov-jdk15on-1.49.jar +0 -0
@@ -0,0 +1,27 @@
1
module Libis
  module Format
    # Plain value object holding the descriptive data of a single format:
    # its name, category, free-text description and the lists of PUIDs,
    # MIME types and file extensions associated with it.
    class Info
      attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

      # @param name        identifier of the format (required)
      # @param category    category the format belongs to (required)
      # @param description free-text description, defaults to an empty string
      # @param puids       list of PUIDs; nil or a single value is accepted
      # @param mimetypes   list of MIME types; nil or a single value is accepted
      # @param extensions  list of file extensions; nil or a single value is accepted
      def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
        @name = name
        @category = category
        @description = description
        # Kernel#Array leaves a real array untouched, wraps a scalar and turns
        # nil into [], so readers can always call #first / #each on the result.
        @puids = Array(puids)
        @mimetypes = Array(mimetypes)
        @extensions = Array(extensions)
      end

      # Returns the data as a Hash with symbol keys. The description and the
      # three lists are duplicated (shallow copies) so that changes made to the
      # returned hash do not leak back into this object.
      #
      # @return [Hash]
      def to_hash
        {
          name: name,
          description: description.dup,
          category: category,
          puids: puids.dup,
          mimetypes: mimetypes.dup,
          extensions: extensions.dup
        }
      end
    end
  end
end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+ require 'singleton'
4
+
5
module Libis
  module Format

    # Singleton facade over a pluggable format-information backend.
    #
    # Every lookup is forwarded to the configured implementation through its
    # +query(key, value)+ method. Each object it returns is converted with
    # +to_hash+, so callers of this class only ever deal with hashes.
    # All instance methods are also available as class methods.
    class Library
      include Singleton

      # Singular field names that are answered with the first entry of the
      # corresponding list in an info hash.
      FIRST_OF = { mimetype: :mimetypes, puid: :puids, extension: :extensions }.freeze
      private_constant :FIRST_OF

      class << self
        # Replaces the backend used by the singleton instance.
        def implementation=(impl)
          instance.implementation = impl
        end

        # @see Library#get_info
        def get_info(format)
          instance.get_info(format)
        end

        # @see Library#get_info_by
        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        # @see Library#get_infos_by
        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        # @see Library#get_field
        def get_field(format, field)
          instance.get_field(format, field)
        end

        # @see Library#get_field_by
        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        # @see Library#get_fields_by
        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        # @see Library#known?
        def known?(key, value)
          instance.known?(key, value)
        end

        # @see Library#enrich
        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        # @see Library#normalize
        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replaces the backend; the object must respond to +query(key, value)+.
      attr_writer :implementation

      # Value of +field+ for the format with the given name, or nil.
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Value of +field+ for the first format matching +key+ == +value+.
      # The singular fields :mimetype, :puid and :extension yield the first
      # entry of the matching list; any other field is read as-is.
      #
      # @return [Object, nil] nil when no format matches
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        info && extract_field(info, field)
      end

      # Values of +field+ for every format matching +key+ == +value+, without
      # nils. Accepts the same singular field names as #get_field_by.
      #
      # @return [Array] empty when nothing matches
      def get_fields_by(key, value, field)
        get_infos_by(key, value).map { |info| extract_field(info, field) }.compact
      end

      # Info hash of the format with the given name, or nil.
      def get_info(format)
        get_info_by(:name, format)
      end

      # Info hash of the first format matching +key+ == +value+, or nil.
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # Info hashes of all formats matching +key+ == +value+.
      #
      # @return [Array<Hash>] empty when the backend finds nothing (or answers nil)
      def get_infos_by(key, value)
        (@implementation.query(key, value) || []).map(&:to_hash)
      end

      # Whether at least one format matches +key+ == +value+.
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalizes +info+ and then copies library data into it.
      #
      # +map_keys+ translates library field names into the key names used in
      # +info+ (e.g. <tt>{ mimetype: :mime }</tt>). Values the library does not
      # have are left untouched. An +info+ whose format name is missing or not
      # known to the library is returned as normalized.
      #
      # NOTE(review): only the fields listed in +map_keys+, plus :name, are
      # copied here, because the loop walks the keys currently stored in the
      # mapper. Confirm this is the intended scope.
      #
      # @return [Hash] the (mutated) +info+, or {} when +info+ is not a Hash
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        mapper = key_mapper(map_keys)
        format = info[mapper[:name]]
        return info if format.nil?

        lib_info = get_info(format)
        # Unknown format name: nothing to enrich with.
        return info unless lib_info

        mapper.keys.each do |key|
          value = extract_field(lib_info, key)
          info[mapper[key]] = value if value
        end
        info
      end

      # Derive name from the available info
      #
      # Fills the format name from the PUID first, then from the MIME type,
      # never overwriting a name that is already present. When a name is known
      # afterwards, the MIME type and category entries are overwritten with the
      # library's values for that name (nil when the library has none).
      #
      # @return [Hash] the (mutated) +info+, or {} when +info+ is not a Hash
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash

        mapper = key_mapper(map_keys)
        name_key = mapper[:name]

        # fill format from looking up by puid
        puid = info[mapper[:puid]]
        info[name_key] ||= get_field_by(:puid, puid, :name) unless blank_value?(puid)

        # fill format from looking up by mimetype
        mime = info[mapper[:mimetype]]
        info[name_key] ||= get_field_by(:mimetype, mime, :name) unless blank_value?(mime)

        # finally complete the information from looking up by format name
        format = info[name_key]
        unless format.nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      def initialize
        # NOTE(review): eval executes whatever Ruby expression the configuration
        # entry contains; make sure that value never comes from untrusted input.
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

      # Hash translating field names to caller-side key names; a name without
      # an explicit mapping translates to itself (and is remembered).
      def key_mapper(map_keys)
        Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
      end

      # Reads +field+ from an info hash, resolving the singular aliases.
      def extract_field(info, field)
        list_key = FIRST_OF[field]
        list_key ? info[list_key]&.first : info[field]
      end

      # Core-Ruby blank check (nil, false, empty or whitespace-only), so this
      # file does not rely on a String/Object#blank? extension being loaded.
      def blank_value?(value)
        return value.strip.empty? if value.is_a?(String)

        value.respond_to?(:empty?) ? value.empty? : !value
      end

    end

  end
end
@@ -12,10 +12,13 @@ module Libis
12
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
13
13
  autoload :FFMpeg, 'libis/format/tool/ffmpeg'
14
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
15
- autoload :PdfCopy, 'libis/format/tool/pdf_copy'
16
15
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
16
+ autoload :PdfMetadata, 'libis/format/tool/pdf_metadata'
17
17
  autoload :PdfOptimizer, 'libis/format/tool/pdf_optimizer'
18
+ autoload :PdfProtect, 'libis/format/tool/pdf_protect'
19
+ autoload :PdfSelect, 'libis/format/tool/pdf_select'
18
20
  autoload :PdfSplit, 'libis/format/tool/pdf_split'
21
+ autoload :PdfWatermark, 'libis/format/tool/pdf_watermark'
19
22
  autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
20
23
  autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
21
24
 
@@ -37,14 +37,14 @@ module Libis
37
37
  def runner(*args)
38
38
 
39
39
  args.map do |file|
40
- info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
40
+ info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
41
41
  if info
42
42
  {
43
43
  filepath: file,
44
- mimetype: (info[:MIME].first rescue nil),
45
- puid: (info[:PUID].first rescue nil),
44
+ mimetype: (info[:mimetypes].first rescue nil),
45
+ puid: (info[:puids].first rescue nil),
46
46
  matchtype: 'extension',
47
- tool: :type_database
47
+ tool: :format_library
48
48
  }
49
49
  end
50
50
  end.cleanup
@@ -7,7 +7,7 @@ require 'libis/tools/logger'
7
7
  require 'libis/tools/command'
8
8
 
9
9
  require 'libis/format/config'
10
- require 'libis/format/type_database'
10
+ require 'libis/format/library'
11
11
 
12
12
  module Libis
13
13
  module Format
@@ -99,7 +99,7 @@ module Libis
99
99
  end
100
100
 
101
101
  # Normalize the mimetype
102
- Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
102
+ Libis::Format::Library.normalize(result)
103
103
 
104
104
  # Default score is 5
105
105
  result[:score] = 5
@@ -117,14 +117,14 @@ module Libis
117
117
  # Signature match increases score with 2
118
118
  when 'signature'
119
119
  result[:score] += 2
120
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
120
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
121
121
  # ext = File.extname(result[:filename])
122
122
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
123
123
 
124
124
  # Container match increases score with 4
125
125
  when 'container'
126
126
  result[:score] += 4
127
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
127
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
128
128
  # ext = File.extname(result[:filename])
129
129
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
130
130
 
@@ -152,11 +152,11 @@ module Libis
152
152
  end
153
153
 
154
154
  def get_mimetype(puid)
155
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
155
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
156
156
  end
157
157
 
158
158
  def get_puid(mimetype)
159
- ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
159
+ ::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
160
160
  end
161
161
 
162
162
  attr_accessor :bad_mimetypes, :bad_puids
@@ -27,9 +27,9 @@ module Libis
27
27
  timeout = Libis::Format::Config[:timeouts][:pdf_merge]
28
28
  result = Libis::Tools::Command.run(
29
29
  Libis::Format::Config[:java_cmd],
30
- '-cp', Libis::Format::Config[:pdf_tool],
31
- 'MergePdf',
32
- '--file_output', target,
30
+ '-jar', Libis::Format::Config[:pdf_tool],
31
+ 'merge',
32
+ '-o', target,
33
33
  *options,
34
34
  *source,
35
35
  timeout: timeout,
@@ -10,7 +10,7 @@ module Libis
10
10
  module Format
11
11
  module Tool
12
12
 
13
- class PdfCopy
13
+ class PdfMetadata
14
14
  include ::Libis::Tools::Logger
15
15
 
16
16
  def self.run(source, target, options = [])
@@ -26,10 +26,10 @@ module Libis
26
26
  timeout = Libis::Format::Config[:timeouts][:pdf_copy]
27
27
  result = Libis::Tools::Command.run(
28
28
  Libis::Format::Config[:java_cmd],
29
- '-cp', Libis::Format::Config[:pdf_tool],
30
- 'CopyPdf',
31
- '--file_input', source,
32
- '--file_output', target,
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'metadata',
31
+ '-i', source,
32
+ '-o', target,
33
33
  *options,
34
34
  timeout: timeout,
35
35
  kill_after: timeout * 2
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format
    module Tool

      # Wrapper around the 'protect' sub-command of the configured PDF tool
      # jar. The tool is started as an external Java process.
      class PdfProtect
        include ::Libis::Tools::Logger

        # Convenience entry point: creates an instance and calls #run on it.
        def self.run(source, target, options = [])
          self.new.run source, target, options
        end

        # Runs <tt>java -jar <pdf_tool> protect -i source -o target *options</tt>.
        #
        # @param source  path passed to the tool with -i
        # @param target  path passed to the tool with -o
        # @param options extra command-line arguments appended verbatim
        # @return the result of Libis::Tools::Command.run
        # @raise [RuntimeError] when the command timed out, exited with a
        #   non-zero status or wrote anything to its error output
        def run(source, target, options = [])

          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          # Prefer a timeout configured specifically for this tool; fall back
          # to the :pdf_copy entry that was used here before, so existing
          # configurations keep working unchanged.
          timeouts = Libis::Format::Config[:timeouts]
          timeout = timeouts[:pdf_protect] || timeouts[:pdf_copy]
          result = Libis::Tools::Command.run(
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'protect',
            '-i', source,
            '-o', target,
            *options,
            timeout: timeout,
            kill_after: timeout * 2
          )

          raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
          raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?

          result
        end
      end

    end
  end
end
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format
    module Tool

      # Wrapper around the 'select' sub-command of the configured PDF tool
      # jar. The tool is started as an external Java process.
      class PdfSelect
        include ::Libis::Tools::Logger

        # Convenience entry point: creates an instance and calls #run on it.
        def self.run(source, target, options = [])
          new.run(source, target, options)
        end

        # Runs <tt>java -jar <pdf_tool> select -i source -o target *options</tt>
        # with the :pdf_select timeout from the configuration.
        #
        # @param source  path passed to the tool with -i
        # @param target  path passed to the tool with -o
        # @param options extra command-line arguments appended verbatim
        # @return the result of Libis::Tools::Command.run
        # @raise [RuntimeError] when the command timed out, exited with a
        #   non-zero status or wrote anything to its error output
        def run(source, target, options = [])
          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          limit = Libis::Format::Config[:timeouts][:pdf_select]
          command = [
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'select',
            '-i', source,
            '-o', target,
            *options
          ]
          # The process is killed when it is still alive after twice the timeout.
          outcome = Libis::Tools::Command.run(*command, timeout: limit, kill_after: limit * 2)

          if outcome[:timeout]
            raise RuntimeError, "#{self.class} took too long (> #{limit} seconds) to complete"
          end
          if outcome[:status] != 0 || !outcome[:err].empty?
            raise RuntimeError, "#{self.class} errors: #{outcome[:err].join("\n")}"
          end

          outcome
        end
      end

    end
  end
end
@@ -26,10 +26,10 @@ module Libis
26
26
  timeout = Libis::Format::Config[:timeouts][:pdf_split]
27
27
  result = Libis::Tools::Command.run(
28
28
  Libis::Format::Config[:java_cmd],
29
- '-cp', Libis::Format::Config[:pdf_tool],
30
- 'SplitPdf',
31
- '--file_input', source,
32
- '--file_output', target,
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'split',
31
+ '-i', source,
32
+ '-o', target,
33
33
  *options,
34
34
  timeout: timeout,
35
35
  kill_after: timeout * 2
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format
    module Tool

      # Wrapper around the 'watermark' sub-command of the configured PDF tool
      # jar. The tool is started as an external Java process.
      class PdfWatermark
        include ::Libis::Tools::Logger

        # Convenience entry point: creates an instance and calls #run on it.
        def self.run(source, target, wm_type, options = [])
          new.run(source, target, wm_type, options)
        end

        # Runs <tt>java -jar <pdf_tool> watermark wm_type -i source -o target *options</tt>
        # with the :pdf_watermark timeout from the configuration.
        #
        # @param source  path passed to the tool with -i
        # @param target  path passed to the tool with -o
        # @param wm_type argument placed directly after 'watermark' on the command line
        # @param options extra command-line arguments appended verbatim
        # @return the result of Libis::Tools::Command.run
        # @raise [RuntimeError] when the command timed out, exited with a
        #   non-zero status or wrote anything to its error output
        def run(source, target, wm_type, options = [])
          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          limit = Libis::Format::Config[:timeouts][:pdf_watermark]
          command = [
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'watermark', wm_type,
            '-i', source,
            '-o', target,
            *options
          ]
          # The process is killed when it is still alive after twice the timeout.
          outcome = Libis::Tools::Command.run(*command, timeout: limit, kill_after: limit * 2)

          if outcome[:timeout]
            raise RuntimeError, "#{self.class} took too long (> #{limit} seconds) to complete"
          end
          if outcome[:status] != 0 || !outcome[:err].empty?
            raise RuntimeError, "#{self.class} errors: #{outcome[:err].join("\n")}"
          end

          outcome
        end
      end

    end
  end
end