libis-format 1.0.5 → 2.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/.gitignore +5 -1
  3. data/.travis.yml +32 -24
  4. data/README.md +2 -2
  5. data/base/Dockerfile +24 -2
  6. data/base/Dockerfile.alpine +20 -0
  7. data/base/Dockerfile.rvm +56 -0
  8. data/base/rework_path +20 -0
  9. data/docker_cfg.yml +1 -0
  10. data/lib/libis/format.rb +12 -3
  11. data/lib/libis/format/cli/convert.rb +4 -4
  12. data/lib/libis/format/config.rb +16 -12
  13. data/lib/libis/format/converter/audio_converter.rb +2 -36
  14. data/lib/libis/format/converter/base.rb +22 -8
  15. data/lib/libis/format/converter/chain.rb +3 -3
  16. data/lib/libis/format/converter/image_assembler.rb +82 -0
  17. data/lib/libis/format/converter/image_converter.rb +20 -138
  18. data/lib/libis/format/converter/image_splitter.rb +84 -0
  19. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  20. data/lib/libis/format/converter/jp2_converter.rb +1 -1
  21. data/lib/libis/format/converter/office_converter.rb +2 -2
  22. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +6 -132
  24. data/lib/libis/format/converter/pdf_metadata.rb +82 -0
  25. data/lib/libis/format/converter/pdf_optimizer.rb +67 -0
  26. data/lib/libis/format/converter/pdf_protecter.rb +147 -0
  27. data/lib/libis/format/converter/pdf_selecter.rb +83 -0
  28. data/lib/libis/format/converter/pdf_splitter.rb +70 -0
  29. data/lib/libis/format/converter/pdf_watermarker_header.rb +71 -0
  30. data/lib/libis/format/converter/pdf_watermarker_image.rb +76 -0
  31. data/lib/libis/format/converter/pdf_watermarker_text.rb +93 -0
  32. data/lib/libis/format/converter/spreadsheet_converter.rb +2 -2
  33. data/lib/libis/format/converter/video_converter.rb +1 -1
  34. data/lib/libis/format/identifier.rb +3 -3
  35. data/lib/libis/format/info.rb +27 -0
  36. data/lib/libis/format/library.rb +147 -0
  37. data/lib/libis/format/tool.rb +4 -1
  38. data/lib/libis/format/tool/extension_identification.rb +4 -4
  39. data/lib/libis/format/tool/identification_tool.rb +6 -6
  40. data/lib/libis/format/tool/pdf_merge.rb +3 -3
  41. data/lib/libis/format/tool/{pdf_copy.rb → pdf_metadata.rb} +5 -5
  42. data/lib/libis/format/tool/pdf_protect.rb +47 -0
  43. data/lib/libis/format/tool/pdf_select.rb +47 -0
  44. data/lib/libis/format/tool/pdf_split.rb +4 -4
  45. data/lib/libis/format/tool/pdf_watermark.rb +47 -0
  46. data/lib/libis/format/tool/spreadsheet_to_ods.rb +1 -0
  47. data/lib/libis/format/version.rb +1 -1
  48. data/lib/libis/format/yaml_loader.rb +71 -0
  49. data/libis-format.gemspec +3 -2
  50. data/tools/PdfTool.jar +0 -0
  51. data/tools/bcpkix-jdk15on-167.jar +0 -0
  52. data/tools/bcprov-jdk15on-167.jar +0 -0
  53. metadata +32 -13
  54. data/lib/libis/format/type_database.rb +0 -134
  55. data/lib/libis/format/type_database_impl.rb +0 -120
  56. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  57. data/tools/bcprov-jdk15on-1.49.jar +0 -0
@@ -0,0 +1,27 @@
1
+ module Libis
2
+ module Format
3
+ class Info
4
+ attr_reader :name, :category, :description, :puids, :mimetypes, :extensions
5
+
6
+ def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
7
+ @name = name
8
+ @category = category
9
+ @description = description
10
+ @puids = puids
11
+ @mimetypes = mimetypes
12
+ @extensions = extensions
13
+ end
14
+
15
+ def to_hash
16
+ {
17
+ name: name,
18
+ description: description.dup,
19
+ category: category,
20
+ puids: puids.dup,
21
+ mimetypes: mimetypes.dup,
22
+ extensions: extensions.dup
23
+ }
24
+ end
25
+ end
26
+ end
27
+ end
@@ -0,0 +1,147 @@
1
+ # frozen_string_literal: true
2
+ # coding: utf-8
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Format
7
+
8
+ class Library
9
+ include Singleton
10
+
11
+ class << self
12
+ def implementation=(impl)
13
+ instance.implementation = impl
14
+ end
15
+
16
+ def get_info(format)
17
+ instance.get_info(format)
18
+ end
19
+
20
+ def get_info_by(key, value)
21
+ instance.get_info_by(key, value)
22
+ end
23
+
24
+ def get_infos_by(key, value)
25
+ instance.get_infos_by(key, value)
26
+ end
27
+
28
+ def get_field(format, field)
29
+ instance.get_field(format, field)
30
+ end
31
+
32
+ def get_field_by(key, value, field)
33
+ instance.get_field_by(key, value, field)
34
+ end
35
+
36
+ def get_fields_by(key, value, field)
37
+ instance.get_fields_by(key, value, field)
38
+ end
39
+
40
+ def known?(key, value)
41
+ instance.known?(key, value)
42
+ end
43
+
44
+ def enrich(info, map_keys = {})
45
+ instance.enrich(info, map_keys)
46
+ end
47
+
48
+ def normalize(info, map_keys = {})
49
+ instance.normalize(info, map_keys)
50
+ end
51
+ end
52
+
53
+ def implementation=(impl)
54
+ @implementation = impl
55
+ end
56
+
57
+ def get_field(format, field)
58
+ get_field_by(:name, format, field)
59
+ end
60
+
61
+ def get_field_by(key, value, field)
62
+ info = get_info_by(key, value)
63
+ return nil unless info
64
+ case field
65
+ when :mimetype
66
+ info[:mimetypes]&.first
67
+ when :puid
68
+ info[:puids]&.first
69
+ when :extension
70
+ info[:extensions]&.first
71
+ else
72
+ info[field]
73
+ end
74
+ end
75
+
76
+ def get_fields_by(key, value, field)
77
+ get_infos_by(key, value)&.map { |info| info[field] }.compact
78
+ end
79
+
80
+ def get_info(format)
81
+ get_info_by(:name, format)
82
+ end
83
+
84
+ def get_info_by(key, value)
85
+ get_infos_by(key, value)&.first
86
+ end
87
+
88
+ def get_infos_by(key, value)
89
+ result = @implementation.query(key, value)
90
+ result.map(&:to_hash)
91
+ end
92
+
93
+ def known?(key, value)
94
+ !get_info_by(key, value).nil?
95
+ end
96
+
97
+ def enrich(info, map_keys = {})
98
+ info = normalize(info, map_keys)
99
+ mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
100
+ unless (format = info[mapper[:name]]).nil?
101
+ lib_info = get_info(format)
102
+ mapper.keys.each do |key|
103
+ case key
104
+ when :mimetype
105
+ info[mapper[key]] = lib_info[:mimetypes].first if lib_info[:mimetypes].first
106
+ when :puid
107
+ info[mapper[key]] = lib_info[:puids].first if lib_info[:puids].first
108
+ when :extension
109
+ info[mapper[key]] = lib_info[:extensions].first if lib_info[:extensions].first
110
+ else
111
+ info[mapper[key]] = lib_info[key] if lib_info[key]
112
+ end
113
+ end
114
+ end
115
+ info
116
+ end
117
+
118
+ # Derive name from the available info
119
+ def normalize(info, map_keys = {})
120
+ return {} unless info.is_a? Hash
121
+ mapper = Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
122
+ # fill format from looking up by puid
123
+ unless (puid = info[mapper[:puid]]).blank?
124
+ info[mapper[:name]] ||= get_field_by(:puid, puid, :name)
125
+ end
126
+ # fill format from looking up by mimetype
127
+ unless (mime = info[mapper[:mimetype]]).blank?
128
+ info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name)
129
+ end
130
+ # finally complete the information from looking up by format name
131
+ unless (format = info[mapper[:name]]).nil?
132
+ info[mapper[:mimetype]] = get_field(format, :mimetype)
133
+ info[mapper[:category]] = get_field(format, :category)
134
+ end
135
+ info
136
+ end
137
+
138
+ private
139
+
140
+ def initialize
141
+ @implementation = eval(Libis::Format::Config[:format_library_implementation])
142
+ end
143
+
144
+ end
145
+
146
+ end
147
+ end
@@ -12,10 +12,13 @@ module Libis
12
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
13
13
  autoload :FFMpeg, 'libis/format/tool/ffmpeg'
14
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
15
- autoload :PdfCopy, 'libis/format/tool/pdf_copy'
16
15
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
16
+ autoload :PdfMetadata, 'libis/format/tool/pdf_metadata'
17
17
  autoload :PdfOptimizer, 'libis/format/tool/pdf_optimizer'
18
+ autoload :PdfProtect, 'libis/format/tool/pdf_protect'
19
+ autoload :PdfSelect, 'libis/format/tool/pdf_select'
18
20
  autoload :PdfSplit, 'libis/format/tool/pdf_split'
21
+ autoload :PdfWatermark, 'libis/format/tool/pdf_watermark'
19
22
  autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
20
23
  autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
21
24
 
@@ -37,14 +37,14 @@ module Libis
37
37
  def runner(*args)
38
38
 
39
39
  args.map do |file|
40
- info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
40
+ info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
41
41
  if info
42
42
  {
43
43
  filepath: file,
44
- mimetype: (info[:MIME].first rescue nil),
45
- puid: (info[:PUID].first rescue nil),
44
+ mimetype: (info[:mimetypes].first rescue nil),
45
+ puid: (info[:puids].first rescue nil),
46
46
  matchtype: 'extension',
47
- tool: :type_database
47
+ tool: :format_library
48
48
  }
49
49
  end
50
50
  end.cleanup
@@ -7,7 +7,7 @@ require 'libis/tools/logger'
7
7
  require 'libis/tools/command'
8
8
 
9
9
  require 'libis/format/config'
10
- require 'libis/format/type_database'
10
+ require 'libis/format/library'
11
11
 
12
12
  module Libis
13
13
  module Format
@@ -99,7 +99,7 @@ module Libis
99
99
  end
100
100
 
101
101
  # Normalize the mimetype
102
- Libis::Format::TypeDatabase.normalize(result, PUID: :puid, MIME: :mimetype)
102
+ Libis::Format::Library.normalize(result)
103
103
 
104
104
  # Default score is 5
105
105
  result[:score] = 5
@@ -117,14 +117,14 @@ module Libis
117
117
  # Signature match increases score with 2
118
118
  when 'signature'
119
119
  result[:score] += 2
120
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
120
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
121
121
  # ext = File.extname(result[:filename])
122
122
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
123
123
 
124
124
  # Container match increases score with 4
125
125
  when 'container'
126
126
  result[:score] += 4
127
- # typeinfo = ::Libis::Format::TypeDatabase.puid_typeinfo(result[:puid])
127
+ # typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
128
128
  # ext = File.extname(result[:filename])
129
129
  # result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
130
130
 
@@ -152,11 +152,11 @@ module Libis
152
152
  end
153
153
 
154
154
  def get_mimetype(puid)
155
- ::Libis::Format::TypeDatabase.puid_typeinfo(puid)[:MIME].first rescue nil
155
+ ::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
156
156
  end
157
157
 
158
158
  def get_puid(mimetype)
159
- ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
159
+ ::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
160
160
  end
161
161
 
162
162
  attr_accessor :bad_mimetypes, :bad_puids
@@ -27,9 +27,9 @@ module Libis
27
27
  timeout = Libis::Format::Config[:timeouts][:pdf_merge]
28
28
  result = Libis::Tools::Command.run(
29
29
  Libis::Format::Config[:java_cmd],
30
- '-cp', Libis::Format::Config[:pdf_tool],
31
- 'MergePdf',
32
- '--file_output', target,
30
+ '-jar', Libis::Format::Config[:pdf_tool],
31
+ 'merge',
32
+ '-o', target,
33
33
  *options,
34
34
  *source,
35
35
  timeout: timeout,
@@ -10,7 +10,7 @@ module Libis
10
10
  module Format
11
11
  module Tool
12
12
 
13
- class PdfCopy
13
+ class PdfMetadata
14
14
  include ::Libis::Tools::Logger
15
15
 
16
16
  def self.run(source, target, options = [])
@@ -26,10 +26,10 @@ module Libis
26
26
  timeout = Libis::Format::Config[:timeouts][:pdf_copy]
27
27
  result = Libis::Tools::Command.run(
28
28
  Libis::Format::Config[:java_cmd],
29
- '-cp', Libis::Format::Config[:pdf_tool],
30
- 'CopyPdf',
31
- '--file_input', source,
32
- '--file_output', target,
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'metadata',
31
+ '-i', source,
32
+ '-o', target,
33
33
  *options,
34
34
  timeout: timeout,
35
35
  kill_after: timeout * 2
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfProtect
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
18
+ end
19
+
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_copy]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'protect',
31
+ '-i', source,
32
+ '-o', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfSelect
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
18
+ end
19
+
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_select]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'select',
31
+ '-i', source,
32
+ '-o', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -26,10 +26,10 @@ module Libis
26
26
  timeout = Libis::Format::Config[:timeouts][:pdf_split]
27
27
  result = Libis::Tools::Command.run(
28
28
  Libis::Format::Config[:java_cmd],
29
- '-cp', Libis::Format::Config[:pdf_tool],
30
- 'SplitPdf',
31
- '--file_input', source,
32
- '--file_output', target,
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'split',
31
+ '-i', source,
32
+ '-o', target,
33
33
  *options,
34
34
  timeout: timeout,
35
35
  kill_after: timeout * 2
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfWatermark
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, wm_type, options = [])
17
+ self.new.run source, target, wm_type, options
18
+ end
19
+
20
+ def run(source, target, wm_type, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_watermark]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-jar', Libis::Format::Config[:pdf_tool],
30
+ 'watermark', wm_type,
31
+ '-i', source,
32
+ '-o', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end