libis-format 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +40 -153
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +48 -0
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +47 -0
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +85 -125
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,52 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'os'
4
-
5
- require 'libis/tools/extend/string'
6
- require 'libis/tools/logger'
7
- require 'libis/tools/command'
8
-
9
- require 'libis/format/config'
10
-
11
- module Libis
12
- module Format
13
- module Tool
14
- class PdfTool
15
- include ::Libis::Tools::Logger
16
-
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:java_cmd], '-version')
19
- return false unless (result[:status]).zero?
20
-
21
- File.exist?(Libis::Format::Config[:pdf_tool])
22
- end
23
-
24
- def self.run(command, source, target, *options)
25
- new.run command, source, target, *options
26
- end
27
-
28
- def run(command, source, target, *options)
29
- if OS.java?
30
- # TODO: import library and execute in current VM. For now do exactly as in MRI.
31
- end
32
-
33
- timeout = Libis::Format::Config[:timeouts][:pdf_tool]
34
- args = [
35
- Libis::Format::Config[:java_cmd],
36
- '-jar', Libis::Format::Config[:pdf_tool],
37
- [command],
38
- '-i', source,
39
- '-o', target,
40
- options,
41
- ].flatten
42
-
43
- result = Libis::Tools::Command.run(*args, timeout: , kill_after: timeout * 2)
44
-
45
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
46
-
47
- result
48
- end
49
- end
50
- end
51
- end
52
- end
@@ -1,156 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'yaml'
4
- require 'libis/tools/extend/hash'
5
-
6
- module Libis
7
- module Format
8
- # noinspection RubyClassVariableUsageInspection
9
- class TypeDatabase
10
- @implementation = Libis::Format::TypeDatabaseImpl.instance
11
-
12
- def self.implementation(impl)
13
- @implementation = impl
14
- end
15
-
16
- def self.enrich(info, map_keys = {})
17
- return {} unless info.is_a? Hash
18
-
19
- mapper = Hash.new { |hash, key| hash[key] = key }
20
- mapper.merge! map_keys
21
- unless (puid = info[mapper[:PUID]]).blank?
22
- info[mapper[:TYPE]] ||= begin
23
- puid_infos(puid).first[:TYPE]
24
- rescue StandardError
25
- nil
26
- end
27
- end
28
- unless (mime = info[mapper[:MIME]]).blank?
29
- info[mapper[:TYPE]] ||= begin
30
- mime_infos(mime).first[:TYPE]
31
- rescue StandardError
32
- nil
33
- end
34
- end
35
- unless (type_name = info[mapper[:TYPE]]).nil?
36
- mapper.each_key do |key|
37
- info[mapper[key]] = get(type_name, key) || info[mapper[key]]
38
- end
39
- info[mapper[:GROUP]] = type_group(type_name)
40
- end
41
- info
42
- end
43
-
44
- def self.normalize(info, map_keys = {})
45
- return {} unless info.is_a? Hash
46
-
47
- mapper = Hash.new { |hash, key| hash[key] = key }
48
- mapper.merge! map_keys
49
- unless (puid = info[mapper[:PUID]]).blank?
50
- info[mapper[:TYPE]] ||= begin
51
- puid_infos(puid).first[:TYPE]
52
- rescue StandardError
53
- nil
54
- end
55
- end
56
- unless (mime = info[mapper[:MIME]]).blank?
57
- info[mapper[:TYPE]] ||= begin
58
- mime_infos(mime).first[:TYPE]
59
- rescue StandardError
60
- nil
61
- end
62
- end
63
- unless (type_name = info[mapper[:TYPE]]).nil?
64
- info[mapper[:MIME]] = type_mimetypes(type_name).first if type_mimetypes(type_name).first
65
- info[mapper[:GROUP]] = type_group(type_name)
66
- end
67
- info
68
- end
69
-
70
- def self.get(type_name, key)
71
- case key
72
- when :MIME
73
- type_mimetypes(type_name).first
74
- when :PUID
75
- type_puids(type_name).first
76
- when :EXTENSION
77
- type_extentions(type_name).first
78
- else
79
- typeinfo(type_name)[key]
80
- end
81
- end
82
-
83
- def self.type_group(ftype)
84
- typeinfo(ftype)[:GROUP]
85
- end
86
-
87
- def self.type_mimetypes(ftype)
88
- typeinfo(ftype)[:MIME] || []
89
- end
90
-
91
- def self.type_puids(ftype)
92
- typeinfo(ftype)[:PUID] || []
93
- end
94
-
95
- def self.type_extentions(ftype)
96
- typeinfo(ftype)[:EXTENSIONS] || []
97
- end
98
-
99
- def self.typeinfo(ftype)
100
- @implementation.typeinfo(ftype)
101
- end
102
-
103
- def self.group_types(group)
104
- @implementation.group_types(group)
105
- end
106
-
107
- def self.puid_infos(puid)
108
- @implementation.puid_infos(puid)
109
- end
110
-
111
- def self.puid_types(puid)
112
- @implementation.puid_types(puid)
113
- end
114
-
115
- def self.puid_groups(puid)
116
- puid_types(puid).map(&method(:type_group))
117
- end
118
-
119
- def self.mime_infos(mime)
120
- @implementation.mime_infos(mime)
121
- end
122
-
123
- def self.mime_types(mime)
124
- @implementation.mime_types(mime)
125
- end
126
-
127
- def self.mime_groups(mime)
128
- mime_types(mime).map(&method(:type_group))
129
- end
130
-
131
- def self.ext_infos(ext)
132
- @implementation.ext_infos(ext)
133
- end
134
-
135
- def self.ext_types(ext)
136
- @implementation.ext_types(ext)
137
- end
138
-
139
- def self.puid_typeinfo(puid)
140
- @implementation.puid_typeinfo(puid)
141
- end
142
-
143
- def self.known_mime?(mime)
144
- @implementation.known_mime?(mime)
145
- end
146
-
147
- def self.groups
148
- @implementation.groups
149
- end
150
-
151
- def self.export_csv(filename, **options)
152
- @implementation.export_csv(filename, **options)
153
- end
154
- end
155
- end
156
- end
@@ -1,153 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require 'singleton'
4
- require 'yaml'
5
- require 'csv'
6
-
7
- require 'libis/tools/logger'
8
- require 'libis/tools/extend/hash'
9
- require 'libis/tools/extend/string'
10
- require 'libis/tools/extend/symbol'
11
-
12
- module Libis
13
- module Format
14
- class TypeDatabaseImpl
15
- include Singleton
16
- include ::Libis::Tools::Logger
17
-
18
- def typeinfo(ftype)
19
- @types[ftype.to_sym] || {}
20
- end
21
-
22
- def group_types(group)
23
- @types.select do |_, v|
24
- v[:GROUP] == group.to_sym
25
- end.keys
26
- end
27
-
28
- def puid_infos(puid)
29
- @types.select do |_, v|
30
- v[:PUID].include? puid
31
- rescue StandardError
32
- false
33
- end.values
34
- end
35
-
36
- def puid_types(puid)
37
- @types.select do |_, v|
38
- v[:PUID].include? puid
39
- rescue StandardError
40
- false
41
- end.keys
42
- end
43
-
44
- def mime_infos(mime)
45
- @types.select do |_, v|
46
- v[:MIME].include? mime
47
- rescue StandardError
48
- false
49
- end.values
50
- end
51
-
52
- def mime_types(mime)
53
- @types.select do |_, v|
54
- v[:MIME].include? mime
55
- rescue StandardError
56
- false
57
- end.keys
58
- end
59
-
60
- def ext_infos(ext)
61
- ext = ext.gsub(/^\./, '')
62
- @types.select do |_, v|
63
- v[:EXTENSIONS].include?(ext)
64
- rescue StandardError
65
- false
66
- end.values
67
- end
68
-
69
- def ext_types(ext)
70
- ext = ext.gsub(/^\./, '')
71
- @types.select do |_, v|
72
- v[:EXTENSIONS].include?(ext)
73
- rescue StandardError
74
- false
75
- end.keys
76
- end
77
-
78
- def puid_typeinfo(puid)
79
- @types.each do |_, v|
80
- return v if v[:PUID]&.include?(puid)
81
- end
82
- nil
83
- end
84
-
85
- def known_mime?(mime)
86
- @types.each do |_, v|
87
- return true if v[:MIME]&.include? mime
88
- end
89
- false
90
- end
91
-
92
- def groups
93
- @types.values.map(&:dig.with(:GROUP)).uniq
94
- end
95
-
96
- def export_csv(filename, **options)
97
- headers = @types.values.each_with_object(Set.new) { |v, s| v.each_key { |k| s << k.to_s } }
98
- options[:headers] = headers.to_a
99
- CSV.open(filename, 'w', **options) do |csv|
100
- @types.each_value do |v|
101
- csv << CSV::Row.new(v.keys, v.values.map { |x| x.is_a?(Array) ? x.join(', ') : x })
102
- end
103
- end
104
- end
105
-
106
- def load_types(file_or_hash = {}, append = true)
107
- hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML.load_file(file_or_hash)
108
- # noinspection RubyResolve
109
- hash.each do |group, type_info|
110
- type_info.each do |type_name, info|
111
- type_key = type_name.to_sym
112
- info.symbolize_keys!
113
- info[:TYPE] = type_key
114
- info[:GROUP] = group.to_sym
115
- info[:MIME] = begin
116
- info[:MIME].strip.split(/[\s,]+/).map(&:strip)
117
- rescue StandardError
118
- []
119
- end
120
- info[:EXTENSIONS] = begin
121
- info[:EXTENSIONS].strip.split(/[\s,]+/).map(&:strip)
122
- rescue StandardError
123
- []
124
- end
125
- info[:PUID] = info[:PUID].strip.split(/[\s,]+/).map(&:strip) if info[:PUID]
126
- if @types.key?(type_key)
127
- warn 'Type %s already defined; merging with info from %s.', type_name, file_or_hash
128
- info.merge!(@types[type_key]) do |_, v_new, v_old|
129
- case v_old
130
- when Array
131
- append ? v_old + v_new : v_new + v_old
132
- when Hash
133
- append ? v_new.merge(v_old) : v_old.merge(v_new)
134
- else
135
- append ? v_old : v_new
136
- end
137
- end
138
- end
139
- @types[type_key] = info
140
- end
141
- end
142
- end
143
-
144
- protected
145
-
146
- def initialize
147
- @types = {}
148
- type_database = Libis::Format::Config[:type_database]
149
- load_types(type_database)
150
- end
151
- end
152
- end
153
- end