libis-format 1.0.5 → 2.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +5 -1
- data/.travis.yml +32 -24
- data/README.md +2 -2
- data/base/Dockerfile +24 -2
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/docker_cfg.yml +1 -0
- data/lib/libis/format.rb +12 -3
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/config.rb +16 -12
- data/lib/libis/format/converter/audio_converter.rb +2 -36
- data/lib/libis/format/converter/base.rb +22 -8
- data/lib/libis/format/converter/chain.rb +3 -3
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +20 -138
- data/lib/libis/format/converter/image_splitter.rb +84 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +1 -1
- data/lib/libis/format/converter/office_converter.rb +2 -2
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +6 -132
- data/lib/libis/format/converter/pdf_metadata.rb +82 -0
- data/lib/libis/format/converter/pdf_optimizer.rb +67 -0
- data/lib/libis/format/converter/pdf_protecter.rb +147 -0
- data/lib/libis/format/converter/pdf_selecter.rb +83 -0
- data/lib/libis/format/converter/pdf_splitter.rb +70 -0
- data/lib/libis/format/converter/pdf_watermarker_header.rb +71 -0
- data/lib/libis/format/converter/pdf_watermarker_image.rb +76 -0
- data/lib/libis/format/converter/pdf_watermarker_text.rb +93 -0
- data/lib/libis/format/converter/spreadsheet_converter.rb +2 -2
- data/lib/libis/format/converter/video_converter.rb +1 -1
- data/lib/libis/format/identifier.rb +3 -3
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool.rb +4 -1
- data/lib/libis/format/tool/extension_identification.rb +4 -4
- data/lib/libis/format/tool/identification_tool.rb +6 -6
- data/lib/libis/format/tool/pdf_merge.rb +3 -3
- data/lib/libis/format/tool/{pdf_copy.rb → pdf_metadata.rb} +5 -5
- data/lib/libis/format/tool/pdf_protect.rb +47 -0
- data/lib/libis/format/tool/pdf_select.rb +47 -0
- data/lib/libis/format/tool/pdf_split.rb +4 -4
- data/lib/libis/format/tool/pdf_watermark.rb +47 -0
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +1 -0
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/libis-format.gemspec +3 -2
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-167.jar +0 -0
- data/tools/bcprov-jdk15on-167.jar +0 -0
- metadata +32 -13
- data/lib/libis/format/type_database.rb +0 -134
- data/lib/libis/format/type_database_impl.rb +0 -120
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
@@ -0,0 +1,27 @@
|
|
1
|
+
module Libis
  module Format
    # Plain value object holding the descriptive data of a single format:
    # its name, category, free-text description and the lists of PUIDs,
    # MIME types and file extensions associated with it.
    class Info
      attr_reader :name, :category, :description, :puids, :mimetypes, :extensions

      # All attributes are stored exactly as given (no copies are taken).
      #
      # @param name the format name (required)
      # @param category the format category (required)
      # @param description optional description, defaults to ''
      # @param puids optional list, defaults to []
      # @param mimetypes optional list, defaults to []
      # @param extensions optional list, defaults to []
      def initialize(name:, category:, description: '', puids: [], mimetypes: [], extensions: [])
        @name, @category, @description = name, category, description
        @puids, @mimetypes, @extensions = puids, mimetypes, extensions
      end

      # Build a Hash snapshot of this object.
      #
      # +name+ and +category+ are passed through as-is; every other value is a
      # shallow +dup+ of the stored attribute.
      #
      # @return [Hash] keys :name, :description, :category, :puids,
      #   :mimetypes and :extensions (in that order)
      def to_hash
        shared = %i[name category]
        %i[name description category puids mimetypes extensions].each_with_object({}) do |attribute, snapshot|
          value = public_send(attribute)
          snapshot[attribute] = shared.include?(attribute) ? value : value.dup
        end
      end
    end
  end
end
|
@@ -0,0 +1,147 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
# coding: utf-8
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
  module Format

    # Singleton facade over a pluggable format database.
    #
    # Every look-up is forwarded to an implementation object that must respond
    # to +query(key, value)+ and return a collection of objects responding to
    # +to_hash+. The resulting hashes are read with the keys :name, :category,
    # :mimetypes, :puids and :extensions.
    #
    # Each public instance method has a class-level shortcut that delegates to
    # the singleton instance, e.g. +Library.get_info(:PDF)+.
    class Library
      include Singleton

      class << self
        # Replace the back-end of the singleton instance.
        def implementation=(impl)
          instance.implementation = impl
        end

        # @see Library#get_info
        def get_info(format)
          instance.get_info(format)
        end

        # @see Library#get_info_by
        def get_info_by(key, value)
          instance.get_info_by(key, value)
        end

        # @see Library#get_infos_by
        def get_infos_by(key, value)
          instance.get_infos_by(key, value)
        end

        # @see Library#get_field
        def get_field(format, field)
          instance.get_field(format, field)
        end

        # @see Library#get_field_by
        def get_field_by(key, value, field)
          instance.get_field_by(key, value, field)
        end

        # @see Library#get_fields_by
        def get_fields_by(key, value, field)
          instance.get_fields_by(key, value, field)
        end

        # @see Library#known?
        def known?(key, value)
          instance.known?(key, value)
        end

        # @see Library#enrich
        def enrich(info, map_keys = {})
          instance.enrich(info, map_keys)
        end

        # @see Library#normalize
        def normalize(info, map_keys = {})
          instance.normalize(info, map_keys)
        end
      end

      # Replace the back-end that answers +query(key, value)+.
      def implementation=(impl)
        @implementation = impl
      end

      # Field of the first format whose name equals +format+.
      #
      # @return the field value, or nil when the format is unknown
      def get_field(format, field)
        get_field_by(:name, format, field)
      end

      # Field of the first format matching +key+ == +value+.
      #
      # The singular fields :mimetype, :puid and :extension return the first
      # entry of the corresponding plural list; any other field is read
      # directly from the info hash.
      #
      # @return the field value, or nil when nothing matches
      def get_field_by(key, value, field)
        info = get_info_by(key, value)
        return nil unless info

        extract_field(info, field)
      end

      # Raw +field+ value of every format matching +key+ == +value+, with nil
      # entries dropped. Unlike #get_field_by, the singular aliases are not
      # translated here: +field+ is used as the hash key as-is.
      #
      # @return [Array]
      def get_fields_by(key, value, field)
        # The original chained `&.map { ... }.compact`; `&.` does not guard the
        # trailing call, and #get_infos_by always returns an Array anyway.
        get_infos_by(key, value).map { |info| info[field] }.compact
      end

      # Info hash of the first format named +format+, or nil.
      def get_info(format)
        get_info_by(:name, format)
      end

      # Info hash of the first format matching +key+ == +value+, or nil.
      def get_info_by(key, value)
        get_infos_by(key, value).first
      end

      # Info hashes of all formats matching +key+ == +value+.
      #
      # @return [Array<Hash>] empty when the back-end has no match (or
      #   answers nil)
      def get_infos_by(key, value)
        (@implementation.query(key, value) || []).map(&:to_hash)
      end

      # @return [Boolean] whether at least one format matches +key+ == +value+
      def known?(key, value)
        !get_info_by(key, value).nil?
      end

      # Normalize +info+ and then copy library data into it.
      #
      # Only the logical keys present in +map_keys+, plus :name, are filled;
      # each value is written under its mapped key and only when the library
      # has a truthy value for it. +info+ is modified in place.
      #
      # @param info [Hash] data to complete (anything else yields {})
      # @param map_keys [Hash] logical key => key actually used in +info+
      # @return [Hash] the enriched hash
      def enrich(info, map_keys = {})
        info = normalize(info, map_keys)
        mapper = key_mapper(map_keys)
        # Reading mapper[:name] also registers :name in the mapper, so it is
        # part of the key list iterated below.
        format = info[mapper[:name]]
        return info if format.nil?

        lib_info = get_info(format)
        # A name the library does not know has nothing to contribute; the
        # original raised NoMethodError on nil here.
        return info unless lib_info

        mapper.keys.each do |key|
          value = extract_field(lib_info, key)
          info[mapper[key]] = value if value
        end
        info
      end

      # Derive name from the available info
      #
      # The name is looked up by PUID first, then by MIME type, unless +info+
      # already carries one. When a name is present afterwards, the MIME type
      # and category entries are overwritten with the library's values for
      # that name (nil when the library has none). +info+ is modified in place.
      #
      # @param info [Hash] data to normalize (anything else yields {})
      # @param map_keys [Hash] logical key => key actually used in +info+
      # @return [Hash] the normalized hash
      def normalize(info, map_keys = {})
        return {} unless info.is_a? Hash

        mapper = key_mapper(map_keys)
        # fill format from looking up by puid
        puid = info[mapper[:puid]]
        info[mapper[:name]] ||= get_field_by(:puid, puid, :name) unless blank_value?(puid)
        # fill format from looking up by mimetype
        mime = info[mapper[:mimetype]]
        info[mapper[:name]] ||= get_field_by(:mimetype, mime, :name) unless blank_value?(mime)
        # finally complete the information from looking up by format name
        format = info[mapper[:name]]
        unless format.nil?
          info[mapper[:mimetype]] = get_field(format, :mimetype)
          info[mapper[:category]] = get_field(format, :category)
        end
        info
      end

      private

      # SECURITY NOTE(review): the configured string is passed to +eval+, so
      # whoever controls :format_library_implementation can run arbitrary Ruby.
      # Kept because the setting is an expression, not just a constant name —
      # make sure the configuration only ever comes from a trusted source.
      def initialize
        @implementation = eval(Libis::Format::Config[:format_library_implementation])
      end

      # Hash translating logical keys to the keys used by the caller; a key
      # without an explicit mapping maps to itself (and is remembered).
      def key_mapper(map_keys)
        Hash.new { |hash, key| hash[key] = key }.merge(map_keys)
      end

      # Read +field+ from an info hash, translating the singular aliases
      # :mimetype, :puid and :extension to the first entry of their list.
      def extract_field(info, field)
        case field
        when :mimetype
          info[:mimetypes]&.first
        when :puid
          info[:puids]&.first
        when :extension
          info[:extensions]&.first
        else
          info[field]
        end
      end

      # Uses the object's own +blank?+ when one is available; otherwise treats
      # nil and empty values as blank, so no core extension is required.
      def blank_value?(value)
        return value.blank? if value.respond_to?(:blank?)

        value.nil? || (value.respond_to?(:empty?) && value.empty?)
      end

    end

  end
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -12,10 +12,13 @@ module Libis
|
|
12
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
13
13
|
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
14
14
|
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
15
|
-
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
16
15
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
16
|
+
autoload :PdfMetadata, 'libis/format/tool/pdf_metadata'
|
17
17
|
autoload :PdfOptimizer, 'libis/format/tool/pdf_optimizer'
|
18
|
+
autoload :PdfProtect, 'libis/format/tool/pdf_protect'
|
19
|
+
autoload :PdfSelect, 'libis/format/tool/pdf_select'
|
18
20
|
autoload :PdfSplit, 'libis/format/tool/pdf_split'
|
21
|
+
autoload :PdfWatermark, 'libis/format/tool/pdf_watermark'
|
19
22
|
autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
|
20
23
|
autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
|
21
24
|
|
@@ -37,14 +37,14 @@ module Libis
|
|
37
37
|
def runner(*args)
|
38
38
|
|
39
39
|
args.map do |file|
|
40
|
-
info = ::Libis::Format::
|
40
|
+
info = ::Libis::Format::Library.get_info_by(:extension, File.extname(file))
|
41
41
|
if info
|
42
42
|
{
|
43
43
|
filepath: file,
|
44
|
-
mimetype: (info[:
|
45
|
-
puid: (info[:
|
44
|
+
mimetype: (info[:mimetypes].first rescue nil),
|
45
|
+
puid: (info[:puids].first rescue nil),
|
46
46
|
matchtype: 'extension',
|
47
|
-
tool: :
|
47
|
+
tool: :format_library
|
48
48
|
}
|
49
49
|
end
|
50
50
|
end.cleanup
|
@@ -7,7 +7,7 @@ require 'libis/tools/logger'
|
|
7
7
|
require 'libis/tools/command'
|
8
8
|
|
9
9
|
require 'libis/format/config'
|
10
|
-
require 'libis/format/
|
10
|
+
require 'libis/format/library'
|
11
11
|
|
12
12
|
module Libis
|
13
13
|
module Format
|
@@ -99,7 +99,7 @@ module Libis
|
|
99
99
|
end
|
100
100
|
|
101
101
|
# Normalize the mimetype
|
102
|
-
Libis::Format::
|
102
|
+
Libis::Format::Library.normalize(result)
|
103
103
|
|
104
104
|
# Default score is 5
|
105
105
|
result[:score] = 5
|
@@ -117,14 +117,14 @@ module Libis
|
|
117
117
|
# Signature match increases score with 2
|
118
118
|
when 'signature'
|
119
119
|
result[:score] += 2
|
120
|
-
# typeinfo = ::Libis::Format::
|
120
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
121
121
|
# ext = File.extname(result[:filename])
|
122
122
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
123
123
|
|
124
124
|
# Container match increases score with 4
|
125
125
|
when 'container'
|
126
126
|
result[:score] += 4
|
127
|
-
# typeinfo = ::Libis::Format::
|
127
|
+
# typeinfo = ::Libis::Format::Library.get_info_by(:puid, result[:puid])
|
128
128
|
# ext = File.extname(result[:filename])
|
129
129
|
# result[:score] += 1 if typeinfo and typeinfo[:EXTENSIONS].include?(ext)
|
130
130
|
|
@@ -152,11 +152,11 @@ module Libis
|
|
152
152
|
end
|
153
153
|
|
154
154
|
def get_mimetype(puid)
|
155
|
-
::Libis::Format::
|
155
|
+
::Libis::Format::Library.get_field_by(:puid, puid, :mimetype) rescue nil
|
156
156
|
end
|
157
157
|
|
158
158
|
def get_puid(mimetype)
|
159
|
-
::Libis::Format::
|
159
|
+
::Libis::Format::Library.get_field_by(:mimetype, mimetype, :puid) rescue nil
|
160
160
|
end
|
161
161
|
|
162
162
|
attr_accessor :bad_mimetypes, :bad_puids
|
@@ -27,9 +27,9 @@ module Libis
|
|
27
27
|
timeout = Libis::Format::Config[:timeouts][:pdf_merge]
|
28
28
|
result = Libis::Tools::Command.run(
|
29
29
|
Libis::Format::Config[:java_cmd],
|
30
|
-
'-
|
31
|
-
'
|
32
|
-
'
|
30
|
+
'-jar', Libis::Format::Config[:pdf_tool],
|
31
|
+
'merge',
|
32
|
+
'-o', target,
|
33
33
|
*options,
|
34
34
|
*source,
|
35
35
|
timeout: timeout,
|
@@ -10,7 +10,7 @@ module Libis
|
|
10
10
|
module Format
|
11
11
|
module Tool
|
12
12
|
|
13
|
-
class
|
13
|
+
class PdfMetadata
|
14
14
|
include ::Libis::Tools::Logger
|
15
15
|
|
16
16
|
def self.run(source, target, options = [])
|
@@ -26,10 +26,10 @@ module Libis
|
|
26
26
|
timeout = Libis::Format::Config[:timeouts][:pdf_copy]
|
27
27
|
result = Libis::Tools::Command.run(
|
28
28
|
Libis::Format::Config[:java_cmd],
|
29
|
-
'-
|
30
|
-
'
|
31
|
-
'
|
32
|
-
'
|
29
|
+
'-jar', Libis::Format::Config[:pdf_tool],
|
30
|
+
'metadata',
|
31
|
+
'-i', source,
|
32
|
+
'-o', target,
|
33
33
|
*options,
|
34
34
|
timeout: timeout,
|
35
35
|
kill_after: timeout * 2
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format
    module Tool

      # Wrapper around the 'protect' sub-command of the configured PDF tool
      # jar, launched through Libis::Tools::Command with the configured java
      # command.
      class PdfProtect
        include ::Libis::Tools::Logger

        # Class-level shortcut: creates an instance and forwards to #run.
        def self.run(source, target, options = [])
          new.run(source, target, options)
        end

        # Run the tool on +source+, writing to +target+.
        #
        # @param source passed to the tool after '-i'
        # @param target passed to the tool after '-o'
        # @param options [Array] extra command-line arguments appended as-is
        # @return the result returned by Libis::Tools::Command.run
        # @raise [RuntimeError] when the result reports a timeout, a non-zero
        #   status or any error output
        def run(source, target, options = [])

          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          # NOTE(review): this reads the :pdf_copy timeout, not a key named
          # after this tool — confirm whether that is intentional.
          timeout = Libis::Format::Config[:timeouts][:pdf_copy]
          command_line = [
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'protect',
            '-i', source,
            '-o', target,
            *options
          ]
          result = Libis::Tools::Command.run(*command_line, timeout: timeout, kill_after: timeout * 2)

          raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]

          # Any error output counts as a failure, even with exit status 0.
          failed = result[:status] != 0 || !result[:err].empty?
          raise "#{self.class} errors: #{result[:err].join("\n")}" if failed

          result
        end
      end

    end
  end
end
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format
    module Tool

      # Wrapper around the 'select' sub-command of the configured PDF tool
      # jar, launched through Libis::Tools::Command with the configured java
      # command.
      class PdfSelect
        include ::Libis::Tools::Logger

        # Class-level shortcut: creates an instance and forwards to #run.
        def self.run(source, target, options = [])
          new.run(source, target, options)
        end

        # Run the tool on +source+, writing to +target+.
        #
        # @param source passed to the tool after '-i'
        # @param target passed to the tool after '-o'
        # @param options [Array] extra command-line arguments appended as-is
        # @return the result returned by Libis::Tools::Command.run
        # @raise [RuntimeError] when the result reports a timeout, a non-zero
        #   status or any error output
        def run(source, target, options = [])

          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          timeout = Libis::Format::Config[:timeouts][:pdf_select]
          command_line = [
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'select',
            '-i', source,
            '-o', target,
            *options
          ]
          result = Libis::Tools::Command.run(*command_line, timeout: timeout, kill_after: timeout * 2)

          raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]

          # Any error output counts as a failure, even with exit status 0.
          failed = result[:status] != 0 || !result[:err].empty?
          raise "#{self.class} errors: #{result[:err].join("\n")}" if failed

          result
        end
      end

    end
  end
end
|
@@ -26,10 +26,10 @@ module Libis
|
|
26
26
|
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
27
27
|
result = Libis::Tools::Command.run(
|
28
28
|
Libis::Format::Config[:java_cmd],
|
29
|
-
'-
|
30
|
-
'
|
31
|
-
'
|
32
|
-
'
|
29
|
+
'-jar', Libis::Format::Config[:pdf_tool],
|
30
|
+
'split',
|
31
|
+
'-i', source,
|
32
|
+
'-o', target,
|
33
33
|
*options,
|
34
34
|
timeout: timeout,
|
35
35
|
kill_after: timeout * 2
|
@@ -0,0 +1,47 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format
    module Tool

      # Wrapper around the 'watermark' sub-command of the configured PDF tool
      # jar, launched through Libis::Tools::Command with the configured java
      # command.
      class PdfWatermark
        include ::Libis::Tools::Logger

        # Class-level shortcut: creates an instance and forwards to #run.
        def self.run(source, target, wm_type, options = [])
          new.run(source, target, wm_type, options)
        end

        # Run the tool on +source+, writing to +target+.
        #
        # @param source passed to the tool after '-i'
        # @param target passed to the tool after '-o'
        # @param wm_type passed to the tool right after 'watermark'
        # @param options [Array] extra command-line arguments appended as-is
        # @return the result returned by Libis::Tools::Command.run
        # @raise [RuntimeError] when the result reports a timeout, a non-zero
        #   status or any error output
        def run(source, target, wm_type, options = [])

          if OS.java?
            # TODO: import library and execute in current VM. For now do exactly as in MRI.
          end

          timeout = Libis::Format::Config[:timeouts][:pdf_watermark]
          command_line = [
            Libis::Format::Config[:java_cmd],
            '-jar', Libis::Format::Config[:pdf_tool],
            'watermark', wm_type,
            '-i', source,
            '-o', target,
            *options
          ]
          result = Libis::Tools::Command.run(*command_line, timeout: timeout, kill_after: timeout * 2)

          raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]

          # Any error output counts as a failure, even with exit status 0.
          failed = result[:status] != 0 || !result[:err].empty?
          raise "#{self.class} errors: #{result[:err].join("\n")}" if failed

          result
        end
      end

    end
  end
end
|