libis-format 1.2.9 → 1.3.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/bin/{pdf_copy → pdf_tool} +3 -2
- data/data/types.yml +1 -1
- data/lib/libis/format/command_line.rb +2 -3
- data/lib/libis/format/config.rb +17 -20
- data/lib/libis/format/converter/base.rb +9 -16
- data/lib/libis/format/converter/chain.rb +36 -28
- data/lib/libis/format/converter/email_converter.rb +5 -8
- data/lib/libis/format/converter/fop_pdf_converter.rb +4 -6
- data/lib/libis/format/converter/image_converter.rb +51 -58
- data/lib/libis/format/converter/jp2_converter.rb +33 -35
- data/lib/libis/format/converter/office_converter.rb +19 -23
- data/lib/libis/format/converter/pdf_converter.rb +133 -52
- data/lib/libis/format/converter/repository.rb +7 -13
- data/lib/libis/format/converter/spreadsheet_converter.rb +7 -11
- data/lib/libis/format/converter/video_converter.rb +41 -55
- data/lib/libis/format/converter/xslt_converter.rb +14 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +41 -43
- data/lib/libis/format/tool/droid.rb +29 -30
- data/lib/libis/format/tool/ff_mpeg.rb +11 -13
- data/lib/libis/format/tool/fido.rb +1 -1
- data/lib/libis/format/tool/pdf_optimizer.rb +21 -27
- data/lib/libis/format/tool/pdf_to_pdfa.rb +2 -6
- data/lib/libis/format/tool/pdf_tool.rb +52 -0
- data/lib/libis/format/tool/pdfa_validator.rb +2 -3
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +23 -20
- data/lib/libis/format/tool.rb +2 -2
- data/lib/libis/format/type_database.rb +51 -28
- data/lib/libis/format/type_database_impl.rb +57 -24
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -2
- data/lib/libis-format.rb +2 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- data/tools/pdfbox/{preflight-app-2.0.13.jar → preflight-app-3.0.3.jar} +0 -0
- metadata +8 -10
- data/lib/libis/format/tool/pdf_copy.rb +0 -57
- data/lib/libis/format/tool/pdf_merge.rb +0 -58
- data/lib/libis/format/tool/pdf_split.rb +0 -56
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68f64b7c012a051d6c25eb3cb02fb74d662c0f1da348d4b3b5b4af5b7004d543
|
4
|
+
data.tar.gz: 62708947cef69d89540b2dd73b85a546eb012fae7e6565e329e989ab0573949b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed3dc449c70eb71f081a6c55f06d8a2b484afb0094a6988aad0ca018ae36e07beb1d9fae3a8fb0a3a026e90c13386f83686b7c66be34b8a028d588ef9412bd99
|
7
|
+
data.tar.gz: 44bc49f3315e99974e5c7f992e1f496ec3e59dbd15839f93a0e2f8b20fc73dcbfba0f77e178514f0e78f0f561961f55f31895e843ba217e70e2a3cc79b47cdfb
|
data/Gemfile
CHANGED
data/bin/{pdf_copy → pdf_tool}
RENAMED
@@ -5,9 +5,10 @@ require 'libis-tools'
|
|
5
5
|
|
6
6
|
::Libis::Tools::Config.logger.level = :WARN
|
7
7
|
|
8
|
+
command = ARGV.shift
|
9
|
+
command = command.split('_')
|
8
10
|
source = ARGV.shift
|
9
11
|
target = ARGV.shift
|
10
12
|
options = ARGV
|
11
13
|
|
12
|
-
::Libis::Format::Tool::
|
13
|
-
|
14
|
+
::Libis::Format::Tool::PdfTool.run command, source, target, *options
|
data/data/types.yml
CHANGED
@@ -377,7 +377,7 @@ PRESENTATION:
|
|
377
377
|
|
378
378
|
OO_IMPRESS:
|
379
379
|
NAME: OpenDocument Presentation (ODP)
|
380
|
-
|
380
|
+
PUID: fmt/138 fmt/292 fmt/293
|
381
381
|
MIME: application/vnd.oasis.opendocument.presentation application/vnd.oasis.opendocument.presentation-template
|
382
382
|
EXTENSIONS: odp otp
|
383
383
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'thor'
|
2
4
|
require 'tty-prompt'
|
3
5
|
require 'tty-config'
|
@@ -7,9 +9,7 @@ require 'libis/format/cli/format'
|
|
7
9
|
|
8
10
|
module Libis
|
9
11
|
module Format
|
10
|
-
|
11
12
|
class CommandLine < Thor
|
12
|
-
|
13
13
|
def self.exit_on_failure?
|
14
14
|
true
|
15
15
|
end
|
@@ -20,6 +20,5 @@ module Libis
|
|
20
20
|
desc 'format', 'perform format identification on a given file or directory'
|
21
21
|
subcommand 'format', Cli::Format
|
22
22
|
end
|
23
|
-
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/config.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'os'
|
3
4
|
require 'libis-tools'
|
4
5
|
|
5
6
|
module Libis
|
6
7
|
module Format
|
7
|
-
|
8
8
|
Config = ::Libis::Tools::Config
|
9
9
|
|
10
10
|
Config[:converter_chain_max_level] = 8
|
@@ -15,33 +15,30 @@ module Libis
|
|
15
15
|
Config[:ghostscript_cmd] = 'gs'
|
16
16
|
Config[:droid_cmd] = '/opt/droid/droid.sh'
|
17
17
|
Config[:droid_temp_path] = '/tmp'
|
18
|
-
Config[:fido_cmd] = '
|
18
|
+
Config[:fido_cmd] = 'fido'
|
19
19
|
Config[:fop_jar] = File.join(Libis::Format::TOOL_DIR, 'fop', 'build', 'fop.jar')
|
20
20
|
Config[:ffmpeg_cmd] = 'ffmpeg'
|
21
|
-
Config[:fido_formats] = [
|
21
|
+
Config[:fido_formats] = [File.join(Libis::Format::DATA_DIR, 'lias_formats.xml')]
|
22
22
|
Config[:pdf_tool] = File.join(Libis::Format::TOOL_DIR, 'PdfTool.jar')
|
23
|
-
Config[:preflight_jar] = File.join(Libis::Format::TOOL_DIR, 'pdfbox', 'preflight-app-
|
23
|
+
Config[:preflight_jar] = File.join(Libis::Format::TOOL_DIR, 'pdfbox', 'preflight-app-3.0.3.jar')
|
24
24
|
Config[:wkhtmltopdf] = 'wkhtmltopdf'
|
25
25
|
Config[:xml_validations] = [['archive/ead', File.join(Libis::Format::DATA_DIR, 'ead.xsd')]]
|
26
26
|
Config[:type_database] = File.join(Libis::Format::DATA_DIR, 'types.yml')
|
27
27
|
Config[:raw_audio_convert_cmd] = 'sox -V1 %s -e signed -b 16 -t wav %s rate %d channels %d'
|
28
28
|
Config[:watermark_font] = '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf'
|
29
29
|
Config[:timeouts] = {
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
pdf_to_pdfa: 5 * 60,
|
43
|
-
pdfa_validator: 5 * 60,
|
30
|
+
droid: 20 * 60,
|
31
|
+
ffmpeg: 5 * 60,
|
32
|
+
fido: 20 * 60,
|
33
|
+
file_tool: 5 * 60,
|
34
|
+
fop: 5 * 60,
|
35
|
+
identification_tool: 5 * 60,
|
36
|
+
office_to_pdf: 5 * 60,
|
37
|
+
email2pdf: 5 * 60,
|
38
|
+
pdf_tool: 5 * 60,
|
39
|
+
pdf_optimizer: 5 * 60,
|
40
|
+
pdf_to_pdfa: 5 * 60,
|
41
|
+
pdfa_validator: 5 * 60
|
44
42
|
}
|
45
|
-
|
46
43
|
end
|
47
44
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
### require 'tools/string'
|
4
4
|
require 'tmpdir'
|
@@ -11,7 +11,6 @@ require_relative 'repository'
|
|
11
11
|
module Libis
|
12
12
|
module Format
|
13
13
|
module Converter
|
14
|
-
|
15
14
|
class Base
|
16
15
|
include Libis::Tools::Logger
|
17
16
|
|
@@ -22,7 +21,7 @@ module Libis
|
|
22
21
|
@flags = {}
|
23
22
|
end
|
24
23
|
|
25
|
-
def convert(source,
|
24
|
+
def convert(source, _target, _format, opts = {})
|
26
25
|
unless File.exist? source
|
27
26
|
error "Cannot find file '#{source}'."
|
28
27
|
return nil
|
@@ -32,35 +31,33 @@ module Libis
|
|
32
31
|
end
|
33
32
|
|
34
33
|
def self.input_types
|
35
|
-
raise
|
34
|
+
raise 'Method #input_types needs to be overridden in converter'
|
36
35
|
end
|
37
36
|
|
38
37
|
def self.output_types(_format = nil)
|
39
|
-
raise
|
38
|
+
raise 'Method #output_types needs to be overridden in converter'
|
40
39
|
end
|
41
40
|
|
42
41
|
def using_temp(target, &block)
|
43
42
|
self.class.using_temp(target, &block)
|
44
43
|
end
|
45
44
|
|
46
|
-
def
|
45
|
+
def self.using_temp(target)
|
47
46
|
tempfile = Tools::TempFile.name("convert-#{File.basename(target, '.*').gsub(/\s/, '_')}", File.extname(target))
|
48
47
|
result = yield tempfile
|
49
48
|
return nil unless result
|
49
|
+
|
50
50
|
FileUtils.move result, target
|
51
51
|
target
|
52
52
|
end
|
53
53
|
|
54
|
-
def
|
55
|
-
|
54
|
+
def self.inherited(klass) # rubocop:disable Lint/MissingSuper
|
56
55
|
Repository.register klass
|
57
56
|
|
58
57
|
class << self
|
59
|
-
|
60
58
|
def conversions
|
61
|
-
input_types.
|
59
|
+
input_types.each_with_object({}) do |input_type, hash|
|
62
60
|
hash[input_type] = output_types
|
63
|
-
hash
|
64
61
|
end
|
65
62
|
end
|
66
63
|
|
@@ -83,7 +80,7 @@ module Libis
|
|
83
80
|
end
|
84
81
|
|
85
82
|
def conversion?(input_type, output_type)
|
86
|
-
conversions[input_type]
|
83
|
+
conversions[input_type]&.any? { |t| t == output_type }
|
87
84
|
end
|
88
85
|
|
89
86
|
def output_for(input_type)
|
@@ -93,13 +90,9 @@ module Libis
|
|
93
90
|
def extension?(extension)
|
94
91
|
!TypeDatabase.ext_types(extension).first.nil?
|
95
92
|
end
|
96
|
-
|
97
93
|
end
|
98
|
-
|
99
94
|
end
|
100
|
-
|
101
95
|
end
|
102
|
-
|
103
96
|
end
|
104
97
|
end
|
105
98
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'fileutils'
|
4
4
|
require 'deep_dive'
|
@@ -10,7 +10,6 @@ require 'libis/format/type_database'
|
|
10
10
|
module Libis
|
11
11
|
module Format
|
12
12
|
module Converter
|
13
|
-
|
14
13
|
class Chain
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
include DeepDive
|
@@ -26,15 +25,16 @@ module Libis
|
|
26
25
|
# @return [Array[Hash]]
|
27
26
|
def append(converter)
|
28
27
|
return [] unless converter
|
28
|
+
|
29
29
|
valid_chain_nodes(converter).map do |node|
|
30
|
-
|
30
|
+
ddup.add_chain_node(node)
|
31
31
|
end.compact
|
32
32
|
end
|
33
33
|
|
34
34
|
def closed?
|
35
35
|
!@converter_chain.empty? &&
|
36
|
-
|
37
|
-
|
36
|
+
@converter_chain.first[:input].to_sym == @source_format &&
|
37
|
+
@converter_chain.last[:output].to_sym == @target_format
|
38
38
|
end
|
39
39
|
|
40
40
|
def valid?
|
@@ -49,19 +49,14 @@ module Libis
|
|
49
49
|
@converter_chain.size
|
50
50
|
end
|
51
51
|
|
52
|
-
|
52
|
+
alias length size
|
53
53
|
|
54
54
|
def to_s
|
55
|
-
|
56
|
-
|
57
|
-
"(#{node[:operations].each do |operation|
|
58
|
-
"#{operation[:method]}:#{operation[:argument]}"
|
59
|
-
end.join(',')})"}->-#{node[:output]}"
|
60
|
-
end.join('->-')}"
|
55
|
+
result = @source_format.to_s
|
56
|
+
result << @converter_chain.map { |node| node_to_s(node) }.join('->-')
|
61
57
|
end
|
62
58
|
|
63
59
|
def convert(src_file, target_file)
|
64
|
-
|
65
60
|
unless valid?
|
66
61
|
error 'Converter chain is not valid'
|
67
62
|
return nil
|
@@ -74,15 +69,13 @@ module Libis
|
|
74
69
|
|
75
70
|
# noinspection RubyParenthesesAroundConditionInspection
|
76
71
|
conversion_success = @converter_chain.each_with_index do |node, i|
|
77
|
-
|
78
72
|
target_type = node[:output]
|
79
73
|
converter_class = node[:converter]
|
80
74
|
converter = converter_class.new
|
81
75
|
|
82
|
-
|
83
|
-
node[:operations].each do |operation|
|
76
|
+
node[:operations]&.each do |operation|
|
84
77
|
converter.send operation[:method], operation[:argument]
|
85
|
-
end
|
78
|
+
end
|
86
79
|
|
87
80
|
target = target_file
|
88
81
|
|
@@ -103,7 +96,6 @@ module Libis
|
|
103
96
|
result[:commands] << r.merge(converter: converter_class.name)
|
104
97
|
|
105
98
|
:success
|
106
|
-
|
107
99
|
end
|
108
100
|
|
109
101
|
result[:files] = [src_file] + xtra_files
|
@@ -113,16 +105,20 @@ module Libis
|
|
113
105
|
end
|
114
106
|
|
115
107
|
conversion_success == :failed ? nil : result
|
116
|
-
|
117
108
|
end
|
118
109
|
|
119
110
|
def valid_chain_nodes(converter)
|
120
|
-
source_format =
|
111
|
+
source_format = begin
|
112
|
+
@converter_chain.last[:output]
|
113
|
+
rescue StandardError
|
114
|
+
@source_format
|
115
|
+
end
|
121
116
|
nodes = []
|
122
117
|
if converter.input_types.include? source_format
|
123
118
|
converter.output_types(source_format).each do |format|
|
124
|
-
node = {
|
119
|
+
node = { converter:, input: source_format, output: format }
|
125
120
|
next if node_exists?(node)
|
121
|
+
|
126
122
|
nodes << node
|
127
123
|
end
|
128
124
|
end
|
@@ -137,6 +133,7 @@ module Libis
|
|
137
133
|
return nil unless node[:output] && node[:converter].output_types(source_format).include?(node[:output])
|
138
134
|
return nil unless node[:converter].input_types.include? source_format
|
139
135
|
return nil if node_exists?(node)
|
136
|
+
|
140
137
|
@converter_chain << node
|
141
138
|
# debug "Chain: #{self}"
|
142
139
|
self
|
@@ -145,15 +142,16 @@ module Libis
|
|
145
142
|
def apply_operations
|
146
143
|
temp_chain = @converter_chain.reverse.ddup
|
147
144
|
applied = true
|
148
|
-
operations = @operations
|
145
|
+
operations = @operations&.ddup || {}
|
149
146
|
while (operation = operations.shift)
|
150
147
|
method = operation.first.to_s.to_sym
|
151
|
-
applied &&=
|
148
|
+
applied &&= temp_chain.each do |node|
|
152
149
|
next unless node[:converter].instance_methods.include?(method)
|
150
|
+
|
153
151
|
node[:operations] ||= []
|
154
|
-
node[:operations] << {
|
152
|
+
node[:operations] << { method:, argument: operation.last }
|
155
153
|
break :found
|
156
|
-
end
|
154
|
+
end == :found
|
157
155
|
end
|
158
156
|
if applied && operations.empty?
|
159
157
|
@converter_chain = temp_chain.reverse
|
@@ -163,17 +161,27 @@ module Libis
|
|
163
161
|
false
|
164
162
|
end
|
165
163
|
|
166
|
-
|
167
164
|
private
|
168
165
|
|
166
|
+
def node_to_s(node)
|
167
|
+
result = node[:converter].name.gsub(/^.*::/, '').to_s
|
168
|
+
unless node[:operations].empty?
|
169
|
+
result << '('
|
170
|
+
result << node[:operations].map do |operation|
|
171
|
+
"#{operation[:method]}:#{operation[:argument]}"
|
172
|
+
end.join(',')
|
173
|
+
result << ')'
|
174
|
+
end
|
175
|
+
result << '->-'
|
176
|
+
result << node[:output].to_s
|
177
|
+
end
|
178
|
+
|
169
179
|
def node_exists?(node)
|
170
180
|
@converter_chain.detect do |n|
|
171
181
|
n[:converter] == node[:converter] && n[:input] == node[:input] && n[:output] == node[:output]
|
172
182
|
end
|
173
183
|
end
|
174
|
-
|
175
184
|
end
|
176
|
-
|
177
185
|
end
|
178
186
|
end
|
179
187
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'base'
|
4
4
|
|
@@ -9,20 +9,19 @@ require 'rexml/document'
|
|
9
9
|
module Libis
|
10
10
|
module Format
|
11
11
|
module Converter
|
12
|
-
|
13
12
|
class EmailConverter < Libis::Format::Converter::Base
|
14
|
-
|
15
13
|
def self.input_types
|
16
|
-
[
|
14
|
+
[:MSG]
|
17
15
|
end
|
18
16
|
|
19
17
|
def self.output_types(format = nil)
|
20
18
|
return [] unless input_types.include?(format)
|
21
|
-
|
19
|
+
|
20
|
+
[:PDF]
|
22
21
|
end
|
23
22
|
|
24
23
|
def email_convert(_)
|
25
|
-
#force usage of this converter
|
24
|
+
# force usage of this converter
|
26
25
|
end
|
27
26
|
|
28
27
|
def convert(source, target, format, opts = {})
|
@@ -30,9 +29,7 @@ module Libis
|
|
30
29
|
|
31
30
|
Format::Tool::MsgToPdf.run(source, target)
|
32
31
|
end
|
33
|
-
|
34
32
|
end
|
35
|
-
|
36
33
|
end
|
37
34
|
end
|
38
35
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
|
3
5
|
require_relative 'base'
|
@@ -7,15 +9,14 @@ require 'libis/format/tool/fop_pdf'
|
|
7
9
|
module Libis
|
8
10
|
module Format
|
9
11
|
module Converter
|
10
|
-
|
11
12
|
class FopPdfConverter < Libis::Format::Converter::Base
|
12
|
-
|
13
13
|
def self.input_types
|
14
14
|
[:XML]
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.output_types(format = nil)
|
18
|
-
return []
|
18
|
+
return [] if format && !input_types.include?(format)
|
19
|
+
|
19
20
|
[:PDF]
|
20
21
|
end
|
21
22
|
|
@@ -30,11 +31,8 @@ module Libis
|
|
30
31
|
FileUtils.mkpath(File.dirname(target))
|
31
32
|
|
32
33
|
Libis::Format::Tool::FopPdf.run(source, target)
|
33
|
-
|
34
34
|
end
|
35
|
-
|
36
35
|
end
|
37
|
-
|
38
36
|
end
|
39
37
|
end
|
40
38
|
end
|