libis-format 1.2.9 → 1.3.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile +4 -0
- data/bin/{pdf_copy → pdf_tool} +3 -2
- data/data/types.yml +1 -1
- data/lib/libis/format/command_line.rb +2 -3
- data/lib/libis/format/config.rb +17 -20
- data/lib/libis/format/converter/base.rb +9 -16
- data/lib/libis/format/converter/chain.rb +36 -28
- data/lib/libis/format/converter/email_converter.rb +5 -8
- data/lib/libis/format/converter/fop_pdf_converter.rb +4 -6
- data/lib/libis/format/converter/image_converter.rb +51 -58
- data/lib/libis/format/converter/jp2_converter.rb +33 -35
- data/lib/libis/format/converter/office_converter.rb +19 -23
- data/lib/libis/format/converter/pdf_converter.rb +133 -52
- data/lib/libis/format/converter/repository.rb +7 -13
- data/lib/libis/format/converter/spreadsheet_converter.rb +7 -11
- data/lib/libis/format/converter/video_converter.rb +41 -55
- data/lib/libis/format/converter/xslt_converter.rb +14 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +41 -43
- data/lib/libis/format/tool/droid.rb +29 -30
- data/lib/libis/format/tool/ff_mpeg.rb +11 -13
- data/lib/libis/format/tool/fido.rb +1 -1
- data/lib/libis/format/tool/pdf_optimizer.rb +21 -27
- data/lib/libis/format/tool/pdf_to_pdfa.rb +2 -6
- data/lib/libis/format/tool/pdf_tool.rb +52 -0
- data/lib/libis/format/tool/pdfa_validator.rb +2 -3
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +23 -20
- data/lib/libis/format/tool.rb +2 -2
- data/lib/libis/format/type_database.rb +51 -28
- data/lib/libis/format/type_database_impl.rb +57 -24
- data/lib/libis/format/version.rb +1 -1
- data/lib/libis/format.rb +3 -2
- data/lib/libis-format.rb +2 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- data/tools/pdfbox/{preflight-app-2.0.13.jar → preflight-app-3.0.3.jar} +0 -0
- metadata +8 -10
- data/lib/libis/format/tool/pdf_copy.rb +0 -57
- data/lib/libis/format/tool/pdf_merge.rb +0 -58
- data/lib/libis/format/tool/pdf_split.rb +0 -56
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 68f64b7c012a051d6c25eb3cb02fb74d662c0f1da348d4b3b5b4af5b7004d543
|
4
|
+
data.tar.gz: 62708947cef69d89540b2dd73b85a546eb012fae7e6565e329e989ab0573949b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: ed3dc449c70eb71f081a6c55f06d8a2b484afb0094a6988aad0ca018ae36e07beb1d9fae3a8fb0a3a026e90c13386f83686b7c66be34b8a028d588ef9412bd99
|
7
|
+
data.tar.gz: 44bc49f3315e99974e5c7f992e1f496ec3e59dbd15839f93a0e2f8b20fc73dcbfba0f77e178514f0e78f0f561961f55f31895e843ba217e70e2a3cc79b47cdfb
|
data/Gemfile
CHANGED
data/bin/{pdf_copy → pdf_tool}
RENAMED
@@ -5,9 +5,10 @@ require 'libis-tools'
|
|
5
5
|
|
6
6
|
::Libis::Tools::Config.logger.level = :WARN
|
7
7
|
|
8
|
+
command = ARGV.shift
|
9
|
+
command = command.split('_')
|
8
10
|
source = ARGV.shift
|
9
11
|
target = ARGV.shift
|
10
12
|
options = ARGV
|
11
13
|
|
12
|
-
::Libis::Format::Tool::
|
13
|
-
|
14
|
+
::Libis::Format::Tool::PdfTool.run command, source, target, *options
|
data/data/types.yml
CHANGED
@@ -377,7 +377,7 @@ PRESENTATION:
|
|
377
377
|
|
378
378
|
OO_IMPRESS:
|
379
379
|
NAME: OpenDocument Presentation (ODP)
|
380
|
-
|
380
|
+
PUID: fmt/138 fmt/292 fmt/293
|
381
381
|
MIME: application/vnd.oasis.opendocument.presentation application/vnd.oasis.opendocument.presentation-template
|
382
382
|
EXTENSIONS: odp otp
|
383
383
|
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'thor'
|
2
4
|
require 'tty-prompt'
|
3
5
|
require 'tty-config'
|
@@ -7,9 +9,7 @@ require 'libis/format/cli/format'
|
|
7
9
|
|
8
10
|
module Libis
|
9
11
|
module Format
|
10
|
-
|
11
12
|
class CommandLine < Thor
|
12
|
-
|
13
13
|
def self.exit_on_failure?
|
14
14
|
true
|
15
15
|
end
|
@@ -20,6 +20,5 @@ module Libis
|
|
20
20
|
desc 'format', 'perform format identification on a given file or directory'
|
21
21
|
subcommand 'format', Cli::Format
|
22
22
|
end
|
23
|
-
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/config.rb
CHANGED
@@ -1,10 +1,10 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
2
3
|
require 'os'
|
3
4
|
require 'libis-tools'
|
4
5
|
|
5
6
|
module Libis
|
6
7
|
module Format
|
7
|
-
|
8
8
|
Config = ::Libis::Tools::Config
|
9
9
|
|
10
10
|
Config[:converter_chain_max_level] = 8
|
@@ -15,33 +15,30 @@ module Libis
|
|
15
15
|
Config[:ghostscript_cmd] = 'gs'
|
16
16
|
Config[:droid_cmd] = '/opt/droid/droid.sh'
|
17
17
|
Config[:droid_temp_path] = '/tmp'
|
18
|
-
Config[:fido_cmd] = '
|
18
|
+
Config[:fido_cmd] = 'fido'
|
19
19
|
Config[:fop_jar] = File.join(Libis::Format::TOOL_DIR, 'fop', 'build', 'fop.jar')
|
20
20
|
Config[:ffmpeg_cmd] = 'ffmpeg'
|
21
|
-
Config[:fido_formats] = [
|
21
|
+
Config[:fido_formats] = [File.join(Libis::Format::DATA_DIR, 'lias_formats.xml')]
|
22
22
|
Config[:pdf_tool] = File.join(Libis::Format::TOOL_DIR, 'PdfTool.jar')
|
23
|
-
Config[:preflight_jar] = File.join(Libis::Format::TOOL_DIR, 'pdfbox', 'preflight-app-
|
23
|
+
Config[:preflight_jar] = File.join(Libis::Format::TOOL_DIR, 'pdfbox', 'preflight-app-3.0.3.jar')
|
24
24
|
Config[:wkhtmltopdf] = 'wkhtmltopdf'
|
25
25
|
Config[:xml_validations] = [['archive/ead', File.join(Libis::Format::DATA_DIR, 'ead.xsd')]]
|
26
26
|
Config[:type_database] = File.join(Libis::Format::DATA_DIR, 'types.yml')
|
27
27
|
Config[:raw_audio_convert_cmd] = 'sox -V1 %s -e signed -b 16 -t wav %s rate %d channels %d'
|
28
28
|
Config[:watermark_font] = '/usr/share/fonts/truetype/liberation/LiberationSans-Regular.ttf'
|
29
29
|
Config[:timeouts] = {
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
pdf_to_pdfa: 5 * 60,
|
43
|
-
pdfa_validator: 5 * 60,
|
30
|
+
droid: 20 * 60,
|
31
|
+
ffmpeg: 5 * 60,
|
32
|
+
fido: 20 * 60,
|
33
|
+
file_tool: 5 * 60,
|
34
|
+
fop: 5 * 60,
|
35
|
+
identification_tool: 5 * 60,
|
36
|
+
office_to_pdf: 5 * 60,
|
37
|
+
email2pdf: 5 * 60,
|
38
|
+
pdf_tool: 5 * 60,
|
39
|
+
pdf_optimizer: 5 * 60,
|
40
|
+
pdf_to_pdfa: 5 * 60,
|
41
|
+
pdfa_validator: 5 * 60
|
44
42
|
}
|
45
|
-
|
46
43
|
end
|
47
44
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
### require 'tools/string'
|
4
4
|
require 'tmpdir'
|
@@ -11,7 +11,6 @@ require_relative 'repository'
|
|
11
11
|
module Libis
|
12
12
|
module Format
|
13
13
|
module Converter
|
14
|
-
|
15
14
|
class Base
|
16
15
|
include Libis::Tools::Logger
|
17
16
|
|
@@ -22,7 +21,7 @@ module Libis
|
|
22
21
|
@flags = {}
|
23
22
|
end
|
24
23
|
|
25
|
-
def convert(source,
|
24
|
+
def convert(source, _target, _format, opts = {})
|
26
25
|
unless File.exist? source
|
27
26
|
error "Cannot find file '#{source}'."
|
28
27
|
return nil
|
@@ -32,35 +31,33 @@ module Libis
|
|
32
31
|
end
|
33
32
|
|
34
33
|
def self.input_types
|
35
|
-
raise
|
34
|
+
raise 'Method #input_types needs to be overridden in converter'
|
36
35
|
end
|
37
36
|
|
38
37
|
def self.output_types(_format = nil)
|
39
|
-
raise
|
38
|
+
raise 'Method #output_types needs to be overridden in converter'
|
40
39
|
end
|
41
40
|
|
42
41
|
def using_temp(target, &block)
|
43
42
|
self.class.using_temp(target, &block)
|
44
43
|
end
|
45
44
|
|
46
|
-
def
|
45
|
+
def self.using_temp(target)
|
47
46
|
tempfile = Tools::TempFile.name("convert-#{File.basename(target, '.*').gsub(/\s/, '_')}", File.extname(target))
|
48
47
|
result = yield tempfile
|
49
48
|
return nil unless result
|
49
|
+
|
50
50
|
FileUtils.move result, target
|
51
51
|
target
|
52
52
|
end
|
53
53
|
|
54
|
-
def
|
55
|
-
|
54
|
+
def self.inherited(klass) # rubocop:disable Lint/MissingSuper
|
56
55
|
Repository.register klass
|
57
56
|
|
58
57
|
class << self
|
59
|
-
|
60
58
|
def conversions
|
61
|
-
input_types.
|
59
|
+
input_types.each_with_object({}) do |input_type, hash|
|
62
60
|
hash[input_type] = output_types
|
63
|
-
hash
|
64
61
|
end
|
65
62
|
end
|
66
63
|
|
@@ -83,7 +80,7 @@ module Libis
|
|
83
80
|
end
|
84
81
|
|
85
82
|
def conversion?(input_type, output_type)
|
86
|
-
conversions[input_type]
|
83
|
+
conversions[input_type]&.any? { |t| t == output_type }
|
87
84
|
end
|
88
85
|
|
89
86
|
def output_for(input_type)
|
@@ -93,13 +90,9 @@ module Libis
|
|
93
90
|
def extension?(extension)
|
94
91
|
!TypeDatabase.ext_types(extension).first.nil?
|
95
92
|
end
|
96
|
-
|
97
93
|
end
|
98
|
-
|
99
94
|
end
|
100
|
-
|
101
95
|
end
|
102
|
-
|
103
96
|
end
|
104
97
|
end
|
105
98
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require 'fileutils'
|
4
4
|
require 'deep_dive'
|
@@ -10,7 +10,6 @@ require 'libis/format/type_database'
|
|
10
10
|
module Libis
|
11
11
|
module Format
|
12
12
|
module Converter
|
13
|
-
|
14
13
|
class Chain
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
include DeepDive
|
@@ -26,15 +25,16 @@ module Libis
|
|
26
25
|
# @return [Array[Hash]]
|
27
26
|
def append(converter)
|
28
27
|
return [] unless converter
|
28
|
+
|
29
29
|
valid_chain_nodes(converter).map do |node|
|
30
|
-
|
30
|
+
ddup.add_chain_node(node)
|
31
31
|
end.compact
|
32
32
|
end
|
33
33
|
|
34
34
|
def closed?
|
35
35
|
!@converter_chain.empty? &&
|
36
|
-
|
37
|
-
|
36
|
+
@converter_chain.first[:input].to_sym == @source_format &&
|
37
|
+
@converter_chain.last[:output].to_sym == @target_format
|
38
38
|
end
|
39
39
|
|
40
40
|
def valid?
|
@@ -49,19 +49,14 @@ module Libis
|
|
49
49
|
@converter_chain.size
|
50
50
|
end
|
51
51
|
|
52
|
-
|
52
|
+
alias length size
|
53
53
|
|
54
54
|
def to_s
|
55
|
-
|
56
|
-
|
57
|
-
"(#{node[:operations].each do |operation|
|
58
|
-
"#{operation[:method]}:#{operation[:argument]}"
|
59
|
-
end.join(',')})"}->-#{node[:output]}"
|
60
|
-
end.join('->-')}"
|
55
|
+
result = @source_format.to_s
|
56
|
+
result << @converter_chain.map { |node| node_to_s(node) }.join('->-')
|
61
57
|
end
|
62
58
|
|
63
59
|
def convert(src_file, target_file)
|
64
|
-
|
65
60
|
unless valid?
|
66
61
|
error 'Converter chain is not valid'
|
67
62
|
return nil
|
@@ -74,15 +69,13 @@ module Libis
|
|
74
69
|
|
75
70
|
# noinspection RubyParenthesesAroundConditionInspection
|
76
71
|
conversion_success = @converter_chain.each_with_index do |node, i|
|
77
|
-
|
78
72
|
target_type = node[:output]
|
79
73
|
converter_class = node[:converter]
|
80
74
|
converter = converter_class.new
|
81
75
|
|
82
|
-
|
83
|
-
node[:operations].each do |operation|
|
76
|
+
node[:operations]&.each do |operation|
|
84
77
|
converter.send operation[:method], operation[:argument]
|
85
|
-
end
|
78
|
+
end
|
86
79
|
|
87
80
|
target = target_file
|
88
81
|
|
@@ -103,7 +96,6 @@ module Libis
|
|
103
96
|
result[:commands] << r.merge(converter: converter_class.name)
|
104
97
|
|
105
98
|
:success
|
106
|
-
|
107
99
|
end
|
108
100
|
|
109
101
|
result[:files] = [src_file] + xtra_files
|
@@ -113,16 +105,20 @@ module Libis
|
|
113
105
|
end
|
114
106
|
|
115
107
|
conversion_success == :failed ? nil : result
|
116
|
-
|
117
108
|
end
|
118
109
|
|
119
110
|
def valid_chain_nodes(converter)
|
120
|
-
source_format =
|
111
|
+
source_format = begin
|
112
|
+
@converter_chain.last[:output]
|
113
|
+
rescue StandardError
|
114
|
+
@source_format
|
115
|
+
end
|
121
116
|
nodes = []
|
122
117
|
if converter.input_types.include? source_format
|
123
118
|
converter.output_types(source_format).each do |format|
|
124
|
-
node = {
|
119
|
+
node = { converter:, input: source_format, output: format }
|
125
120
|
next if node_exists?(node)
|
121
|
+
|
126
122
|
nodes << node
|
127
123
|
end
|
128
124
|
end
|
@@ -137,6 +133,7 @@ module Libis
|
|
137
133
|
return nil unless node[:output] && node[:converter].output_types(source_format).include?(node[:output])
|
138
134
|
return nil unless node[:converter].input_types.include? source_format
|
139
135
|
return nil if node_exists?(node)
|
136
|
+
|
140
137
|
@converter_chain << node
|
141
138
|
# debug "Chain: #{self}"
|
142
139
|
self
|
@@ -145,15 +142,16 @@ module Libis
|
|
145
142
|
def apply_operations
|
146
143
|
temp_chain = @converter_chain.reverse.ddup
|
147
144
|
applied = true
|
148
|
-
operations = @operations
|
145
|
+
operations = @operations&.ddup || {}
|
149
146
|
while (operation = operations.shift)
|
150
147
|
method = operation.first.to_s.to_sym
|
151
|
-
applied &&=
|
148
|
+
applied &&= temp_chain.each do |node|
|
152
149
|
next unless node[:converter].instance_methods.include?(method)
|
150
|
+
|
153
151
|
node[:operations] ||= []
|
154
|
-
node[:operations] << {
|
152
|
+
node[:operations] << { method:, argument: operation.last }
|
155
153
|
break :found
|
156
|
-
end
|
154
|
+
end == :found
|
157
155
|
end
|
158
156
|
if applied && operations.empty?
|
159
157
|
@converter_chain = temp_chain.reverse
|
@@ -163,17 +161,27 @@ module Libis
|
|
163
161
|
false
|
164
162
|
end
|
165
163
|
|
166
|
-
|
167
164
|
private
|
168
165
|
|
166
|
+
def node_to_s(node)
|
167
|
+
result = node[:converter].name.gsub(/^.*::/, '').to_s
|
168
|
+
unless node[:operations].empty?
|
169
|
+
result << '('
|
170
|
+
result << node[:operations].map do |operation|
|
171
|
+
"#{operation[:method]}:#{operation[:argument]}"
|
172
|
+
end.join(',')
|
173
|
+
result << ')'
|
174
|
+
end
|
175
|
+
result << '->-'
|
176
|
+
result << node[:output].to_s
|
177
|
+
end
|
178
|
+
|
169
179
|
def node_exists?(node)
|
170
180
|
@converter_chain.detect do |n|
|
171
181
|
n[:converter] == node[:converter] && n[:input] == node[:input] && n[:output] == node[:output]
|
172
182
|
end
|
173
183
|
end
|
174
|
-
|
175
184
|
end
|
176
|
-
|
177
185
|
end
|
178
186
|
end
|
179
187
|
end
|
@@ -1,4 +1,4 @@
|
|
1
|
-
#
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
3
|
require_relative 'base'
|
4
4
|
|
@@ -9,20 +9,19 @@ require 'rexml/document'
|
|
9
9
|
module Libis
|
10
10
|
module Format
|
11
11
|
module Converter
|
12
|
-
|
13
12
|
class EmailConverter < Libis::Format::Converter::Base
|
14
|
-
|
15
13
|
def self.input_types
|
16
|
-
[
|
14
|
+
[:MSG]
|
17
15
|
end
|
18
16
|
|
19
17
|
def self.output_types(format = nil)
|
20
18
|
return [] unless input_types.include?(format)
|
21
|
-
|
19
|
+
|
20
|
+
[:PDF]
|
22
21
|
end
|
23
22
|
|
24
23
|
def email_convert(_)
|
25
|
-
#force usage of this converter
|
24
|
+
# force usage of this converter
|
26
25
|
end
|
27
26
|
|
28
27
|
def convert(source, target, format, opts = {})
|
@@ -30,9 +29,7 @@ module Libis
|
|
30
29
|
|
31
30
|
Format::Tool::MsgToPdf.run(source, target)
|
32
31
|
end
|
33
|
-
|
34
32
|
end
|
35
|
-
|
36
33
|
end
|
37
34
|
end
|
38
35
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
require 'nokogiri'
|
2
4
|
|
3
5
|
require_relative 'base'
|
@@ -7,15 +9,14 @@ require 'libis/format/tool/fop_pdf'
|
|
7
9
|
module Libis
|
8
10
|
module Format
|
9
11
|
module Converter
|
10
|
-
|
11
12
|
class FopPdfConverter < Libis::Format::Converter::Base
|
12
|
-
|
13
13
|
def self.input_types
|
14
14
|
[:XML]
|
15
15
|
end
|
16
16
|
|
17
17
|
def self.output_types(format = nil)
|
18
|
-
return []
|
18
|
+
return [] if format && !input_types.include?(format)
|
19
|
+
|
19
20
|
[:PDF]
|
20
21
|
end
|
21
22
|
|
@@ -30,11 +31,8 @@ module Libis
|
|
30
31
|
FileUtils.mkpath(File.dirname(target))
|
31
32
|
|
32
33
|
Libis::Format::Tool::FopPdf.run(source, target)
|
33
|
-
|
34
34
|
end
|
35
|
-
|
36
35
|
end
|
37
|
-
|
38
36
|
end
|
39
37
|
end
|
40
38
|
end
|