libis-format 1.3.4 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +45 -250
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +86 -128
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,57 +9,65 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfaValidator
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
16
|
def self.run(source)
|
18
|
-
new.run source
|
17
|
+
self.new.run source
|
19
18
|
end
|
20
19
|
|
21
20
|
def run(source)
|
21
|
+
|
22
22
|
src_file = File.absolute_path(source)
|
23
23
|
|
24
24
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
25
|
-
result = nil
|
26
25
|
if (pdfa = Libis::Format::Config[:pdfa_cmd])
|
27
26
|
# Keep it clean: tool generates fontconfig/ cache dir in current working dir
|
28
27
|
previous_wd = Dir.getwd
|
29
28
|
Dir.chdir(Dir.tmpdir)
|
30
29
|
|
31
30
|
result = Libis::Tools::Command.run(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
pdfa,
|
32
|
+
'--noxml',
|
33
|
+
'--level', 'B',
|
34
|
+
'--verb', '0',
|
35
|
+
src_file,
|
36
|
+
timeout: timeout,
|
37
|
+
kill_after: timeout * 2
|
39
38
|
)
|
40
39
|
|
41
|
-
|
40
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
42
42
|
|
43
43
|
Dir.chdir(previous_wd)
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
|
46
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
47
|
+
result[:out].join("\n")
|
48
|
+
return false
|
49
|
+
end
|
49
50
|
else
|
50
51
|
jar = Libis::Format::Config[:preflight_jar]
|
51
52
|
result = Libis::Tools::Command.run(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
53
|
+
Libis::Format::Config[:java_cmd],
|
54
|
+
'-jar', jar,
|
55
|
+
src_file,
|
56
|
+
timeout: timeout,
|
57
|
+
kill_after: timeout * 2
|
57
58
|
)
|
59
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
58
60
|
|
59
|
-
result[:
|
60
|
-
|
61
|
+
unless result[:status] == 0
|
62
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
63
|
+
result[:out].join("\n")
|
64
|
+
return false
|
65
|
+
end
|
61
66
|
end
|
62
|
-
|
67
|
+
true
|
63
68
|
end
|
64
69
|
end
|
70
|
+
|
65
71
|
end
|
66
72
|
end
|
67
73
|
end
|
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# noinspection RubyResolve
|
3
2
|
require 'fileutils'
|
4
3
|
|
5
4
|
require 'libis/tools/extend/string'
|
@@ -11,62 +10,52 @@ require 'libis/format/config'
|
|
11
10
|
module Libis
|
12
11
|
module Format
|
13
12
|
module Tool
|
13
|
+
|
14
14
|
class SpreadsheetToOds
|
15
15
|
include ::Libis::Tools::Logger
|
16
16
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
-
result.zero?
|
20
|
-
end
|
21
|
-
|
22
17
|
def self.run(source, target, options = {})
|
23
|
-
new.run source, target, options
|
18
|
+
self.new.run source, target, options
|
24
19
|
end
|
25
20
|
|
26
21
|
def run(source, target, options = {})
|
27
22
|
workdir = '/...'
|
28
23
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
24
|
|
30
|
-
workdir = File.join(workdir, rand(
|
25
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
26
|
FileUtils.mkpath(workdir)
|
32
27
|
|
33
28
|
src_file = File.join(workdir, File.basename(source))
|
34
29
|
FileUtils.symlink source, src_file
|
35
30
|
|
36
|
-
tgt_file = File.join(workdir,
|
31
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
|
37
32
|
|
38
33
|
export_filter = options[:export_filter] || 'ods'
|
39
34
|
|
40
35
|
timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
|
41
|
-
|
36
|
+
Libis::Format::Config[:timeouts][:office_to_pdf]
|
42
37
|
result = Libis::Tools::Command.run(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
38
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
39
|
+
"-env:UserInstallation=file://#{workdir}",
|
40
|
+
'--convert-to', export_filter,
|
41
|
+
'--outdir', workdir, src_file,
|
42
|
+
timeout: timeout,
|
43
|
+
kill_after: timeout * 2
|
49
44
|
)
|
50
45
|
|
51
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
|
-
|
46
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
53
47
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
54
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
48
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
55
49
|
|
56
50
|
FileUtils.copy tgt_file, target, preserve: true
|
57
51
|
|
58
|
-
{
|
59
|
-
command: result,
|
60
|
-
files: [target]
|
61
|
-
}
|
62
52
|
ensure
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
nil
|
67
|
-
end
|
53
|
+
FileUtils.rmtree workdir rescue nil
|
54
|
+
|
55
|
+
result[:out]
|
68
56
|
end
|
69
57
|
end
|
58
|
+
|
70
59
|
end
|
71
60
|
end
|
72
61
|
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
# code utf-8
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
autoload :Droid, 'libis/format/tool/droid'
|
9
8
|
autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
|
10
9
|
autoload :Fido, 'libis/format/tool/fido'
|
11
10
|
autoload :FileTool, 'libis/format/tool/file_tool'
|
12
11
|
|
13
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
14
|
-
autoload :FFMpeg, 'libis/format/tool/
|
13
|
+
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
15
14
|
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
16
15
|
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
17
16
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
@@ -19,7 +18,7 @@ module Libis
|
|
19
18
|
autoload :PdfSplit, 'libis/format/tool/pdf_split'
|
20
19
|
autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
|
21
20
|
autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
|
22
|
-
|
21
|
+
|
23
22
|
end
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/version.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'yaml'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
class YamlLoader
|
8
|
+
# noinspection RubyResolve
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
def query(key, value)
|
12
|
+
case key.to_s.downcase.to_sym
|
13
|
+
when :name
|
14
|
+
return [database[value.to_s.upcase.to_sym]]
|
15
|
+
when :category
|
16
|
+
database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
|
17
|
+
when :puid
|
18
|
+
database.find_all { |_, info| info.puids.include?(value) }
|
19
|
+
when :mimetype
|
20
|
+
database.find_all { |_, info| info.mimetypes.include?(value) }
|
21
|
+
when :extension
|
22
|
+
database.find_all { |_, info| info.extensions.include?(value) }
|
23
|
+
else
|
24
|
+
return []
|
25
|
+
end.map(&:last)
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_formats(file_or_hash)
|
29
|
+
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
|
30
|
+
hash.each do |category, format_list|
|
31
|
+
format_list.each do |format_name, format_info|
|
32
|
+
format_info.symbolize_keys!
|
33
|
+
format_name = format_name.to_sym
|
34
|
+
new_info = Libis::Format::Info.new(
|
35
|
+
name: format_name,
|
36
|
+
category: category.to_sym,
|
37
|
+
description: format_info[:NAME],
|
38
|
+
puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
|
39
|
+
mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
|
40
|
+
extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
|
41
|
+
)
|
42
|
+
if (old_info = database[format_name])
|
43
|
+
new_info = Libis::Format::Info.new(
|
44
|
+
name: format_name,
|
45
|
+
category: category.to_sym,
|
46
|
+
description: new_info.description.blank? ? old_info.description : new_info.description,
|
47
|
+
puids: (old_info.puids + new_info.puids).uniq,
|
48
|
+
mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
|
49
|
+
extensions: (old_info.extensions + new_info.extensions).uniq
|
50
|
+
)
|
51
|
+
end
|
52
|
+
database[format_name] = new_info
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :database
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@database = {}
|
64
|
+
format_database = Libis::Format::Config[:format_library_database]
|
65
|
+
load_formats(format_database)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
data/lib/libis/format.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/format/version'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
autoload :Config, 'libis/format/config'
|
8
|
-
|
9
|
-
autoload :
|
6
|
+
|
7
|
+
autoload :Info, 'libis/format/info'
|
8
|
+
autoload :Library, 'libis/format/library'
|
9
|
+
autoload :YamlLoader, 'libis/format/yaml_loader'
|
10
|
+
|
10
11
|
autoload :Identifier, 'libis/format/identifier'
|
11
12
|
|
12
13
|
autoload :Tool, 'libis/format/tool'
|
@@ -15,5 +16,6 @@ module Libis
|
|
15
16
|
ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
|
16
17
|
DATA_DIR = File.join(ROOT_DIR, 'data')
|
17
18
|
TOOL_DIR = File.join(ROOT_DIR, 'tools')
|
19
|
+
|
18
20
|
end
|
19
|
-
end
|
21
|
+
end
|
data/lib/libis-format.rb
CHANGED
data/libis-format.gemspec
CHANGED
@@ -1,48 +1,42 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
|
3
|
-
lib = File.expand_path('lib',
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
-
require 'libis/format/version'
|
7
6
|
require 'bundler'
|
7
|
+
require 'libis/format/version'
|
8
8
|
|
9
9
|
Gem::Specification.new do |spec|
|
10
10
|
spec.name = 'libis-format'
|
11
11
|
spec.version = Libis::Format::VERSION
|
12
12
|
spec.authors = ['Kris Dekeyser']
|
13
13
|
spec.email = ['kris.dekeyser@libis.be']
|
14
|
-
spec.summary =
|
15
|
-
spec.description =
|
14
|
+
spec.summary = %q{LIBIS File format format services.}
|
15
|
+
spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
|
16
16
|
spec.homepage = ''
|
17
17
|
spec.license = 'MIT'
|
18
18
|
|
19
19
|
spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
|
20
|
-
spec.required_ruby_version = '>= 3.2'
|
21
20
|
|
22
|
-
spec.files
|
23
|
-
f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
|
24
|
-
end
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
22
|
spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
|
23
|
+
# spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
26
24
|
spec.require_paths = ['lib']
|
27
25
|
|
28
|
-
spec.
|
29
|
-
spec.
|
30
|
-
spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
|
31
|
-
spec.add_runtime_dependency 'libis-tools', '~> 1.1'
|
32
|
-
spec.add_runtime_dependency 'mini_magick', '~> 5.0.1'
|
33
|
-
spec.add_runtime_dependency 'naturally', '~> 2.2'
|
34
|
-
spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
|
35
|
-
spec.add_runtime_dependency 'os', '~> 1.1'
|
36
|
-
spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
|
37
|
-
spec.add_runtime_dependency 'pdfkit', '~> 0.8'
|
38
|
-
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.3'
|
27
|
+
spec.add_development_dependency 'rspec', '~> 3.1'
|
39
28
|
spec.add_development_dependency 'awesome_print'
|
40
|
-
spec.add_development_dependency 'equivalent-xml'
|
41
|
-
spec.
|
42
|
-
spec.add_development_dependency 'rspec'
|
43
|
-
if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
|
29
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.5'
|
30
|
+
if spec.platform == Gem::Platform::JAVA
|
44
31
|
spec.add_development_dependency 'saxon-xslt'
|
45
32
|
else
|
46
33
|
spec.add_development_dependency 'nokogiri'
|
47
34
|
end
|
35
|
+
|
36
|
+
spec.add_runtime_dependency 'libis-tools', '~> 1.0'
|
37
|
+
spec.add_runtime_dependency 'os', '= 0.9.6'
|
38
|
+
spec.add_runtime_dependency 'mini_magick', '~> 4.3'
|
39
|
+
spec.add_runtime_dependency 'deep_dive', '~> 0.3'
|
40
|
+
spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
|
41
|
+
spec.add_runtime_dependency 'naturally', '~> 2.1'
|
48
42
|
end
|
data/tools/PdfTool.jar
CHANGED
Binary file
|
Binary file
|
Binary file
|