libis-format 1.3.3 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -12
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/bin/{pdf_tool → pdf_copy} +2 -3
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +23 -19
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +52 -200
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +47 -0
- data/lib/libis/format/tool/pdf_merge.rb +19 -25
- data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
- data/lib/libis/format/tool/pdf_split.rb +33 -6
- data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
- data/lib/libis/format/tool/pdfa_validator.rb +30 -24
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- data/tools/PdfTool.jar +0 -0
- data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
- data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
- metadata +83 -125
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/tool/pdf_tool.rb +0 -52
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,38 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfOptimizer
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, quality)
|
23
|
-
new.run source, target, quality
|
17
|
+
self.new.run source, target, quality
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, quality)
|
27
|
-
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
|
-
args = [
|
29
|
-
Libis::Format::Config[:ghostscript_cmd],
|
30
|
-
'-sDEVICE=pdfwrite',
|
31
|
-
'-dCompatibilityLevel=1.4',
|
32
|
-
"-dPDFSETTINGS=/#{quality}",
|
33
|
-
'-dNOPAUSE',
|
34
|
-
'-dBATCH',
|
35
|
-
"-sOutputFile=#{target}",
|
36
|
-
source.to_s
|
37
|
-
]
|
38
|
-
|
39
|
-
result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
|
40
21
|
|
41
|
-
|
22
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
23
|
+
result = Libis::Tools::Command.run(
|
24
|
+
'gs',
|
25
|
+
'-sDEVICE=pdfwrite',
|
26
|
+
'-dCompatibilityLevel=1.4',
|
27
|
+
"-dPDFSETTINGS=/#{quality}",
|
28
|
+
'-dNOPAUSE',
|
29
|
+
'-dBATCH',
|
30
|
+
"-sOutputFile=#{target}",
|
31
|
+
"#{source}",
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
34
|
+
)
|
35
|
+
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
42
38
|
|
43
39
|
result
|
44
40
|
end
|
45
41
|
end
|
42
|
+
|
46
43
|
end
|
47
44
|
end
|
48
45
|
end
|
@@ -1,20 +1,47 @@
|
|
1
|
-
|
1
|
+
require 'os'
|
2
2
|
|
3
|
-
require 'libis/
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
4
8
|
|
5
9
|
module Libis
|
6
10
|
module Format
|
7
11
|
module Tool
|
12
|
+
|
8
13
|
class PdfSplit
|
14
|
+
include ::Libis::Tools::Logger
|
9
15
|
|
10
|
-
def self.run(source, target,
|
11
|
-
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
12
18
|
end
|
13
19
|
|
14
|
-
def run(source, target,
|
15
|
-
|
20
|
+
def run(source, target, options = [])
|
21
|
+
|
22
|
+
if OS.java?
|
23
|
+
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
24
|
+
end
|
25
|
+
|
26
|
+
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
27
|
+
result = Libis::Tools::Command.run(
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'SplitPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
36
|
+
)
|
37
|
+
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
40
|
+
|
41
|
+
result
|
16
42
|
end
|
17
43
|
end
|
44
|
+
|
18
45
|
end
|
19
46
|
end
|
20
47
|
end
|
@@ -1,9 +1,6 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'tempfile'
|
4
2
|
require 'csv'
|
5
3
|
require 'fileutils'
|
6
|
-
require 'pdfinfo'
|
7
4
|
|
8
5
|
require 'libis/tools/extend/string'
|
9
6
|
require 'libis/tools/logger'
|
@@ -15,24 +12,19 @@ require 'libis/format'
|
|
15
12
|
module Libis
|
16
13
|
module Format
|
17
14
|
module Tool
|
15
|
+
|
18
16
|
class PdfToPdfa
|
19
17
|
include ::Libis::Tools::Logger
|
20
18
|
|
21
|
-
def self.
|
22
|
-
|
23
|
-
result.zero?
|
19
|
+
def self.run(source, target = nil, options = {})
|
20
|
+
self.new.run source, target, options
|
24
21
|
end
|
25
22
|
|
26
|
-
def
|
27
|
-
new.run source, target, **options
|
28
|
-
end
|
23
|
+
def run(source, target = nil, options = nil)
|
29
24
|
|
30
|
-
def run(source, target = nil, **options)
|
31
25
|
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
32
26
|
target ||= tmp_target
|
33
27
|
|
34
|
-
metadata = get_metadata(source)
|
35
|
-
|
36
28
|
icc_info = icc_options(options[:colorspace])
|
37
29
|
|
38
30
|
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
@@ -40,58 +32,52 @@ module Libis
|
|
40
32
|
|
41
33
|
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
42
34
|
File.open(def_filename, 'w') do |f|
|
43
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
|
44
|
-
|
45
|
-
|
46
|
-
.gsub('[**METADATA**]', metadata)
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
47
38
|
end
|
48
39
|
|
49
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
50
41
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
source,
|
63
|
-
timeout:,
|
64
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
65
53
|
)
|
66
54
|
|
67
|
-
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
68
56
|
|
69
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
70
58
|
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
71
64
|
result
|
72
65
|
end
|
73
66
|
|
74
|
-
private
|
75
67
|
|
76
|
-
|
77
|
-
info = Pdfinfo.new(source)
|
78
|
-
metadata = "/Title (#{info.title})"
|
79
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
80
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
81
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
82
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
83
|
-
metadata
|
84
|
-
end
|
68
|
+
private
|
85
69
|
|
86
70
|
def icc_options(colorspace)
|
87
71
|
case colorspace.to_s.downcase
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
92
76
|
end
|
93
77
|
end
|
78
|
+
|
94
79
|
end
|
80
|
+
|
95
81
|
end
|
96
82
|
end
|
97
83
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,57 +9,65 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfaValidator
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
16
|
def self.run(source)
|
18
|
-
new.run source
|
17
|
+
self.new.run source
|
19
18
|
end
|
20
19
|
|
21
20
|
def run(source)
|
21
|
+
|
22
22
|
src_file = File.absolute_path(source)
|
23
23
|
|
24
24
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
25
|
-
result = nil
|
26
25
|
if (pdfa = Libis::Format::Config[:pdfa_cmd])
|
27
26
|
# Keep it clean: tool generates fontconfig/ cache dir in current working dir
|
28
27
|
previous_wd = Dir.getwd
|
29
28
|
Dir.chdir(Dir.tmpdir)
|
30
29
|
|
31
30
|
result = Libis::Tools::Command.run(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
pdfa,
|
32
|
+
'--noxml',
|
33
|
+
'--level', 'B',
|
34
|
+
'--verb', '0',
|
35
|
+
src_file,
|
36
|
+
timeout: timeout,
|
37
|
+
kill_after: timeout * 2
|
39
38
|
)
|
40
39
|
|
41
|
-
|
40
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
42
42
|
|
43
43
|
Dir.chdir(previous_wd)
|
44
44
|
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
45
|
+
unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
|
46
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
47
|
+
result[:out].join("\n")
|
48
|
+
return false
|
49
|
+
end
|
49
50
|
else
|
50
51
|
jar = Libis::Format::Config[:preflight_jar]
|
51
52
|
result = Libis::Tools::Command.run(
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
53
|
+
Libis::Format::Config[:java_cmd],
|
54
|
+
'-jar', jar,
|
55
|
+
src_file,
|
56
|
+
timeout: timeout,
|
57
|
+
kill_after: timeout * 2
|
57
58
|
)
|
59
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
58
60
|
|
59
|
-
result[:
|
60
|
-
|
61
|
+
unless result[:status] == 0
|
62
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
63
|
+
result[:out].join("\n")
|
64
|
+
return false
|
65
|
+
end
|
61
66
|
end
|
62
|
-
|
67
|
+
true
|
63
68
|
end
|
64
69
|
end
|
70
|
+
|
65
71
|
end
|
66
72
|
end
|
67
73
|
end
|
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# noinspection RubyResolve
|
3
2
|
require 'fileutils'
|
4
3
|
|
5
4
|
require 'libis/tools/extend/string'
|
@@ -11,62 +10,52 @@ require 'libis/format/config'
|
|
11
10
|
module Libis
|
12
11
|
module Format
|
13
12
|
module Tool
|
13
|
+
|
14
14
|
class SpreadsheetToOds
|
15
15
|
include ::Libis::Tools::Logger
|
16
16
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
-
result.zero?
|
20
|
-
end
|
21
|
-
|
22
17
|
def self.run(source, target, options = {})
|
23
|
-
new.run source, target, options
|
18
|
+
self.new.run source, target, options
|
24
19
|
end
|
25
20
|
|
26
21
|
def run(source, target, options = {})
|
27
22
|
workdir = '/...'
|
28
23
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
24
|
|
30
|
-
workdir = File.join(workdir, rand(
|
25
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
26
|
FileUtils.mkpath(workdir)
|
32
27
|
|
33
28
|
src_file = File.join(workdir, File.basename(source))
|
34
29
|
FileUtils.symlink source, src_file
|
35
30
|
|
36
|
-
tgt_file = File.join(workdir,
|
31
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
|
37
32
|
|
38
33
|
export_filter = options[:export_filter] || 'ods'
|
39
34
|
|
40
35
|
timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
|
41
|
-
|
36
|
+
Libis::Format::Config[:timeouts][:office_to_pdf]
|
42
37
|
result = Libis::Tools::Command.run(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
38
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
39
|
+
"-env:UserInstallation=file://#{workdir}",
|
40
|
+
'--convert-to', export_filter,
|
41
|
+
'--outdir', workdir, src_file,
|
42
|
+
timeout: timeout,
|
43
|
+
kill_after: timeout * 2
|
49
44
|
)
|
50
45
|
|
51
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
|
-
|
46
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
53
47
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
54
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
48
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
55
49
|
|
56
50
|
FileUtils.copy tgt_file, target, preserve: true
|
57
51
|
|
58
|
-
{
|
59
|
-
command: result,
|
60
|
-
files: [target]
|
61
|
-
}
|
62
52
|
ensure
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
nil
|
67
|
-
end
|
53
|
+
FileUtils.rmtree workdir rescue nil
|
54
|
+
|
55
|
+
result[:out]
|
68
56
|
end
|
69
57
|
end
|
58
|
+
|
70
59
|
end
|
71
60
|
end
|
72
61
|
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
# code utf-8
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
autoload :Droid, 'libis/format/tool/droid'
|
9
8
|
autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
|
10
9
|
autoload :Fido, 'libis/format/tool/fido'
|
11
10
|
autoload :FileTool, 'libis/format/tool/file_tool'
|
12
11
|
|
13
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
14
|
-
autoload :FFMpeg, 'libis/format/tool/
|
13
|
+
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
15
14
|
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
16
15
|
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
17
16
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
@@ -19,7 +18,7 @@ module Libis
|
|
19
18
|
autoload :PdfSplit, 'libis/format/tool/pdf_split'
|
20
19
|
autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
|
21
20
|
autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
|
22
|
-
|
21
|
+
|
23
22
|
end
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/version.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'yaml'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
class YamlLoader
|
8
|
+
# noinspection RubyResolve
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
def query(key, value)
|
12
|
+
case key.to_s.downcase.to_sym
|
13
|
+
when :name
|
14
|
+
return [database[value.to_s.upcase.to_sym]]
|
15
|
+
when :category
|
16
|
+
database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
|
17
|
+
when :puid
|
18
|
+
database.find_all { |_, info| info.puids.include?(value) }
|
19
|
+
when :mimetype
|
20
|
+
database.find_all { |_, info| info.mimetypes.include?(value) }
|
21
|
+
when :extension
|
22
|
+
database.find_all { |_, info| info.extensions.include?(value) }
|
23
|
+
else
|
24
|
+
return []
|
25
|
+
end.map(&:last)
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_formats(file_or_hash)
|
29
|
+
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
|
30
|
+
hash.each do |category, format_list|
|
31
|
+
format_list.each do |format_name, format_info|
|
32
|
+
format_info.symbolize_keys!
|
33
|
+
format_name = format_name.to_sym
|
34
|
+
new_info = Libis::Format::Info.new(
|
35
|
+
name: format_name,
|
36
|
+
category: category.to_sym,
|
37
|
+
description: format_info[:NAME],
|
38
|
+
puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
|
39
|
+
mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
|
40
|
+
extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
|
41
|
+
)
|
42
|
+
if (old_info = database[format_name])
|
43
|
+
new_info = Libis::Format::Info.new(
|
44
|
+
name: format_name,
|
45
|
+
category: category.to_sym,
|
46
|
+
description: new_info.description.blank? ? old_info.description : new_info.description,
|
47
|
+
puids: (old_info.puids + new_info.puids).uniq,
|
48
|
+
mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
|
49
|
+
extensions: (old_info.extensions + new_info.extensions).uniq
|
50
|
+
)
|
51
|
+
end
|
52
|
+
database[format_name] = new_info
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :database
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@database = {}
|
64
|
+
format_database = Libis::Format::Config[:format_library_database]
|
65
|
+
load_formats(format_database)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
data/lib/libis/format.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/format/version'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
autoload :Config, 'libis/format/config'
|
8
|
-
|
9
|
-
autoload :
|
6
|
+
|
7
|
+
autoload :Info, 'libis/format/info'
|
8
|
+
autoload :Library, 'libis/format/library'
|
9
|
+
autoload :YamlLoader, 'libis/format/yaml_loader'
|
10
|
+
|
10
11
|
autoload :Identifier, 'libis/format/identifier'
|
11
12
|
|
12
13
|
autoload :Tool, 'libis/format/tool'
|
@@ -15,5 +16,6 @@ module Libis
|
|
15
16
|
ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
|
16
17
|
DATA_DIR = File.join(ROOT_DIR, 'data')
|
17
18
|
TOOL_DIR = File.join(ROOT_DIR, 'tools')
|
19
|
+
|
18
20
|
end
|
19
|
-
end
|
21
|
+
end
|
data/lib/libis-format.rb
CHANGED
data/libis-format.gemspec
CHANGED
@@ -1,48 +1,42 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
|
3
|
-
lib = File.expand_path('lib',
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
-
require 'libis/format/version'
|
7
6
|
require 'bundler'
|
7
|
+
require 'libis/format/version'
|
8
8
|
|
9
9
|
Gem::Specification.new do |spec|
|
10
10
|
spec.name = 'libis-format'
|
11
11
|
spec.version = Libis::Format::VERSION
|
12
12
|
spec.authors = ['Kris Dekeyser']
|
13
13
|
spec.email = ['kris.dekeyser@libis.be']
|
14
|
-
spec.summary =
|
15
|
-
spec.description =
|
14
|
+
spec.summary = %q{LIBIS File format format services.}
|
15
|
+
spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
|
16
16
|
spec.homepage = ''
|
17
17
|
spec.license = 'MIT'
|
18
18
|
|
19
19
|
spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
|
20
|
-
spec.required_ruby_version = '>= 3.2'
|
21
20
|
|
22
|
-
spec.files
|
23
|
-
f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
|
24
|
-
end
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
22
|
spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
|
23
|
+
# spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
26
24
|
spec.require_paths = ['lib']
|
27
25
|
|
28
|
-
spec.
|
29
|
-
spec.
|
30
|
-
spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
|
31
|
-
spec.add_runtime_dependency 'libis-tools', '~> 1.1'
|
32
|
-
spec.add_runtime_dependency 'mini_magick', '~> 4.12'
|
33
|
-
spec.add_runtime_dependency 'naturally', '~> 2.2'
|
34
|
-
spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
|
35
|
-
spec.add_runtime_dependency 'os', '~> 1.1'
|
36
|
-
spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
|
37
|
-
spec.add_runtime_dependency 'pdfkit', '~> 0.8'
|
38
|
-
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.3'
|
27
|
+
spec.add_development_dependency 'rspec', '~> 3.1'
|
39
28
|
spec.add_development_dependency 'awesome_print'
|
40
|
-
spec.add_development_dependency 'equivalent-xml'
|
41
|
-
spec.
|
42
|
-
spec.add_development_dependency 'rspec'
|
43
|
-
if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
|
29
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.5'
|
30
|
+
if spec.platform == Gem::Platform::JAVA
|
44
31
|
spec.add_development_dependency 'saxon-xslt'
|
45
32
|
else
|
46
33
|
spec.add_development_dependency 'nokogiri'
|
47
34
|
end
|
35
|
+
|
36
|
+
spec.add_runtime_dependency 'libis-tools', '~> 1.0'
|
37
|
+
spec.add_runtime_dependency 'os', '= 0.9.6'
|
38
|
+
spec.add_runtime_dependency 'mini_magick', '~> 4.3'
|
39
|
+
spec.add_runtime_dependency 'deep_dive', '~> 0.3'
|
40
|
+
spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
|
41
|
+
spec.add_runtime_dependency 'naturally', '~> 2.1'
|
48
42
|
end
|
data/tools/PdfTool.jar
CHANGED
Binary file
|
Binary file
|
Binary file
|