libis-format 1.3.0 → 2.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -10
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +22 -20
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +50 -111
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +15 -24
- data/lib/libis/format/tool/pdf_merge.rb +14 -24
- data/lib/libis/format/tool/pdf_optimizer.rb +17 -24
- data/lib/libis/format/tool/pdf_split.rb +16 -25
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
- data/lib/libis/format/tool/pdfa_validator.rb +30 -25
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- metadata +78 -120
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,42 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfOptimizer
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, quality)
|
23
|
-
new.run source, target, quality
|
17
|
+
self.new.run source, target, quality
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, quality)
|
21
|
+
|
27
22
|
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
23
|
result = Libis::Tools::Command.run(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
'gs',
|
25
|
+
'-sDEVICE=pdfwrite',
|
26
|
+
'-dCompatibilityLevel=1.4',
|
27
|
+
"-dPDFSETTINGS=/#{quality}",
|
28
|
+
'-dNOPAUSE',
|
29
|
+
'-dBATCH',
|
30
|
+
"-sOutputFile=#{target}",
|
31
|
+
"#{source}",
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
39
34
|
)
|
40
35
|
|
41
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
42
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
43
38
|
|
44
|
-
|
45
|
-
command: result,
|
46
|
-
files: [target]
|
47
|
-
}
|
39
|
+
result
|
48
40
|
end
|
49
41
|
end
|
42
|
+
|
50
43
|
end
|
51
44
|
end
|
52
45
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,46 +9,39 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfSplit
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
22
18
|
end
|
23
19
|
|
24
|
-
def
|
25
|
-
new.run source, target, *args
|
26
|
-
end
|
20
|
+
def run(source, target, options = [])
|
27
21
|
|
28
|
-
def run(source, target, *args)
|
29
22
|
if OS.java?
|
30
23
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
31
24
|
end
|
32
25
|
|
33
26
|
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
34
27
|
result = Libis::Tools::Command.run(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'SplitPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
43
36
|
)
|
44
37
|
|
45
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
47
40
|
|
48
|
-
|
49
|
-
command: result,
|
50
|
-
files: [target] # TODO: collect the files
|
51
|
-
}
|
41
|
+
result
|
52
42
|
end
|
53
43
|
end
|
44
|
+
|
54
45
|
end
|
55
46
|
end
|
56
47
|
end
|
@@ -1,9 +1,6 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'tempfile'
|
4
2
|
require 'csv'
|
5
3
|
require 'fileutils'
|
6
|
-
require 'pdfinfo'
|
7
4
|
|
8
5
|
require 'libis/tools/extend/string'
|
9
6
|
require 'libis/tools/logger'
|
@@ -15,24 +12,19 @@ require 'libis/format'
|
|
15
12
|
module Libis
|
16
13
|
module Format
|
17
14
|
module Tool
|
15
|
+
|
18
16
|
class PdfToPdfa
|
19
17
|
include ::Libis::Tools::Logger
|
20
18
|
|
21
|
-
def self.
|
22
|
-
|
23
|
-
result.zero?
|
19
|
+
def self.run(source, target = nil, options = {})
|
20
|
+
self.new.run source, target, options
|
24
21
|
end
|
25
22
|
|
26
|
-
def
|
27
|
-
new.run source, target, **options
|
28
|
-
end
|
23
|
+
def run(source, target = nil, options = nil)
|
29
24
|
|
30
|
-
def run(source, target = nil, **options)
|
31
25
|
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
32
26
|
target ||= tmp_target
|
33
27
|
|
34
|
-
metadata = get_metadata(source)
|
35
|
-
|
36
28
|
icc_info = icc_options(options[:colorspace])
|
37
29
|
|
38
30
|
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
@@ -40,62 +32,52 @@ module Libis
|
|
40
32
|
|
41
33
|
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
42
34
|
File.open(def_filename, 'w') do |f|
|
43
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
|
44
|
-
|
45
|
-
|
46
|
-
.gsub('[**METADATA**]', metadata)
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
47
38
|
end
|
48
39
|
|
49
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
50
41
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
source,
|
63
|
-
timeout:,
|
64
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
65
53
|
)
|
66
54
|
|
67
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
68
|
-
raise "#{self.class} failed with error #{result[:status]}: \n#{(result[:out] + result[:err]).join("\n")}" if result[:status] != 0
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
69
56
|
|
70
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
71
58
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
64
|
+
result
|
76
65
|
end
|
77
66
|
|
78
|
-
private
|
79
67
|
|
80
|
-
|
81
|
-
info = Pdfinfo.new(source)
|
82
|
-
metadata = "/Title (#{info.title})"
|
83
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
84
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
85
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
86
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
87
|
-
metadata
|
88
|
-
end
|
68
|
+
private
|
89
69
|
|
90
70
|
def icc_options(colorspace)
|
91
71
|
case colorspace.to_s.downcase
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
96
76
|
end
|
97
77
|
end
|
78
|
+
|
98
79
|
end
|
80
|
+
|
99
81
|
end
|
100
82
|
end
|
101
83
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,58 +9,65 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfaValidator
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
16
|
def self.run(source)
|
18
|
-
new.run source
|
17
|
+
self.new.run source
|
19
18
|
end
|
20
19
|
|
21
20
|
def run(source)
|
21
|
+
|
22
22
|
src_file = File.absolute_path(source)
|
23
23
|
|
24
24
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
25
|
-
result = nil
|
26
25
|
if (pdfa = Libis::Format::Config[:pdfa_cmd])
|
27
26
|
# Keep it clean: tool generates fontconfig/ cache dir in current working dir
|
28
27
|
previous_wd = Dir.getwd
|
29
28
|
Dir.chdir(Dir.tmpdir)
|
30
29
|
|
31
30
|
result = Libis::Tools::Command.run(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
pdfa,
|
32
|
+
'--noxml',
|
33
|
+
'--level', 'B',
|
34
|
+
'--verb', '0',
|
35
|
+
src_file,
|
36
|
+
timeout: timeout,
|
37
|
+
kill_after: timeout * 2
|
39
38
|
)
|
40
39
|
|
41
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
42
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
40
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
43
42
|
|
44
43
|
Dir.chdir(previous_wd)
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
|
46
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
47
|
+
result[:out].join("\n")
|
48
|
+
return false
|
49
|
+
end
|
50
50
|
else
|
51
51
|
jar = Libis::Format::Config[:preflight_jar]
|
52
52
|
result = Libis::Tools::Command.run(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
53
|
+
Libis::Format::Config[:java_cmd],
|
54
|
+
'-jar', jar,
|
55
|
+
src_file,
|
56
|
+
timeout: timeout,
|
57
|
+
kill_after: timeout * 2
|
58
58
|
)
|
59
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
59
60
|
|
60
|
-
|
61
|
-
|
61
|
+
unless result[:status] == 0
|
62
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
63
|
+
result[:out].join("\n")
|
64
|
+
return false
|
65
|
+
end
|
62
66
|
end
|
63
|
-
|
67
|
+
true
|
64
68
|
end
|
65
69
|
end
|
70
|
+
|
66
71
|
end
|
67
72
|
end
|
68
73
|
end
|
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# noinspection RubyResolve
|
3
2
|
require 'fileutils'
|
4
3
|
|
5
4
|
require 'libis/tools/extend/string'
|
@@ -11,62 +10,52 @@ require 'libis/format/config'
|
|
11
10
|
module Libis
|
12
11
|
module Format
|
13
12
|
module Tool
|
13
|
+
|
14
14
|
class SpreadsheetToOds
|
15
15
|
include ::Libis::Tools::Logger
|
16
16
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
-
result.zero?
|
20
|
-
end
|
21
|
-
|
22
17
|
def self.run(source, target, options = {})
|
23
|
-
new.run source, target, options
|
18
|
+
self.new.run source, target, options
|
24
19
|
end
|
25
20
|
|
26
21
|
def run(source, target, options = {})
|
27
22
|
workdir = '/...'
|
28
23
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
24
|
|
30
|
-
workdir = File.join(workdir, rand(
|
25
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
26
|
FileUtils.mkpath(workdir)
|
32
27
|
|
33
28
|
src_file = File.join(workdir, File.basename(source))
|
34
29
|
FileUtils.symlink source, src_file
|
35
30
|
|
36
|
-
tgt_file = File.join(workdir,
|
31
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
|
37
32
|
|
38
33
|
export_filter = options[:export_filter] || 'ods'
|
39
34
|
|
40
35
|
timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
|
41
|
-
|
36
|
+
Libis::Format::Config[:timeouts][:office_to_pdf]
|
42
37
|
result = Libis::Tools::Command.run(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
38
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
39
|
+
"-env:UserInstallation=file://#{workdir}",
|
40
|
+
'--convert-to', export_filter,
|
41
|
+
'--outdir', workdir, src_file,
|
42
|
+
timeout: timeout,
|
43
|
+
kill_after: timeout * 2
|
49
44
|
)
|
50
45
|
|
51
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
|
-
|
46
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
53
47
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
54
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
48
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
55
49
|
|
56
50
|
FileUtils.copy tgt_file, target, preserve: true
|
57
51
|
|
58
|
-
{
|
59
|
-
command: result,
|
60
|
-
files: [target]
|
61
|
-
}
|
62
52
|
ensure
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
nil
|
67
|
-
end
|
53
|
+
FileUtils.rmtree workdir rescue nil
|
54
|
+
|
55
|
+
result[:out]
|
68
56
|
end
|
69
57
|
end
|
58
|
+
|
70
59
|
end
|
71
60
|
end
|
72
61
|
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
# code utf-8
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
autoload :Droid, 'libis/format/tool/droid'
|
9
8
|
autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
|
10
9
|
autoload :Fido, 'libis/format/tool/fido'
|
11
10
|
autoload :FileTool, 'libis/format/tool/file_tool'
|
12
11
|
|
13
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
14
|
-
autoload :FFMpeg, 'libis/format/tool/
|
13
|
+
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
15
14
|
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
16
15
|
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
17
16
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
@@ -19,7 +18,7 @@ module Libis
|
|
19
18
|
autoload :PdfSplit, 'libis/format/tool/pdf_split'
|
20
19
|
autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
|
21
20
|
autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
|
22
|
-
|
21
|
+
|
23
22
|
end
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/version.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'yaml'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
class YamlLoader
|
8
|
+
# noinspection RubyResolve
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
def query(key, value)
|
12
|
+
case key.to_s.downcase.to_sym
|
13
|
+
when :name
|
14
|
+
return [database[value.to_s.upcase.to_sym]]
|
15
|
+
when :category
|
16
|
+
database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
|
17
|
+
when :puid
|
18
|
+
database.find_all { |_, info| info.puids.include?(value) }
|
19
|
+
when :mimetype
|
20
|
+
database.find_all { |_, info| info.mimetypes.include?(value) }
|
21
|
+
when :extension
|
22
|
+
database.find_all { |_, info| info.extensions.include?(value) }
|
23
|
+
else
|
24
|
+
return []
|
25
|
+
end.map(&:last)
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_formats(file_or_hash)
|
29
|
+
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
|
30
|
+
hash.each do |category, format_list|
|
31
|
+
format_list.each do |format_name, format_info|
|
32
|
+
format_info.symbolize_keys!
|
33
|
+
format_name = format_name.to_sym
|
34
|
+
new_info = Libis::Format::Info.new(
|
35
|
+
name: format_name,
|
36
|
+
category: category.to_sym,
|
37
|
+
description: format_info[:NAME],
|
38
|
+
puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
|
39
|
+
mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
|
40
|
+
extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
|
41
|
+
)
|
42
|
+
if (old_info = database[format_name])
|
43
|
+
new_info = Libis::Format::Info.new(
|
44
|
+
name: format_name,
|
45
|
+
category: category.to_sym,
|
46
|
+
description: new_info.description.blank? ? old_info.description : new_info.description,
|
47
|
+
puids: (old_info.puids + new_info.puids).uniq,
|
48
|
+
mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
|
49
|
+
extensions: (old_info.extensions + new_info.extensions).uniq
|
50
|
+
)
|
51
|
+
end
|
52
|
+
database[format_name] = new_info
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :database
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@database = {}
|
64
|
+
format_database = Libis::Format::Config[:format_library_database]
|
65
|
+
load_formats(format_database)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
data/lib/libis/format.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/format/version'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
autoload :Config, 'libis/format/config'
|
8
|
-
|
9
|
-
autoload :
|
6
|
+
|
7
|
+
autoload :Info, 'libis/format/info'
|
8
|
+
autoload :Library, 'libis/format/library'
|
9
|
+
autoload :YamlLoader, 'libis/format/yaml_loader'
|
10
|
+
|
10
11
|
autoload :Identifier, 'libis/format/identifier'
|
11
12
|
|
12
13
|
autoload :Tool, 'libis/format/tool'
|
@@ -15,5 +16,6 @@ module Libis
|
|
15
16
|
ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
|
16
17
|
DATA_DIR = File.join(ROOT_DIR, 'data')
|
17
18
|
TOOL_DIR = File.join(ROOT_DIR, 'tools')
|
19
|
+
|
18
20
|
end
|
19
|
-
end
|
21
|
+
end
|
data/lib/libis-format.rb
CHANGED
data/libis-format.gemspec
CHANGED
@@ -1,48 +1,42 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
|
3
|
-
lib = File.expand_path('lib',
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
-
require 'libis/format/version'
|
7
6
|
require 'bundler'
|
7
|
+
require 'libis/format/version'
|
8
8
|
|
9
9
|
Gem::Specification.new do |spec|
|
10
10
|
spec.name = 'libis-format'
|
11
11
|
spec.version = Libis::Format::VERSION
|
12
12
|
spec.authors = ['Kris Dekeyser']
|
13
13
|
spec.email = ['kris.dekeyser@libis.be']
|
14
|
-
spec.summary =
|
15
|
-
spec.description =
|
14
|
+
spec.summary = %q{LIBIS File format format services.}
|
15
|
+
spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
|
16
16
|
spec.homepage = ''
|
17
17
|
spec.license = 'MIT'
|
18
18
|
|
19
19
|
spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
|
20
|
-
spec.required_ruby_version = '>= 3.2'
|
21
20
|
|
22
|
-
spec.files
|
23
|
-
f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
|
24
|
-
end
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
22
|
spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
|
23
|
+
# spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
26
24
|
spec.require_paths = ['lib']
|
27
25
|
|
28
|
-
spec.
|
29
|
-
spec.
|
30
|
-
spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
|
31
|
-
spec.add_runtime_dependency 'libis-tools', '~> 1.1'
|
32
|
-
spec.add_runtime_dependency 'mini_magick', '~> 4.12'
|
33
|
-
spec.add_runtime_dependency 'naturally', '~> 2.2'
|
34
|
-
spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
|
35
|
-
spec.add_runtime_dependency 'os', '~> 1.1'
|
36
|
-
spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
|
37
|
-
spec.add_runtime_dependency 'pdfkit', '~> 0.8'
|
38
|
-
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.3'
|
27
|
+
spec.add_development_dependency 'rspec', '~> 3.1'
|
39
28
|
spec.add_development_dependency 'awesome_print'
|
40
|
-
spec.add_development_dependency 'equivalent-xml'
|
41
|
-
spec.
|
42
|
-
spec.add_development_dependency 'rspec'
|
43
|
-
if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
|
29
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.5'
|
30
|
+
if spec.platform == Gem::Platform::JAVA
|
44
31
|
spec.add_development_dependency 'saxon-xslt'
|
45
32
|
else
|
46
33
|
spec.add_development_dependency 'nokogiri'
|
47
34
|
end
|
35
|
+
|
36
|
+
spec.add_runtime_dependency 'libis-tools', '~> 1.0'
|
37
|
+
spec.add_runtime_dependency 'os', '= 0.9.6'
|
38
|
+
spec.add_runtime_dependency 'mini_magick', '~> 4.3'
|
39
|
+
spec.add_runtime_dependency 'deep_dive', '~> 0.3'
|
40
|
+
spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
|
41
|
+
spec.add_runtime_dependency 'naturally', '~> 2.1'
|
48
42
|
end
|