libis-format 1.3.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.coveralls.yml +2 -0
- data/.gitignore +20 -0
- data/.travis.yml +70 -0
- data/Gemfile +0 -10
- data/README.md +2 -2
- data/Rakefile +8 -0
- data/base/Dockerfile +35 -0
- data/base/Dockerfile.alpine +20 -0
- data/base/Dockerfile.rvm +56 -0
- data/base/rework_path +20 -0
- data/data/PDFA_def.ps +3 -3
- data/data/eciRGB_v2.icc +0 -0
- data/data/types.yml +4 -17
- data/docker_cfg.yml +1 -0
- data/lib/libis/format/cli/convert.rb +4 -4
- data/lib/libis/format/cli/prompt_helper.rb +24 -32
- data/lib/libis/format/command_line.rb +3 -2
- data/lib/libis/format/config.rb +22 -20
- data/lib/libis/format/converter/audio_converter.rb +31 -56
- data/lib/libis/format/converter/base.rb +36 -16
- data/lib/libis/format/converter/chain.rb +32 -52
- data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
- data/lib/libis/format/converter/image_assembler.rb +82 -0
- data/lib/libis/format/converter/image_converter.rb +40 -153
- data/lib/libis/format/converter/image_splitter.rb +80 -0
- data/lib/libis/format/converter/image_watermarker.rb +261 -0
- data/lib/libis/format/converter/jp2_converter.rb +38 -36
- data/lib/libis/format/converter/office_converter.rb +28 -22
- data/lib/libis/format/converter/pdf_assembler.rb +66 -0
- data/lib/libis/format/converter/pdf_converter.rb +50 -111
- data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
- data/lib/libis/format/converter/pdf_splitter.rb +65 -0
- data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
- data/lib/libis/format/converter/repository.rb +13 -7
- data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
- data/lib/libis/format/converter/video_converter.rb +58 -47
- data/lib/libis/format/converter/xslt_converter.rb +11 -13
- data/lib/libis/format/converter.rb +1 -1
- data/lib/libis/format/identifier.rb +46 -44
- data/lib/libis/format/info.rb +27 -0
- data/lib/libis/format/library.rb +147 -0
- data/lib/libis/format/tool/droid.rb +30 -29
- data/lib/libis/format/tool/extension_identification.rb +26 -24
- data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
- data/lib/libis/format/tool/fido.rb +27 -22
- data/lib/libis/format/tool/file_tool.rb +24 -11
- data/lib/libis/format/tool/fop_pdf.rb +14 -25
- data/lib/libis/format/tool/identification_tool.rb +40 -38
- data/lib/libis/format/tool/office_to_pdf.rb +18 -30
- data/lib/libis/format/tool/pdf_copy.rb +15 -24
- data/lib/libis/format/tool/pdf_merge.rb +14 -24
- data/lib/libis/format/tool/pdf_optimizer.rb +17 -24
- data/lib/libis/format/tool/pdf_split.rb +16 -25
- data/lib/libis/format/tool/pdf_to_pdfa.rb +32 -50
- data/lib/libis/format/tool/pdfa_validator.rb +30 -25
- data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
- data/lib/libis/format/tool.rb +3 -4
- data/lib/libis/format/version.rb +1 -3
- data/lib/libis/format/yaml_loader.rb +71 -0
- data/lib/libis/format.rb +7 -5
- data/lib/libis-format.rb +0 -2
- data/libis-format.gemspec +18 -24
- metadata +78 -120
- data/data/AdobeRGB1998.icc +0 -0
- data/lib/libis/format/converter/email_converter.rb +0 -35
- data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
- data/lib/libis/format/type_database.rb +0 -156
- data/lib/libis/format/type_database_impl.rb +0 -153
- data/tools/pdf2pdfa +0 -395
- /data/bin/{droid_tool → droid} +0 -0
- /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,42 +9,37 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfOptimizer
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
|
19
|
-
(result[:status]).zero?
|
20
|
-
end
|
21
|
-
|
22
16
|
def self.run(source, target, quality)
|
23
|
-
new.run source, target, quality
|
17
|
+
self.new.run source, target, quality
|
24
18
|
end
|
25
19
|
|
26
20
|
def run(source, target, quality)
|
21
|
+
|
27
22
|
timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
|
28
23
|
result = Libis::Tools::Command.run(
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
24
|
+
'gs',
|
25
|
+
'-sDEVICE=pdfwrite',
|
26
|
+
'-dCompatibilityLevel=1.4',
|
27
|
+
"-dPDFSETTINGS=/#{quality}",
|
28
|
+
'-dNOPAUSE',
|
29
|
+
'-dBATCH',
|
30
|
+
"-sOutputFile=#{target}",
|
31
|
+
"#{source}",
|
32
|
+
timeout: timeout,
|
33
|
+
kill_after: timeout * 2
|
39
34
|
)
|
40
35
|
|
41
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
42
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
36
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
37
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
|
43
38
|
|
44
|
-
|
45
|
-
command: result,
|
46
|
-
files: [target]
|
47
|
-
}
|
39
|
+
result
|
48
40
|
end
|
49
41
|
end
|
42
|
+
|
50
43
|
end
|
51
44
|
end
|
52
45
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'os'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,46 +9,39 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfSplit
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
|
-
def self.
|
18
|
-
|
19
|
-
return false unless (result[:status]).zero?
|
20
|
-
|
21
|
-
File.exist?(Libis::Format::Config[:pdf_tool])
|
16
|
+
def self.run(source, target, options = [])
|
17
|
+
self.new.run source, target, options
|
22
18
|
end
|
23
19
|
|
24
|
-
def
|
25
|
-
new.run source, target, *args
|
26
|
-
end
|
20
|
+
def run(source, target, options = [])
|
27
21
|
|
28
|
-
def run(source, target, *args)
|
29
22
|
if OS.java?
|
30
23
|
# TODO: import library and execute in current VM. For now do exactly as in MRI.
|
31
24
|
end
|
32
25
|
|
33
26
|
timeout = Libis::Format::Config[:timeouts][:pdf_split]
|
34
27
|
result = Libis::Tools::Command.run(
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
28
|
+
Libis::Format::Config[:java_cmd],
|
29
|
+
'-cp', Libis::Format::Config[:pdf_tool],
|
30
|
+
'SplitPdf',
|
31
|
+
'--file_input', source,
|
32
|
+
'--file_output', target,
|
33
|
+
*options,
|
34
|
+
timeout: timeout,
|
35
|
+
kill_after: timeout * 2
|
43
36
|
)
|
44
37
|
|
45
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
46
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
38
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
39
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
47
40
|
|
48
|
-
|
49
|
-
command: result,
|
50
|
-
files: [target] # TODO: collect the files
|
51
|
-
}
|
41
|
+
result
|
52
42
|
end
|
53
43
|
end
|
44
|
+
|
54
45
|
end
|
55
46
|
end
|
56
47
|
end
|
@@ -1,9 +1,6 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'tempfile'
|
4
2
|
require 'csv'
|
5
3
|
require 'fileutils'
|
6
|
-
require 'pdfinfo'
|
7
4
|
|
8
5
|
require 'libis/tools/extend/string'
|
9
6
|
require 'libis/tools/logger'
|
@@ -15,24 +12,19 @@ require 'libis/format'
|
|
15
12
|
module Libis
|
16
13
|
module Format
|
17
14
|
module Tool
|
15
|
+
|
18
16
|
class PdfToPdfa
|
19
17
|
include ::Libis::Tools::Logger
|
20
18
|
|
21
|
-
def self.
|
22
|
-
|
23
|
-
result.zero?
|
19
|
+
def self.run(source, target = nil, options = {})
|
20
|
+
self.new.run source, target, options
|
24
21
|
end
|
25
22
|
|
26
|
-
def
|
27
|
-
new.run source, target, **options
|
28
|
-
end
|
23
|
+
def run(source, target = nil, options = nil)
|
29
24
|
|
30
|
-
def run(source, target = nil, **options)
|
31
25
|
tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
|
32
26
|
target ||= tmp_target
|
33
27
|
|
34
|
-
metadata = get_metadata(source)
|
35
|
-
|
36
28
|
icc_info = icc_options(options[:colorspace])
|
37
29
|
|
38
30
|
icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
|
@@ -40,62 +32,52 @@ module Libis
|
|
40
32
|
|
41
33
|
def_filename = Tools::TempFile.name('PDFA_def', '.ps')
|
42
34
|
File.open(def_filename, 'w') do |f|
|
43
|
-
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
|
44
|
-
|
45
|
-
|
46
|
-
.gsub('[**METADATA**]', metadata)
|
35
|
+
f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
|
36
|
+
gsub('[** Fill in ICC profile location **]', icc_file).
|
37
|
+
gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
|
47
38
|
end
|
48
39
|
|
49
40
|
timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
|
50
41
|
result = Libis::Tools::Command.run(
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
source,
|
63
|
-
timeout:,
|
64
|
-
kill_after: timeout * 2
|
42
|
+
Libis::Format::Config[:ghostscript_cmd],
|
43
|
+
'-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
|
44
|
+
'-sColorConversionStrategy=/UseDeviceIndependentColor',
|
45
|
+
"-sProcessColorModel=#{icc_info[:device]}",
|
46
|
+
'-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
|
47
|
+
"-sOutputICCProfile=#{icc_file}",
|
48
|
+
'-o', File.absolute_path(target),
|
49
|
+
def_filename,
|
50
|
+
source,
|
51
|
+
timeout: timeout,
|
52
|
+
kill_after: timeout * 2
|
65
53
|
)
|
66
54
|
|
67
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
68
|
-
raise "#{self.class} failed with error #{result[:status]}: \n#{(result[:out] + result[:err]).join("\n")}" if result[:status] != 0
|
55
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
69
56
|
|
70
57
|
FileUtils.rm [icc_file, def_filename].compact, force: true
|
71
58
|
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
59
|
+
unless Format::Tool::PdfaValidator.run(target)
|
60
|
+
result[:status] = -999
|
61
|
+
result[:err] << 'Failed to validate generated PDF/A file.'
|
62
|
+
end
|
63
|
+
|
64
|
+
result
|
76
65
|
end
|
77
66
|
|
78
|
-
private
|
79
67
|
|
80
|
-
|
81
|
-
info = Pdfinfo.new(source)
|
82
|
-
metadata = "/Title (#{info.title})"
|
83
|
-
metadata += "\n /Author (#{info.author})" if info.author
|
84
|
-
metadata += "\n /Subject (#{info.subject})" if info.subject
|
85
|
-
metadata += "\n /Keywords (#{info.keywords})" if info.keywords
|
86
|
-
metadata += "\n /Creator (#{info.creator})" if info.creator
|
87
|
-
metadata
|
88
|
-
end
|
68
|
+
private
|
89
69
|
|
90
70
|
def icc_options(colorspace)
|
91
71
|
case colorspace.to_s.downcase
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
72
|
+
when 'cmyk'
|
73
|
+
{icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
|
74
|
+
else
|
75
|
+
{icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
|
96
76
|
end
|
97
77
|
end
|
78
|
+
|
98
79
|
end
|
80
|
+
|
99
81
|
end
|
100
82
|
end
|
101
83
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'fileutils'
|
4
2
|
|
5
3
|
require 'libis/tools/extend/string'
|
@@ -11,58 +9,65 @@ require 'libis/format/config'
|
|
11
9
|
module Libis
|
12
10
|
module Format
|
13
11
|
module Tool
|
12
|
+
|
14
13
|
class PdfaValidator
|
15
14
|
include ::Libis::Tools::Logger
|
16
15
|
|
17
16
|
def self.run(source)
|
18
|
-
new.run source
|
17
|
+
self.new.run source
|
19
18
|
end
|
20
19
|
|
21
20
|
def run(source)
|
21
|
+
|
22
22
|
src_file = File.absolute_path(source)
|
23
23
|
|
24
24
|
timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
|
25
|
-
result = nil
|
26
25
|
if (pdfa = Libis::Format::Config[:pdfa_cmd])
|
27
26
|
# Keep it clean: tool generates fontconfig/ cache dir in current working dir
|
28
27
|
previous_wd = Dir.getwd
|
29
28
|
Dir.chdir(Dir.tmpdir)
|
30
29
|
|
31
30
|
result = Libis::Tools::Command.run(
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
31
|
+
pdfa,
|
32
|
+
'--noxml',
|
33
|
+
'--level', 'B',
|
34
|
+
'--verb', '0',
|
35
|
+
src_file,
|
36
|
+
timeout: timeout,
|
37
|
+
kill_after: timeout * 2
|
39
38
|
)
|
40
39
|
|
41
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
42
|
-
raise "#{self.class} errors: #{result[:err].join("\n")}" unless
|
40
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
41
|
+
raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
|
43
42
|
|
44
43
|
Dir.chdir(previous_wd)
|
45
44
|
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
|
46
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
47
|
+
result[:out].join("\n")
|
48
|
+
return false
|
49
|
+
end
|
50
50
|
else
|
51
51
|
jar = Libis::Format::Config[:preflight_jar]
|
52
52
|
result = Libis::Tools::Command.run(
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
53
|
+
Libis::Format::Config[:java_cmd],
|
54
|
+
'-jar', jar,
|
55
|
+
src_file,
|
56
|
+
timeout: timeout,
|
57
|
+
kill_after: timeout * 2
|
58
58
|
)
|
59
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
59
60
|
|
60
|
-
|
61
|
-
|
61
|
+
unless result[:status] == 0
|
62
|
+
warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
|
63
|
+
result[:out].join("\n")
|
64
|
+
return false
|
65
|
+
end
|
62
66
|
end
|
63
|
-
|
67
|
+
true
|
64
68
|
end
|
65
69
|
end
|
70
|
+
|
66
71
|
end
|
67
72
|
end
|
68
73
|
end
|
@@ -1,5 +1,4 @@
|
|
1
|
-
#
|
2
|
-
|
1
|
+
# noinspection RubyResolve
|
3
2
|
require 'fileutils'
|
4
3
|
|
5
4
|
require 'libis/tools/extend/string'
|
@@ -11,62 +10,52 @@ require 'libis/format/config'
|
|
11
10
|
module Libis
|
12
11
|
module Format
|
13
12
|
module Tool
|
13
|
+
|
14
14
|
class SpreadsheetToOds
|
15
15
|
include ::Libis::Tools::Logger
|
16
16
|
|
17
|
-
def self.installed?
|
18
|
-
result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
|
19
|
-
result.zero?
|
20
|
-
end
|
21
|
-
|
22
17
|
def self.run(source, target, options = {})
|
23
|
-
new.run source, target, options
|
18
|
+
self.new.run source, target, options
|
24
19
|
end
|
25
20
|
|
26
21
|
def run(source, target, options = {})
|
27
22
|
workdir = '/...'
|
28
23
|
workdir = Dir.tmpdir unless Dir.exist? workdir
|
29
24
|
|
30
|
-
workdir = File.join(workdir, rand(
|
25
|
+
workdir = File.join(workdir, rand(1000000).to_s)
|
31
26
|
FileUtils.mkpath(workdir)
|
32
27
|
|
33
28
|
src_file = File.join(workdir, File.basename(source))
|
34
29
|
FileUtils.symlink source, src_file
|
35
30
|
|
36
|
-
tgt_file = File.join(workdir,
|
31
|
+
tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
|
37
32
|
|
38
33
|
export_filter = options[:export_filter] || 'ods'
|
39
34
|
|
40
35
|
timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
|
41
|
-
|
36
|
+
Libis::Format::Config[:timeouts][:office_to_pdf]
|
42
37
|
result = Libis::Tools::Command.run(
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
38
|
+
Libis::Format::Config[:soffice_cmd], '--headless',
|
39
|
+
"-env:UserInstallation=file://#{workdir}",
|
40
|
+
'--convert-to', export_filter,
|
41
|
+
'--outdir', workdir, src_file,
|
42
|
+
timeout: timeout,
|
43
|
+
kill_after: timeout * 2
|
49
44
|
)
|
50
45
|
|
51
|
-
raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
52
|
-
|
46
|
+
raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
|
53
47
|
warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
|
54
|
-
raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
48
|
+
raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
|
55
49
|
|
56
50
|
FileUtils.copy tgt_file, target, preserve: true
|
57
51
|
|
58
|
-
{
|
59
|
-
command: result,
|
60
|
-
files: [target]
|
61
|
-
}
|
62
52
|
ensure
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
nil
|
67
|
-
end
|
53
|
+
FileUtils.rmtree workdir rescue nil
|
54
|
+
|
55
|
+
result[:out]
|
68
56
|
end
|
69
57
|
end
|
58
|
+
|
70
59
|
end
|
71
60
|
end
|
72
61
|
end
|
data/lib/libis/format/tool.rb
CHANGED
@@ -1,17 +1,16 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
# code utf-8
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
module Tool
|
6
|
+
|
8
7
|
autoload :Droid, 'libis/format/tool/droid'
|
9
8
|
autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
|
10
9
|
autoload :Fido, 'libis/format/tool/fido'
|
11
10
|
autoload :FileTool, 'libis/format/tool/file_tool'
|
12
11
|
|
13
12
|
autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
|
14
|
-
autoload :FFMpeg, 'libis/format/tool/
|
13
|
+
autoload :FFMpeg, 'libis/format/tool/ffmpeg'
|
15
14
|
autoload :FopPdf, 'libis/format/tool/fop_pdf'
|
16
15
|
autoload :PdfCopy, 'libis/format/tool/pdf_copy'
|
17
16
|
autoload :PdfMerge, 'libis/format/tool/pdf_merge'
|
@@ -19,7 +18,7 @@ module Libis
|
|
19
18
|
autoload :PdfSplit, 'libis/format/tool/pdf_split'
|
20
19
|
autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
|
21
20
|
autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
|
22
|
-
|
21
|
+
|
23
22
|
end
|
24
23
|
end
|
25
24
|
end
|
data/lib/libis/format/version.rb
CHANGED
@@ -0,0 +1,71 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require 'yaml'
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
module Libis
|
6
|
+
module Format
|
7
|
+
class YamlLoader
|
8
|
+
# noinspection RubyResolve
|
9
|
+
include Singleton
|
10
|
+
|
11
|
+
def query(key, value)
|
12
|
+
case key.to_s.downcase.to_sym
|
13
|
+
when :name
|
14
|
+
return [database[value.to_s.upcase.to_sym]]
|
15
|
+
when :category
|
16
|
+
database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
|
17
|
+
when :puid
|
18
|
+
database.find_all { |_, info| info.puids.include?(value) }
|
19
|
+
when :mimetype
|
20
|
+
database.find_all { |_, info| info.mimetypes.include?(value) }
|
21
|
+
when :extension
|
22
|
+
database.find_all { |_, info| info.extensions.include?(value) }
|
23
|
+
else
|
24
|
+
return []
|
25
|
+
end.map(&:last)
|
26
|
+
end
|
27
|
+
|
28
|
+
def load_formats(file_or_hash)
|
29
|
+
hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
|
30
|
+
hash.each do |category, format_list|
|
31
|
+
format_list.each do |format_name, format_info|
|
32
|
+
format_info.symbolize_keys!
|
33
|
+
format_name = format_name.to_sym
|
34
|
+
new_info = Libis::Format::Info.new(
|
35
|
+
name: format_name,
|
36
|
+
category: category.to_sym,
|
37
|
+
description: format_info[:NAME],
|
38
|
+
puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
|
39
|
+
mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
|
40
|
+
extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
|
41
|
+
)
|
42
|
+
if (old_info = database[format_name])
|
43
|
+
new_info = Libis::Format::Info.new(
|
44
|
+
name: format_name,
|
45
|
+
category: category.to_sym,
|
46
|
+
description: new_info.description.blank? ? old_info.description : new_info.description,
|
47
|
+
puids: (old_info.puids + new_info.puids).uniq,
|
48
|
+
mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
|
49
|
+
extensions: (old_info.extensions + new_info.extensions).uniq
|
50
|
+
)
|
51
|
+
end
|
52
|
+
database[format_name] = new_info
|
53
|
+
end
|
54
|
+
end
|
55
|
+
|
56
|
+
end
|
57
|
+
|
58
|
+
private
|
59
|
+
|
60
|
+
attr_reader :database
|
61
|
+
|
62
|
+
def initialize
|
63
|
+
@database = {}
|
64
|
+
format_database = Libis::Format::Config[:format_library_database]
|
65
|
+
load_formats(format_database)
|
66
|
+
end
|
67
|
+
|
68
|
+
end
|
69
|
+
|
70
|
+
end
|
71
|
+
end
|
data/lib/libis/format.rb
CHANGED
@@ -1,12 +1,13 @@
|
|
1
|
-
# frozen_string_literal: true
|
2
|
-
|
3
1
|
require 'libis/format/version'
|
4
2
|
|
5
3
|
module Libis
|
6
4
|
module Format
|
7
5
|
autoload :Config, 'libis/format/config'
|
8
|
-
|
9
|
-
autoload :
|
6
|
+
|
7
|
+
autoload :Info, 'libis/format/info'
|
8
|
+
autoload :Library, 'libis/format/library'
|
9
|
+
autoload :YamlLoader, 'libis/format/yaml_loader'
|
10
|
+
|
10
11
|
autoload :Identifier, 'libis/format/identifier'
|
11
12
|
|
12
13
|
autoload :Tool, 'libis/format/tool'
|
@@ -15,5 +16,6 @@ module Libis
|
|
15
16
|
ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
|
16
17
|
DATA_DIR = File.join(ROOT_DIR, 'data')
|
17
18
|
TOOL_DIR = File.join(ROOT_DIR, 'tools')
|
19
|
+
|
18
20
|
end
|
19
|
-
end
|
21
|
+
end
|
data/lib/libis-format.rb
CHANGED
data/libis-format.gemspec
CHANGED
@@ -1,48 +1,42 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
|
3
|
-
lib = File.expand_path('lib',
|
3
|
+
lib = File.expand_path('../lib', __FILE__)
|
4
4
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
5
5
|
|
6
|
-
require 'libis/format/version'
|
7
6
|
require 'bundler'
|
7
|
+
require 'libis/format/version'
|
8
8
|
|
9
9
|
Gem::Specification.new do |spec|
|
10
10
|
spec.name = 'libis-format'
|
11
11
|
spec.version = Libis::Format::VERSION
|
12
12
|
spec.authors = ['Kris Dekeyser']
|
13
13
|
spec.email = ['kris.dekeyser@libis.be']
|
14
|
-
spec.summary =
|
15
|
-
spec.description =
|
14
|
+
spec.summary = %q{LIBIS File format format services.}
|
15
|
+
spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
|
16
16
|
spec.homepage = ''
|
17
17
|
spec.license = 'MIT'
|
18
18
|
|
19
19
|
spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
|
20
|
-
spec.required_ruby_version = '>= 3.2'
|
21
20
|
|
22
|
-
spec.files
|
23
|
-
f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
|
24
|
-
end
|
21
|
+
spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
|
25
22
|
spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
|
23
|
+
# spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
|
26
24
|
spec.require_paths = ['lib']
|
27
25
|
|
28
|
-
spec.
|
29
|
-
spec.
|
30
|
-
spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
|
31
|
-
spec.add_runtime_dependency 'libis-tools', '~> 1.1'
|
32
|
-
spec.add_runtime_dependency 'mini_magick', '~> 4.12'
|
33
|
-
spec.add_runtime_dependency 'naturally', '~> 2.2'
|
34
|
-
spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
|
35
|
-
spec.add_runtime_dependency 'os', '~> 1.1'
|
36
|
-
spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
|
37
|
-
spec.add_runtime_dependency 'pdfkit', '~> 0.8'
|
38
|
-
|
26
|
+
spec.add_development_dependency 'rake', '~> 10.3'
|
27
|
+
spec.add_development_dependency 'rspec', '~> 3.1'
|
39
28
|
spec.add_development_dependency 'awesome_print'
|
40
|
-
spec.add_development_dependency 'equivalent-xml'
|
41
|
-
spec.
|
42
|
-
spec.add_development_dependency 'rspec'
|
43
|
-
if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
|
29
|
+
spec.add_development_dependency 'equivalent-xml', '~> 0.5'
|
30
|
+
if spec.platform == Gem::Platform::JAVA
|
44
31
|
spec.add_development_dependency 'saxon-xslt'
|
45
32
|
else
|
46
33
|
spec.add_development_dependency 'nokogiri'
|
47
34
|
end
|
35
|
+
|
36
|
+
spec.add_runtime_dependency 'libis-tools', '~> 1.0'
|
37
|
+
spec.add_runtime_dependency 'os', '= 0.9.6'
|
38
|
+
spec.add_runtime_dependency 'mini_magick', '~> 4.3'
|
39
|
+
spec.add_runtime_dependency 'deep_dive', '~> 0.3'
|
40
|
+
spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
|
41
|
+
spec.add_runtime_dependency 'naturally', '~> 2.1'
|
48
42
|
end
|