libis-format 1.3.4 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +45 -250
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +19 -25
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +33 -6
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +86 -128
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'fileutils'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,57 +9,65 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfaValidator
15
14
  include ::Libis::Tools::Logger
16
15
 
17
16
  def self.run(source)
18
- new.run source
17
+ self.new.run source
19
18
  end
20
19
 
21
20
  def run(source)
21
+
22
22
  src_file = File.absolute_path(source)
23
23
 
24
24
  timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
25
- result = nil
26
25
  if (pdfa = Libis::Format::Config[:pdfa_cmd])
27
26
  # Keep it clean: tool generates fontconfig/ cache dir in current working dir
28
27
  previous_wd = Dir.getwd
29
28
  Dir.chdir(Dir.tmpdir)
30
29
 
31
30
  result = Libis::Tools::Command.run(
32
- pdfa,
33
- '--noxml',
34
- '--level', 'B',
35
- '--verb', '0',
36
- src_file,
37
- timeout:,
38
- kill_after: timeout * 2
31
+ pdfa,
32
+ '--noxml',
33
+ '--level', 'B',
34
+ '--verb', '0',
35
+ src_file,
36
+ timeout: timeout,
37
+ kill_after: timeout * 2
39
38
  )
40
39
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
40
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
41
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
42
42
 
43
43
  Dir.chdir(previous_wd)
44
44
 
45
- out, err = result[:out].partition { |line| line =~ /^VLD-\[PASS\]/ }
46
- result[:out] = out
47
- result[:err] += err
48
-
45
+ unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
46
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
47
+ result[:out].join("\n")
48
+ return false
49
+ end
49
50
  else
50
51
  jar = Libis::Format::Config[:preflight_jar]
51
52
  result = Libis::Tools::Command.run(
52
- Libis::Format::Config[:java_cmd],
53
- '-jar', jar,
54
- src_file,
55
- timeout:,
56
- kill_after: timeout * 2
53
+ Libis::Format::Config[:java_cmd],
54
+ '-jar', jar,
55
+ src_file,
56
+ timeout: timeout,
57
+ kill_after: timeout * 2
57
58
  )
59
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
58
60
 
59
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
60
-
61
+ unless result[:status] == 0
62
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
63
+ result[:out].join("\n")
64
+ return false
65
+ end
61
66
  end
62
- result
67
+ true
63
68
  end
64
69
  end
70
+
65
71
  end
66
72
  end
67
73
  end
@@ -1,5 +1,4 @@
1
- # frozen_string_literal: true
2
-
1
+ # noinspection RubyResolve
3
2
  require 'fileutils'
4
3
 
5
4
  require 'libis/tools/extend/string'
@@ -11,62 +10,52 @@ require 'libis/format/config'
11
10
  module Libis
12
11
  module Format
13
12
  module Tool
13
+
14
14
  class SpreadsheetToOds
15
15
  include ::Libis::Tools::Logger
16
16
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
- result.zero?
20
- end
21
-
22
17
  def self.run(source, target, options = {})
23
- new.run source, target, options
18
+ self.new.run source, target, options
24
19
  end
25
20
 
26
21
  def run(source, target, options = {})
27
22
  workdir = '/...'
28
23
  workdir = Dir.tmpdir unless Dir.exist? workdir
29
24
 
30
- workdir = File.join(workdir, rand(1_000_000).to_s)
25
+ workdir = File.join(workdir, rand(1000000).to_s)
31
26
  FileUtils.mkpath(workdir)
32
27
 
33
28
  src_file = File.join(workdir, File.basename(source))
34
29
  FileUtils.symlink source, src_file
35
30
 
36
- tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.ods")
31
+ tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
37
32
 
38
33
  export_filter = options[:export_filter] || 'ods'
39
34
 
40
35
  timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
41
- Libis::Format::Config[:timeouts][:office_to_pdf]
36
+ Libis::Format::Config[:timeouts][:office_to_pdf]
42
37
  result = Libis::Tools::Command.run(
43
- Libis::Format::Config[:soffice_cmd], '--headless',
44
- "-env:UserInstallation=file://#{workdir}",
45
- '--convert-to', export_filter,
46
- '--outdir', workdir, src_file,
47
- timeout:,
48
- kill_after: timeout * 2
38
+ Libis::Format::Config[:soffice_cmd], '--headless',
39
+ "-env:UserInstallation=file://#{workdir}",
40
+ '--convert-to', export_filter,
41
+ '--outdir', workdir, src_file,
42
+ timeout: timeout,
43
+ kill_after: timeout * 2
49
44
  )
50
45
 
51
- raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
52
-
46
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
53
47
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
54
- raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
48
+ raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
55
49
 
56
50
  FileUtils.copy tgt_file, target, preserve: true
57
51
 
58
- {
59
- command: result,
60
- files: [target]
61
- }
62
52
  ensure
63
- begin
64
- FileUtils.rmtree workdir
65
- rescue StandardError
66
- nil
67
- end
53
+ FileUtils.rmtree workdir rescue nil
54
+
55
+ result[:out]
68
56
  end
69
57
  end
58
+
70
59
  end
71
60
  end
72
61
  end
@@ -1,17 +1,16 @@
1
- # frozen_string_literal: true
2
-
3
1
  # code utf-8
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  module Tool
6
+
8
7
  autoload :Droid, 'libis/format/tool/droid'
9
8
  autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
10
9
  autoload :Fido, 'libis/format/tool/fido'
11
10
  autoload :FileTool, 'libis/format/tool/file_tool'
12
11
 
13
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
14
- autoload :FFMpeg, 'libis/format/tool/ff_mpeg'
13
+ autoload :FFMpeg, 'libis/format/tool/ffmpeg'
15
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
16
15
  autoload :PdfCopy, 'libis/format/tool/pdf_copy'
17
16
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
@@ -19,7 +18,7 @@ module Libis
19
18
  autoload :PdfSplit, 'libis/format/tool/pdf_split'
20
19
  autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
21
20
  autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
22
- autoload :MsgToPdf, 'libis/format/tool/msg_to_pdf'
21
+
23
22
  end
24
23
  end
25
24
  end
@@ -1,7 +1,5 @@
1
- # frozen_string_literal: true
2
-
3
1
  module Libis
4
2
  module Format
5
- VERSION = '1.3.4'
3
+ VERSION = '2.0.0'
6
4
  end
7
5
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+ require 'yaml'
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Format
7
+ class YamlLoader
8
+ # noinspection RubyResolve
9
+ include Singleton
10
+
11
+ def query(key, value)
12
+ case key.to_s.downcase.to_sym
13
+ when :name
14
+ return [database[value.to_s.upcase.to_sym]]
15
+ when :category
16
+ database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
17
+ when :puid
18
+ database.find_all { |_, info| info.puids.include?(value) }
19
+ when :mimetype
20
+ database.find_all { |_, info| info.mimetypes.include?(value) }
21
+ when :extension
22
+ database.find_all { |_, info| info.extensions.include?(value) }
23
+ else
24
+ return []
25
+ end.map(&:last)
26
+ end
27
+
28
+ def load_formats(file_or_hash)
29
+ hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
30
+ hash.each do |category, format_list|
31
+ format_list.each do |format_name, format_info|
32
+ format_info.symbolize_keys!
33
+ format_name = format_name.to_sym
34
+ new_info = Libis::Format::Info.new(
35
+ name: format_name,
36
+ category: category.to_sym,
37
+ description: format_info[:NAME],
38
+ puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
39
+ mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
40
+ extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
41
+ )
42
+ if (old_info = database[format_name])
43
+ new_info = Libis::Format::Info.new(
44
+ name: format_name,
45
+ category: category.to_sym,
46
+ description: new_info.description.blank? ? old_info.description : new_info.description,
47
+ puids: (old_info.puids + new_info.puids).uniq,
48
+ mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
49
+ extensions: (old_info.extensions + new_info.extensions).uniq
50
+ )
51
+ end
52
+ database[format_name] = new_info
53
+ end
54
+ end
55
+
56
+ end
57
+
58
+ private
59
+
60
+ attr_reader :database
61
+
62
+ def initialize
63
+ @database = {}
64
+ format_database = Libis::Format::Config[:format_library_database]
65
+ load_formats(format_database)
66
+ end
67
+
68
+ end
69
+
70
+ end
71
+ end
data/lib/libis/format.rb CHANGED
@@ -1,12 +1,13 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format/version'
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  autoload :Config, 'libis/format/config'
8
- autoload :TypeDatabase, 'libis/format/type_database'
9
- autoload :TypeDatabaseImpl, 'libis/format/type_database_impl'
6
+
7
+ autoload :Info, 'libis/format/info'
8
+ autoload :Library, 'libis/format/library'
9
+ autoload :YamlLoader, 'libis/format/yaml_loader'
10
+
10
11
  autoload :Identifier, 'libis/format/identifier'
11
12
 
12
13
  autoload :Tool, 'libis/format/tool'
@@ -15,5 +16,6 @@ module Libis
15
16
  ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
16
17
  DATA_DIR = File.join(ROOT_DIR, 'data')
17
18
  TOOL_DIR = File.join(ROOT_DIR, 'tools')
19
+
18
20
  end
19
- end
21
+ end
data/lib/libis-format.rb CHANGED
@@ -1,3 +1 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format'
data/libis-format.gemspec CHANGED
@@ -1,48 +1,42 @@
1
- # frozen_string_literal: true
1
+ # coding: utf-8
2
2
 
3
- lib = File.expand_path('lib', __dir__)
3
+ lib = File.expand_path('../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
- require 'libis/format/version'
7
6
  require 'bundler'
7
+ require 'libis/format/version'
8
8
 
9
9
  Gem::Specification.new do |spec|
10
10
  spec.name = 'libis-format'
11
11
  spec.version = Libis::Format::VERSION
12
12
  spec.authors = ['Kris Dekeyser']
13
13
  spec.email = ['kris.dekeyser@libis.be']
14
- spec.summary = 'LIBIS File format format services.'
15
- spec.description = 'Collection of tools and classes that help to identify file formats and create derivative copies.'
14
+ spec.summary = %q{LIBIS File format format services.}
15
+ spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
16
16
  spec.homepage = ''
17
17
  spec.license = 'MIT'
18
18
 
19
19
  spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
20
- spec.required_ruby_version = '>= 3.2'
21
20
 
22
- spec.files = `git ls-files -z`.split("\x0").select do |f|
23
- f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
24
- end
21
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
22
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
23
+ # spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
26
24
  spec.require_paths = ['lib']
27
25
 
28
- spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
29
- spec.add_runtime_dependency 'deep_dive', '~> 0.3'
30
- spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
31
- spec.add_runtime_dependency 'libis-tools', '~> 1.1'
32
- spec.add_runtime_dependency 'mini_magick', '~> 5.0.1'
33
- spec.add_runtime_dependency 'naturally', '~> 2.2'
34
- spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
35
- spec.add_runtime_dependency 'os', '~> 1.1'
36
- spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
37
- spec.add_runtime_dependency 'pdfkit', '~> 0.8'
38
-
26
+ spec.add_development_dependency 'rake', '~> 10.3'
27
+ spec.add_development_dependency 'rspec', '~> 3.1'
39
28
  spec.add_development_dependency 'awesome_print'
40
- spec.add_development_dependency 'equivalent-xml'
41
- spec.add_development_dependency 'rake'
42
- spec.add_development_dependency 'rspec'
43
- if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
29
+ spec.add_development_dependency 'equivalent-xml', '~> 0.5'
30
+ if spec.platform == Gem::Platform::JAVA
44
31
  spec.add_development_dependency 'saxon-xslt'
45
32
  else
46
33
  spec.add_development_dependency 'nokogiri'
47
34
  end
35
+
36
+ spec.add_runtime_dependency 'libis-tools', '~> 1.0'
37
+ spec.add_runtime_dependency 'os', '= 0.9.6'
38
+ spec.add_runtime_dependency 'mini_magick', '~> 4.3'
39
+ spec.add_runtime_dependency 'deep_dive', '~> 0.3'
40
+ spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
41
+ spec.add_runtime_dependency 'naturally', '~> 2.1'
48
42
  end
data/tools/PdfTool.jar CHANGED
Binary file
Binary file