libis-format 1.3.4 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78)
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +45 -250
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +19 -25
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +33 -6
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +86 -128
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'fileutils'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,57 +9,65 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfaValidator
15
14
  include ::Libis::Tools::Logger
16
15
 
17
16
  def self.run(source)
18
- new.run source
17
+ self.new.run source
19
18
  end
20
19
 
21
20
  def run(source)
21
+
22
22
  src_file = File.absolute_path(source)
23
23
 
24
24
  timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
25
- result = nil
26
25
  if (pdfa = Libis::Format::Config[:pdfa_cmd])
27
26
  # Keep it clean: tool generates fontconfig/ cache dir in current working dir
28
27
  previous_wd = Dir.getwd
29
28
  Dir.chdir(Dir.tmpdir)
30
29
 
31
30
  result = Libis::Tools::Command.run(
32
- pdfa,
33
- '--noxml',
34
- '--level', 'B',
35
- '--verb', '0',
36
- src_file,
37
- timeout:,
38
- kill_after: timeout * 2
31
+ pdfa,
32
+ '--noxml',
33
+ '--level', 'B',
34
+ '--verb', '0',
35
+ src_file,
36
+ timeout: timeout,
37
+ kill_after: timeout * 2
39
38
  )
40
39
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
40
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
41
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
42
42
 
43
43
  Dir.chdir(previous_wd)
44
44
 
45
- out, err = result[:out].partition { |line| line =~ /^VLD-\[PASS\]/ }
46
- result[:out] = out
47
- result[:err] += err
48
-
45
+ unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
46
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
47
+ result[:out].join("\n")
48
+ return false
49
+ end
49
50
  else
50
51
  jar = Libis::Format::Config[:preflight_jar]
51
52
  result = Libis::Tools::Command.run(
52
- Libis::Format::Config[:java_cmd],
53
- '-jar', jar,
54
- src_file,
55
- timeout:,
56
- kill_after: timeout * 2
53
+ Libis::Format::Config[:java_cmd],
54
+ '-jar', jar,
55
+ src_file,
56
+ timeout: timeout,
57
+ kill_after: timeout * 2
57
58
  )
59
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
58
60
 
59
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
60
-
61
+ unless result[:status] == 0
62
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
63
+ result[:out].join("\n")
64
+ return false
65
+ end
61
66
  end
62
- result
67
+ true
63
68
  end
64
69
  end
70
+
65
71
  end
66
72
  end
67
73
  end
@@ -1,5 +1,4 @@
1
- # frozen_string_literal: true
2
-
1
+ # noinspection RubyResolve
3
2
  require 'fileutils'
4
3
 
5
4
  require 'libis/tools/extend/string'
@@ -11,62 +10,52 @@ require 'libis/format/config'
11
10
  module Libis
12
11
  module Format
13
12
  module Tool
13
+
14
14
  class SpreadsheetToOds
15
15
  include ::Libis::Tools::Logger
16
16
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
- result.zero?
20
- end
21
-
22
17
  def self.run(source, target, options = {})
23
- new.run source, target, options
18
+ self.new.run source, target, options
24
19
  end
25
20
 
26
21
  def run(source, target, options = {})
27
22
  workdir = '/...'
28
23
  workdir = Dir.tmpdir unless Dir.exist? workdir
29
24
 
30
- workdir = File.join(workdir, rand(1_000_000).to_s)
25
+ workdir = File.join(workdir, rand(1000000).to_s)
31
26
  FileUtils.mkpath(workdir)
32
27
 
33
28
  src_file = File.join(workdir, File.basename(source))
34
29
  FileUtils.symlink source, src_file
35
30
 
36
- tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.ods")
31
+ tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
37
32
 
38
33
  export_filter = options[:export_filter] || 'ods'
39
34
 
40
35
  timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
41
- Libis::Format::Config[:timeouts][:office_to_pdf]
36
+ Libis::Format::Config[:timeouts][:office_to_pdf]
42
37
  result = Libis::Tools::Command.run(
43
- Libis::Format::Config[:soffice_cmd], '--headless',
44
- "-env:UserInstallation=file://#{workdir}",
45
- '--convert-to', export_filter,
46
- '--outdir', workdir, src_file,
47
- timeout:,
48
- kill_after: timeout * 2
38
+ Libis::Format::Config[:soffice_cmd], '--headless',
39
+ "-env:UserInstallation=file://#{workdir}",
40
+ '--convert-to', export_filter,
41
+ '--outdir', workdir, src_file,
42
+ timeout: timeout,
43
+ kill_after: timeout * 2
49
44
  )
50
45
 
51
- raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
52
-
46
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
53
47
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
54
- raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
48
+ raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
55
49
 
56
50
  FileUtils.copy tgt_file, target, preserve: true
57
51
 
58
- {
59
- command: result,
60
- files: [target]
61
- }
62
52
  ensure
63
- begin
64
- FileUtils.rmtree workdir
65
- rescue StandardError
66
- nil
67
- end
53
+ FileUtils.rmtree workdir rescue nil
54
+
55
+ result[:out]
68
56
  end
69
57
  end
58
+
70
59
  end
71
60
  end
72
61
  end
@@ -1,17 +1,16 @@
1
- # frozen_string_literal: true
2
-
3
1
  # code utf-8
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  module Tool
6
+
8
7
  autoload :Droid, 'libis/format/tool/droid'
9
8
  autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
10
9
  autoload :Fido, 'libis/format/tool/fido'
11
10
  autoload :FileTool, 'libis/format/tool/file_tool'
12
11
 
13
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
14
- autoload :FFMpeg, 'libis/format/tool/ff_mpeg'
13
+ autoload :FFMpeg, 'libis/format/tool/ffmpeg'
15
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
16
15
  autoload :PdfCopy, 'libis/format/tool/pdf_copy'
17
16
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
@@ -19,7 +18,7 @@ module Libis
19
18
  autoload :PdfSplit, 'libis/format/tool/pdf_split'
20
19
  autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
21
20
  autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
22
- autoload :MsgToPdf, 'libis/format/tool/msg_to_pdf'
21
+
23
22
  end
24
23
  end
25
24
  end
@@ -1,7 +1,5 @@
1
- # frozen_string_literal: true
2
-
3
1
  module Libis
4
2
  module Format
5
- VERSION = '1.3.4'
3
+ VERSION = '2.0.0'
6
4
  end
7
5
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+ require 'yaml'
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Format
7
+ class YamlLoader
8
+ # noinspection RubyResolve
9
+ include Singleton
10
+
11
+ def query(key, value)
12
+ case key.to_s.downcase.to_sym
13
+ when :name
14
+ return [database[value.to_s.upcase.to_sym]]
15
+ when :category
16
+ database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
17
+ when :puid
18
+ database.find_all { |_, info| info.puids.include?(value) }
19
+ when :mimetype
20
+ database.find_all { |_, info| info.mimetypes.include?(value) }
21
+ when :extension
22
+ database.find_all { |_, info| info.extensions.include?(value) }
23
+ else
24
+ return []
25
+ end.map(&:last)
26
+ end
27
+
28
+ def load_formats(file_or_hash)
29
+ hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
30
+ hash.each do |category, format_list|
31
+ format_list.each do |format_name, format_info|
32
+ format_info.symbolize_keys!
33
+ format_name = format_name.to_sym
34
+ new_info = Libis::Format::Info.new(
35
+ name: format_name,
36
+ category: category.to_sym,
37
+ description: format_info[:NAME],
38
+ puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
39
+ mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
40
+ extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
41
+ )
42
+ if (old_info = database[format_name])
43
+ new_info = Libis::Format::Info.new(
44
+ name: format_name,
45
+ category: category.to_sym,
46
+ description: new_info.description.blank? ? old_info.description : new_info.description,
47
+ puids: (old_info.puids + new_info.puids).uniq,
48
+ mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
49
+ extensions: (old_info.extensions + new_info.extensions).uniq
50
+ )
51
+ end
52
+ database[format_name] = new_info
53
+ end
54
+ end
55
+
56
+ end
57
+
58
+ private
59
+
60
+ attr_reader :database
61
+
62
+ def initialize
63
+ @database = {}
64
+ format_database = Libis::Format::Config[:format_library_database]
65
+ load_formats(format_database)
66
+ end
67
+
68
+ end
69
+
70
+ end
71
+ end
data/lib/libis/format.rb CHANGED
@@ -1,12 +1,13 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format/version'
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  autoload :Config, 'libis/format/config'
8
- autoload :TypeDatabase, 'libis/format/type_database'
9
- autoload :TypeDatabaseImpl, 'libis/format/type_database_impl'
6
+
7
+ autoload :Info, 'libis/format/info'
8
+ autoload :Library, 'libis/format/library'
9
+ autoload :YamlLoader, 'libis/format/yaml_loader'
10
+
10
11
  autoload :Identifier, 'libis/format/identifier'
11
12
 
12
13
  autoload :Tool, 'libis/format/tool'
@@ -15,5 +16,6 @@ module Libis
15
16
  ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
16
17
  DATA_DIR = File.join(ROOT_DIR, 'data')
17
18
  TOOL_DIR = File.join(ROOT_DIR, 'tools')
19
+
18
20
  end
19
- end
21
+ end
data/lib/libis-format.rb CHANGED
@@ -1,3 +1 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format'
data/libis-format.gemspec CHANGED
@@ -1,48 +1,42 @@
1
- # frozen_string_literal: true
1
+ # coding: utf-8
2
2
 
3
- lib = File.expand_path('lib', __dir__)
3
+ lib = File.expand_path('../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
- require 'libis/format/version'
7
6
  require 'bundler'
7
+ require 'libis/format/version'
8
8
 
9
9
  Gem::Specification.new do |spec|
10
10
  spec.name = 'libis-format'
11
11
  spec.version = Libis::Format::VERSION
12
12
  spec.authors = ['Kris Dekeyser']
13
13
  spec.email = ['kris.dekeyser@libis.be']
14
- spec.summary = 'LIBIS File format format services.'
15
- spec.description = 'Collection of tools and classes that help to identify file formats and create derivative copies.'
14
+ spec.summary = %q{LIBIS File format format services.}
15
+ spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
16
16
  spec.homepage = ''
17
17
  spec.license = 'MIT'
18
18
 
19
19
  spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
20
- spec.required_ruby_version = '>= 3.2'
21
20
 
22
- spec.files = `git ls-files -z`.split("\x0").select do |f|
23
- f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
24
- end
21
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
22
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
23
+ # spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
26
24
  spec.require_paths = ['lib']
27
25
 
28
- spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
29
- spec.add_runtime_dependency 'deep_dive', '~> 0.3'
30
- spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
31
- spec.add_runtime_dependency 'libis-tools', '~> 1.1'
32
- spec.add_runtime_dependency 'mini_magick', '~> 5.0.1'
33
- spec.add_runtime_dependency 'naturally', '~> 2.2'
34
- spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
35
- spec.add_runtime_dependency 'os', '~> 1.1'
36
- spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
37
- spec.add_runtime_dependency 'pdfkit', '~> 0.8'
38
-
26
+ spec.add_development_dependency 'rake', '~> 10.3'
27
+ spec.add_development_dependency 'rspec', '~> 3.1'
39
28
  spec.add_development_dependency 'awesome_print'
40
- spec.add_development_dependency 'equivalent-xml'
41
- spec.add_development_dependency 'rake'
42
- spec.add_development_dependency 'rspec'
43
- if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
29
+ spec.add_development_dependency 'equivalent-xml', '~> 0.5'
30
+ if spec.platform == Gem::Platform::JAVA
44
31
  spec.add_development_dependency 'saxon-xslt'
45
32
  else
46
33
  spec.add_development_dependency 'nokogiri'
47
34
  end
35
+
36
+ spec.add_runtime_dependency 'libis-tools', '~> 1.0'
37
+ spec.add_runtime_dependency 'os', '= 0.9.6'
38
+ spec.add_runtime_dependency 'mini_magick', '~> 4.3'
39
+ spec.add_runtime_dependency 'deep_dive', '~> 0.3'
40
+ spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
41
+ spec.add_runtime_dependency 'naturally', '~> 2.1'
48
42
  end
data/tools/PdfTool.jar CHANGED
Binary file
Binary file