libis-format 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (78) hide show
  1. checksums.yaml +4 -4
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +20 -0
  4. data/.travis.yml +70 -0
  5. data/Gemfile +0 -12
  6. data/README.md +2 -2
  7. data/Rakefile +8 -0
  8. data/base/Dockerfile +35 -0
  9. data/base/Dockerfile.alpine +20 -0
  10. data/base/Dockerfile.rvm +56 -0
  11. data/base/rework_path +20 -0
  12. data/bin/{pdf_tool → pdf_copy} +2 -3
  13. data/data/PDFA_def.ps +3 -3
  14. data/data/eciRGB_v2.icc +0 -0
  15. data/data/types.yml +4 -17
  16. data/docker_cfg.yml +1 -0
  17. data/lib/libis/format/cli/convert.rb +4 -4
  18. data/lib/libis/format/cli/prompt_helper.rb +24 -32
  19. data/lib/libis/format/command_line.rb +3 -2
  20. data/lib/libis/format/config.rb +23 -19
  21. data/lib/libis/format/converter/audio_converter.rb +31 -56
  22. data/lib/libis/format/converter/base.rb +36 -16
  23. data/lib/libis/format/converter/chain.rb +32 -52
  24. data/lib/libis/format/converter/fop_pdf_converter.rb +8 -4
  25. data/lib/libis/format/converter/image_assembler.rb +82 -0
  26. data/lib/libis/format/converter/image_converter.rb +40 -153
  27. data/lib/libis/format/converter/image_splitter.rb +80 -0
  28. data/lib/libis/format/converter/image_watermarker.rb +261 -0
  29. data/lib/libis/format/converter/jp2_converter.rb +38 -36
  30. data/lib/libis/format/converter/office_converter.rb +28 -22
  31. data/lib/libis/format/converter/pdf_assembler.rb +66 -0
  32. data/lib/libis/format/converter/pdf_converter.rb +52 -200
  33. data/lib/libis/format/converter/pdf_optimizer.rb +70 -0
  34. data/lib/libis/format/converter/pdf_splitter.rb +65 -0
  35. data/lib/libis/format/converter/pdf_watermarker.rb +110 -0
  36. data/lib/libis/format/converter/repository.rb +13 -7
  37. data/lib/libis/format/converter/spreadsheet_converter.rb +16 -10
  38. data/lib/libis/format/converter/video_converter.rb +58 -47
  39. data/lib/libis/format/converter/xslt_converter.rb +11 -13
  40. data/lib/libis/format/converter.rb +1 -1
  41. data/lib/libis/format/identifier.rb +46 -44
  42. data/lib/libis/format/info.rb +27 -0
  43. data/lib/libis/format/library.rb +147 -0
  44. data/lib/libis/format/tool/droid.rb +30 -29
  45. data/lib/libis/format/tool/extension_identification.rb +26 -24
  46. data/lib/libis/format/tool/{ff_mpeg.rb → ffmpeg.rb} +10 -17
  47. data/lib/libis/format/tool/fido.rb +27 -22
  48. data/lib/libis/format/tool/file_tool.rb +24 -11
  49. data/lib/libis/format/tool/fop_pdf.rb +14 -25
  50. data/lib/libis/format/tool/identification_tool.rb +40 -38
  51. data/lib/libis/format/tool/office_to_pdf.rb +18 -30
  52. data/lib/libis/format/tool/pdf_copy.rb +47 -0
  53. data/lib/libis/format/tool/pdf_merge.rb +48 -0
  54. data/lib/libis/format/tool/pdf_optimizer.rb +19 -22
  55. data/lib/libis/format/tool/pdf_split.rb +47 -0
  56. data/lib/libis/format/tool/pdf_to_pdfa.rb +31 -45
  57. data/lib/libis/format/tool/pdfa_validator.rb +30 -24
  58. data/lib/libis/format/tool/spreadsheet_to_ods.rb +18 -29
  59. data/lib/libis/format/tool.rb +3 -4
  60. data/lib/libis/format/version.rb +1 -3
  61. data/lib/libis/format/yaml_loader.rb +71 -0
  62. data/lib/libis/format.rb +7 -5
  63. data/lib/libis-format.rb +0 -2
  64. data/libis-format.gemspec +18 -24
  65. data/tools/PdfTool.jar +0 -0
  66. data/tools/pdfbox/pdfbox-app-2.0.13.jar +0 -0
  67. data/tools/pdfbox/{preflight-app-3.0.3.jar → preflight-app-2.0.13.jar} +0 -0
  68. metadata +85 -125
  69. data/data/AdobeRGB1998.icc +0 -0
  70. data/lib/libis/format/converter/email_converter.rb +0 -35
  71. data/lib/libis/format/tool/msg_to_pdf.rb +0 -270
  72. data/lib/libis/format/tool/pdf_tool.rb +0 -52
  73. data/lib/libis/format/type_database.rb +0 -156
  74. data/lib/libis/format/type_database_impl.rb +0 -153
  75. data/tools/pdf2pdfa +0 -395
  76. data/tools/pdfbox/pdfbox-app-3.0.3.jar +0 -0
  77. /data/bin/{droid_tool → droid} +0 -0
  78. /data/bin/{fido_tool → fido} +0 -0
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'os'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,38 +9,37 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfOptimizer
15
14
  include ::Libis::Tools::Logger
16
15
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd], '--version')
19
- (result[:status]).zero?
20
- end
21
-
22
16
  def self.run(source, target, quality)
23
- new.run source, target, quality
17
+ self.new.run source, target, quality
24
18
  end
25
19
 
26
20
  def run(source, target, quality)
27
- timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
28
- args = [
29
- Libis::Format::Config[:ghostscript_cmd],
30
- '-sDEVICE=pdfwrite',
31
- '-dCompatibilityLevel=1.4',
32
- "-dPDFSETTINGS=/#{quality}",
33
- '-dNOPAUSE',
34
- '-dBATCH',
35
- "-sOutputFile=#{target}",
36
- source.to_s
37
- ]
38
-
39
- result = Libis::Tools::Command.run(*args, timeout:, kill_after: timeout * 2)
40
21
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
22
+ timeout = Libis::Format::Config[:timeouts][:pdf_optimizer]
23
+ result = Libis::Tools::Command.run(
24
+ 'gs',
25
+ '-sDEVICE=pdfwrite',
26
+ '-dCompatibilityLevel=1.4',
27
+ "-dPDFSETTINGS=/#{quality}",
28
+ '-dNOPAUSE',
29
+ '-dBATCH',
30
+ "-sOutputFile=#{target}",
31
+ "#{source}",
32
+ timeout: timeout,
33
+ kill_after: timeout * 2
34
+ )
35
+
36
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
37
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0
42
38
 
43
39
  result
44
40
  end
45
41
  end
42
+
46
43
  end
47
44
  end
48
45
  end
@@ -0,0 +1,47 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+ module Tool
12
+
13
+ class PdfSplit
14
+ include ::Libis::Tools::Logger
15
+
16
+ def self.run(source, target, options = [])
17
+ self.new.run source, target, options
18
+ end
19
+
20
+ def run(source, target, options = [])
21
+
22
+ if OS.java?
23
+ # TODO: import library and execute in current VM. For now do exactly as in MRI.
24
+ end
25
+
26
+ timeout = Libis::Format::Config[:timeouts][:pdf_split]
27
+ result = Libis::Tools::Command.run(
28
+ Libis::Format::Config[:java_cmd],
29
+ '-cp', Libis::Format::Config[:pdf_tool],
30
+ 'SplitPdf',
31
+ '--file_input', source,
32
+ '--file_output', target,
33
+ *options,
34
+ timeout: timeout,
35
+ kill_after: timeout * 2
36
+ )
37
+
38
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
39
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
40
+
41
+ result
42
+ end
43
+ end
44
+
45
+ end
46
+ end
47
+ end
@@ -1,9 +1,6 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'tempfile'
4
2
  require 'csv'
5
3
  require 'fileutils'
6
- require 'pdfinfo'
7
4
 
8
5
  require 'libis/tools/extend/string'
9
6
  require 'libis/tools/logger'
@@ -15,24 +12,19 @@ require 'libis/format'
15
12
  module Libis
16
13
  module Format
17
14
  module Tool
15
+
18
16
  class PdfToPdfa
19
17
  include ::Libis::Tools::Logger
20
18
 
21
- def self.installed?
22
- result = Libis::Tools::Command.run(Libis::Format::Config[:ghostscript_cmd])
23
- result.zero?
19
+ def self.run(source, target = nil, options = {})
20
+ self.new.run source, target, options
24
21
  end
25
22
 
26
- def self.run(source, target = nil, **options)
27
- new.run source, target, **options
28
- end
23
+ def run(source, target = nil, options = nil)
29
24
 
30
- def run(source, target = nil, **options)
31
25
  tmp_target = Tools::TempFile.name(File.basename(source, '.*'), '.pdf')
32
26
  target ||= tmp_target
33
27
 
34
- metadata = get_metadata(source)
35
-
36
28
  icc_info = icc_options(options[:colorspace])
37
29
 
38
30
  icc_file = Tools::TempFile.name(icc_info[:icc_name], '.icc')
@@ -40,58 +32,52 @@ module Libis
40
32
 
41
33
  def_filename = Tools::TempFile.name('PDFA_def', '.ps')
42
34
  File.open(def_filename, 'w') do |f|
43
- f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps'))
44
- .gsub('[**ICC profile**]', icc_file)
45
- .gsub('[**ICC reference**]', icc_info[:icc_ref])
46
- .gsub('[**METADATA**]', metadata)
35
+ f.puts File.read(File.join(Libis::Format::DATA_DIR, 'PDFA_def.ps')).
36
+ gsub('[** Fill in ICC profile location **]', icc_file).
37
+ gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
47
38
  end
48
39
 
49
40
  timeout = Libis::Format::Config[:timeouts][:pdf_to_pdfa]
50
41
  result = Libis::Tools::Command.run(
51
- Libis::Format::Config[:ghostscript_cmd],
52
- '-q',
53
- '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE', '-dNOSAFER',
54
- # "-dNOPLATFONTS", "-dUseCIEColor=true",
55
- # "-sColorConversionStrategy=/UseDeviceIndependentColor",
56
- "-sProcessColorModel=#{icc_info[:device]}",
57
- "-sOutputICCProfile=#{icc_file}",
58
- '-dCompatibilityLevel=1.4',
59
- '-sDEVICE=pdfwrite', '-dPDFA=1', '-dPDFACompatibilityPolicy=1',
60
- '-o', File.absolute_path(target),
61
- def_filename,
62
- source,
63
- timeout:,
64
- kill_after: timeout * 2
42
+ Libis::Format::Config[:ghostscript_cmd],
43
+ '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
44
+ '-sColorConversionStrategy=/UseDeviceIndependentColor',
45
+ "-sProcessColorModel=#{icc_info[:device]}",
46
+ '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
47
+ "-sOutputICCProfile=#{icc_file}",
48
+ '-o', File.absolute_path(target),
49
+ def_filename,
50
+ source,
51
+ timeout: timeout,
52
+ kill_after: timeout * 2
65
53
  )
66
54
 
67
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
55
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
68
56
 
69
57
  FileUtils.rm [icc_file, def_filename].compact, force: true
70
58
 
59
+ unless Format::Tool::PdfaValidator.run(target)
60
+ result[:status] = -999
61
+ result[:err] << 'Failed to validate generated PDF/A file.'
62
+ end
63
+
71
64
  result
72
65
  end
73
66
 
74
- private
75
67
 
76
- def get_metadata(source)
77
- info = Pdfinfo.new(source)
78
- metadata = "/Title (#{info.title})"
79
- metadata += "\n /Author (#{info.author})" if info.author
80
- metadata += "\n /Subject (#{info.subject})" if info.subject
81
- metadata += "\n /Keywords (#{info.keywords})" if info.keywords
82
- metadata += "\n /Creator (#{info.creator})" if info.creator
83
- metadata
84
- end
68
+ private
85
69
 
86
70
  def icc_options(colorspace)
87
71
  case colorspace.to_s.downcase
88
- when 'cmyk'
89
- { icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK' }
90
- else
91
- { icc_name: 'AdobeRGB1998', icc_ref: 'sRGB', device: 'DeviceRGB' }
72
+ when 'cmyk'
73
+ {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
74
+ else
75
+ {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
92
76
  end
93
77
  end
78
+
94
79
  end
80
+
95
81
  end
96
82
  end
97
83
  end
@@ -1,5 +1,3 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'fileutils'
4
2
 
5
3
  require 'libis/tools/extend/string'
@@ -11,57 +9,65 @@ require 'libis/format/config'
11
9
  module Libis
12
10
  module Format
13
11
  module Tool
12
+
14
13
  class PdfaValidator
15
14
  include ::Libis::Tools::Logger
16
15
 
17
16
  def self.run(source)
18
- new.run source
17
+ self.new.run source
19
18
  end
20
19
 
21
20
  def run(source)
21
+
22
22
  src_file = File.absolute_path(source)
23
23
 
24
24
  timeout = Libis::Format::Config[:timeouts][:pdfa_validator]
25
- result = nil
26
25
  if (pdfa = Libis::Format::Config[:pdfa_cmd])
27
26
  # Keep it clean: tool generates fontconfig/ cache dir in current working dir
28
27
  previous_wd = Dir.getwd
29
28
  Dir.chdir(Dir.tmpdir)
30
29
 
31
30
  result = Libis::Tools::Command.run(
32
- pdfa,
33
- '--noxml',
34
- '--level', 'B',
35
- '--verb', '0',
36
- src_file,
37
- timeout:,
38
- kill_after: timeout * 2
31
+ pdfa,
32
+ '--noxml',
33
+ '--level', 'B',
34
+ '--verb', '0',
35
+ src_file,
36
+ timeout: timeout,
37
+ kill_after: timeout * 2
39
38
  )
40
39
 
41
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
40
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
41
+ raise RuntimeError, "#{self.class} errors: #{result[:err].join("\n")}" unless result[:status] == 0 && result[:err].empty?
42
42
 
43
43
  Dir.chdir(previous_wd)
44
44
 
45
- out, err = result[:out].partition { |line| line =~ /^VLD-\[PASS\]/ }
46
- result[:out] = out
47
- result[:err] += err
48
-
45
+ unless result[:out].any? {|line| line =~ /^VLD-\[PASS\]/}
46
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
47
+ result[:out].join("\n")
48
+ return false
49
+ end
49
50
  else
50
51
  jar = Libis::Format::Config[:preflight_jar]
51
52
  result = Libis::Tools::Command.run(
52
- Libis::Format::Config[:java_cmd],
53
- '-jar', jar,
54
- src_file,
55
- timeout:,
56
- kill_after: timeout * 2
53
+ Libis::Format::Config[:java_cmd],
54
+ '-jar', jar,
55
+ src_file,
56
+ timeout: timeout,
57
+ kill_after: timeout * 2
57
58
  )
59
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
58
60
 
59
- result[:err] << "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
60
-
61
+ unless result[:status] == 0
62
+ warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
63
+ result[:out].join("\n")
64
+ return false
65
+ end
61
66
  end
62
- result
67
+ true
63
68
  end
64
69
  end
70
+
65
71
  end
66
72
  end
67
73
  end
@@ -1,5 +1,4 @@
1
- # frozen_string_literal: true
2
-
1
+ # noinspection RubyResolve
3
2
  require 'fileutils'
4
3
 
5
4
  require 'libis/tools/extend/string'
@@ -11,62 +10,52 @@ require 'libis/format/config'
11
10
  module Libis
12
11
  module Format
13
12
  module Tool
13
+
14
14
  class SpreadsheetToOds
15
15
  include ::Libis::Tools::Logger
16
16
 
17
- def self.installed?
18
- result = Libis::Tools::Command.run(Libis::Format::Config[:soffice_cmd], '--version')
19
- result.zero?
20
- end
21
-
22
17
  def self.run(source, target, options = {})
23
- new.run source, target, options
18
+ self.new.run source, target, options
24
19
  end
25
20
 
26
21
  def run(source, target, options = {})
27
22
  workdir = '/...'
28
23
  workdir = Dir.tmpdir unless Dir.exist? workdir
29
24
 
30
- workdir = File.join(workdir, rand(1_000_000).to_s)
25
+ workdir = File.join(workdir, rand(1000000).to_s)
31
26
  FileUtils.mkpath(workdir)
32
27
 
33
28
  src_file = File.join(workdir, File.basename(source))
34
29
  FileUtils.symlink source, src_file
35
30
 
36
- tgt_file = File.join(workdir, "#{File.basename(source, '.*')}.ods")
31
+ tgt_file = File.join(workdir, File.basename(source, '.*') + '.ods')
37
32
 
38
33
  export_filter = options[:export_filter] || 'ods'
39
34
 
40
35
  timeout = Libis::Format::Config[:timeouts][:spreadsheet_to_ods] ||
41
- Libis::Format::Config[:timeouts][:office_to_pdf]
36
+ Libis::Format::Config[:timeouts][:office_to_pdf]
42
37
  result = Libis::Tools::Command.run(
43
- Libis::Format::Config[:soffice_cmd], '--headless',
44
- "-env:UserInstallation=file://#{workdir}",
45
- '--convert-to', export_filter,
46
- '--outdir', workdir, src_file,
47
- timeout:,
48
- kill_after: timeout * 2
38
+ Libis::Format::Config[:soffice_cmd], '--headless',
39
+ "-env:UserInstallation=file://#{workdir}",
40
+ '--convert-to', export_filter,
41
+ '--outdir', workdir, src_file,
42
+ timeout: timeout,
43
+ kill_after: timeout * 2
49
44
  )
50
45
 
51
- raise "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
52
-
46
+ raise RuntimeError, "#{self.class} took too long (> #{timeout} seconds) to complete" if result[:timeout]
53
47
  warn "OfficeToPdf conversion messages: \n\t#{result[:err].join("\n\t")}" unless result[:err].empty?
54
- raise "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
48
+ raise RuntimeError, "#{self.class} failed to generate target file #{tgt_file}" unless File.exist?(tgt_file)
55
49
 
56
50
  FileUtils.copy tgt_file, target, preserve: true
57
51
 
58
- {
59
- command: result,
60
- files: [target]
61
- }
62
52
  ensure
63
- begin
64
- FileUtils.rmtree workdir
65
- rescue StandardError
66
- nil
67
- end
53
+ FileUtils.rmtree workdir rescue nil
54
+
55
+ result[:out]
68
56
  end
69
57
  end
58
+
70
59
  end
71
60
  end
72
61
  end
@@ -1,17 +1,16 @@
1
- # frozen_string_literal: true
2
-
3
1
  # code utf-8
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  module Tool
6
+
8
7
  autoload :Droid, 'libis/format/tool/droid'
9
8
  autoload :ExtensionIdentification, 'libis/format/tool/extension_identification'
10
9
  autoload :Fido, 'libis/format/tool/fido'
11
10
  autoload :FileTool, 'libis/format/tool/file_tool'
12
11
 
13
12
  autoload :OfficeToPdf, 'libis/format/tool/office_to_pdf'
14
- autoload :FFMpeg, 'libis/format/tool/ff_mpeg'
13
+ autoload :FFMpeg, 'libis/format/tool/ffmpeg'
15
14
  autoload :FopPdf, 'libis/format/tool/fop_pdf'
16
15
  autoload :PdfCopy, 'libis/format/tool/pdf_copy'
17
16
  autoload :PdfMerge, 'libis/format/tool/pdf_merge'
@@ -19,7 +18,7 @@ module Libis
19
18
  autoload :PdfSplit, 'libis/format/tool/pdf_split'
20
19
  autoload :PdfToPdfa, 'libis/format/tool/pdf_to_pdfa'
21
20
  autoload :PdfaValidator, 'libis/format/tool/pdfa_validator'
22
- autoload :MsgToPdf, 'libis/format/tool/msg_to_pdf'
21
+
23
22
  end
24
23
  end
25
24
  end
@@ -1,7 +1,5 @@
1
- # frozen_string_literal: true
2
-
3
1
  module Libis
4
2
  module Format
5
- VERSION = '1.3.2'
3
+ VERSION = '2.0.0'
6
4
  end
7
5
  end
@@ -0,0 +1,71 @@
1
+ # frozen_string_literal: true
2
+ require 'yaml'
3
+ require 'singleton'
4
+
5
+ module Libis
6
+ module Format
7
+ class YamlLoader
8
+ # noinspection RubyResolve
9
+ include Singleton
10
+
11
+ def query(key, value)
12
+ case key.to_s.downcase.to_sym
13
+ when :name
14
+ return [database[value.to_s.upcase.to_sym]]
15
+ when :category
16
+ database.find_all { |_, info| info.category == value.to_s.upcase.to_sym }
17
+ when :puid
18
+ database.find_all { |_, info| info.puids.include?(value) }
19
+ when :mimetype
20
+ database.find_all { |_, info| info.mimetypes.include?(value) }
21
+ when :extension
22
+ database.find_all { |_, info| info.extensions.include?(value) }
23
+ else
24
+ return []
25
+ end.map(&:last)
26
+ end
27
+
28
+ def load_formats(file_or_hash)
29
+ hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
30
+ hash.each do |category, format_list|
31
+ format_list.each do |format_name, format_info|
32
+ format_info.symbolize_keys!
33
+ format_name = format_name.to_sym
34
+ new_info = Libis::Format::Info.new(
35
+ name: format_name,
36
+ category: category.to_sym,
37
+ description: format_info[:NAME],
38
+ puids: format_info[:PUID]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || [],
39
+ mimetypes: format_info[:MIME]&.strip&.split(/[\s,]+/)&.map(&:strip) || [],
40
+ extensions: format_info[:EXTENSIONS]&.strip&.split(/[\s,]+/)&.map { |v| v.strip } || []
41
+ )
42
+ if (old_info = database[format_name])
43
+ new_info = Libis::Format::Info.new(
44
+ name: format_name,
45
+ category: category.to_sym,
46
+ description: new_info.description.blank? ? old_info.description : new_info.description,
47
+ puids: (old_info.puids + new_info.puids).uniq,
48
+ mimetypes: (old_info.mimetypes + new_info.mimetypes).uniq,
49
+ extensions: (old_info.extensions + new_info.extensions).uniq
50
+ )
51
+ end
52
+ database[format_name] = new_info
53
+ end
54
+ end
55
+
56
+ end
57
+
58
+ private
59
+
60
+ attr_reader :database
61
+
62
+ def initialize
63
+ @database = {}
64
+ format_database = Libis::Format::Config[:format_library_database]
65
+ load_formats(format_database)
66
+ end
67
+
68
+ end
69
+
70
+ end
71
+ end
data/lib/libis/format.rb CHANGED
@@ -1,12 +1,13 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format/version'
4
2
 
5
3
  module Libis
6
4
  module Format
7
5
  autoload :Config, 'libis/format/config'
8
- autoload :TypeDatabase, 'libis/format/type_database'
9
- autoload :TypeDatabaseImpl, 'libis/format/type_database_impl'
6
+
7
+ autoload :Info, 'libis/format/info'
8
+ autoload :Library, 'libis/format/library'
9
+ autoload :YamlLoader, 'libis/format/yaml_loader'
10
+
10
11
  autoload :Identifier, 'libis/format/identifier'
11
12
 
12
13
  autoload :Tool, 'libis/format/tool'
@@ -15,5 +16,6 @@ module Libis
15
16
  ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
16
17
  DATA_DIR = File.join(ROOT_DIR, 'data')
17
18
  TOOL_DIR = File.join(ROOT_DIR, 'tools')
19
+
18
20
  end
19
- end
21
+ end
data/lib/libis-format.rb CHANGED
@@ -1,3 +1 @@
1
- # frozen_string_literal: true
2
-
3
1
  require 'libis/format'
data/libis-format.gemspec CHANGED
@@ -1,48 +1,42 @@
1
- # frozen_string_literal: true
1
+ # coding: utf-8
2
2
 
3
- lib = File.expand_path('lib', __dir__)
3
+ lib = File.expand_path('../lib', __FILE__)
4
4
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
5
5
 
6
- require 'libis/format/version'
7
6
  require 'bundler'
7
+ require 'libis/format/version'
8
8
 
9
9
  Gem::Specification.new do |spec|
10
10
  spec.name = 'libis-format'
11
11
  spec.version = Libis::Format::VERSION
12
12
  spec.authors = ['Kris Dekeyser']
13
13
  spec.email = ['kris.dekeyser@libis.be']
14
- spec.summary = 'LIBIS File format format services.'
15
- spec.description = 'Collection of tools and classes that help to identify file formats and create derivative copies.'
14
+ spec.summary = %q{LIBIS File format format services.}
15
+ spec.description = %q{Collection of tools and classes that help to identify formats of binary files and create derivative copies (e.g. PDF from Word).}
16
16
  spec.homepage = ''
17
17
  spec.license = 'MIT'
18
18
 
19
19
  spec.platform = Gem::Platform::JAVA if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'jruby'
20
- spec.required_ruby_version = '>= 3.2'
21
20
 
22
- spec.files = `git ls-files -z`.split("\x0").select do |f|
23
- f.match(%r{^(bin/|lib/|data/|tools/|Gemfile|libis-format.gemspec|LICENSE\.txt|README\.md)})
24
- end
21
+ spec.files = `git ls-files -z`.split("\x0").reject { |f| f.match(%r{^(test|spec|features)/}) }
25
22
  spec.executables = spec.files.grep(%r{^bin/[^/]+$}) { |f| File.basename(f) }
23
+ # spec.test_files = spec.files.grep(%r{^(test|spec|features)/})
26
24
  spec.require_paths = ['lib']
27
25
 
28
- spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
29
- spec.add_runtime_dependency 'deep_dive', '~> 0.3'
30
- spec.add_runtime_dependency 'libis-mapi', '~> 0.3'
31
- spec.add_runtime_dependency 'libis-tools', '~> 1.1'
32
- spec.add_runtime_dependency 'mini_magick', '~> 4.12'
33
- spec.add_runtime_dependency 'naturally', '~> 2.2'
34
- spec.add_runtime_dependency 'new_rfc_2047', '~> 1.0'
35
- spec.add_runtime_dependency 'os', '~> 1.1'
36
- spec.add_runtime_dependency 'pdfinfo', '~> 1.4'
37
- spec.add_runtime_dependency 'pdfkit', '~> 0.8'
38
-
26
+ spec.add_development_dependency 'rake', '~> 10.3'
27
+ spec.add_development_dependency 'rspec', '~> 3.1'
39
28
  spec.add_development_dependency 'awesome_print'
40
- spec.add_development_dependency 'equivalent-xml'
41
- spec.add_development_dependency 'rake'
42
- spec.add_development_dependency 'rspec'
43
- if Gem::Platform::JAVA && spec.platform == Gem::Platform::JAVA
29
+ spec.add_development_dependency 'equivalent-xml', '~> 0.5'
30
+ if spec.platform == Gem::Platform::JAVA
44
31
  spec.add_development_dependency 'saxon-xslt'
45
32
  else
46
33
  spec.add_development_dependency 'nokogiri'
47
34
  end
35
+
36
+ spec.add_runtime_dependency 'libis-tools', '~> 1.0'
37
+ spec.add_runtime_dependency 'os', '= 0.9.6'
38
+ spec.add_runtime_dependency 'mini_magick', '~> 4.3'
39
+ spec.add_runtime_dependency 'deep_dive', '~> 0.3'
40
+ spec.add_runtime_dependency 'chromaprint', '~> 0.0.2'
41
+ spec.add_runtime_dependency 'naturally', '~> 2.1'
48
42
  end
data/tools/PdfTool.jar CHANGED
Binary file
Binary file