libis-format 0.9.1 → 0.9.3

This diff shows the changes between two publicly released versions of this package, as they appear in the supported public registries. It is provided for informational purposes only.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/.travis.yml +14 -9
  4. data/README.md +1 -0
  5. data/bin/pdf_copy +13 -0
  6. data/data/ISOcoated_v2_eci.icc +0 -0
  7. data/data/PDFA_def.ps +15 -7
  8. data/data/eciRGB_v2.icc +0 -0
  9. data/data/types.yml +29 -25
  10. data/lib/libis/format/config.rb +35 -0
  11. data/lib/libis/format/converter/base.rb +23 -26
  12. data/lib/libis/format/converter/chain.rb +126 -27
  13. data/lib/libis/format/converter/image_converter.rb +211 -0
  14. data/lib/libis/format/converter/office_converter.rb +46 -0
  15. data/lib/libis/format/converter/pdf_converter.rb +110 -0
  16. data/lib/libis/format/converter/repository.rb +38 -50
  17. data/lib/libis/format/droid.rb +15 -8
  18. data/lib/libis/format/fido.rb +3 -10
  19. data/lib/libis/format/identifier.rb +18 -14
  20. data/lib/libis/format/office_to_pdf.rb +52 -0
  21. data/lib/libis/format/pdf_copy.rb +50 -0
  22. data/lib/libis/format/pdf_to_pdfa.rb +79 -0
  23. data/lib/libis/format/pdfa_validator.rb +61 -0
  24. data/lib/libis/format/type_database.rb +1 -1
  25. data/lib/libis/format/version.rb +1 -1
  26. data/lib/libis/format.rb +9 -0
  27. data/libis-format.gemspec +2 -0
  28. data/spec/converter_spec.rb +212 -0
  29. data/spec/data/test-options.jpg +0 -0
  30. data/spec/data/test.jpg +0 -0
  31. data/spec/data/test.pdf.tif +0 -0
  32. data/spec/data/test.png +0 -0
  33. data/spec/data/test_pdfa.pdf +0 -0
  34. data/spec/identifier_spec.rb +1 -0
  35. data/tools/PdfTool.jar +0 -0
  36. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  37. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  38. metadata +56 -5
  39. data/data/ISOcoated.icc +0 -0
  40. data/tools/fido/argparselocal.pyc +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d75c38cec7b9bcbe8ec92de39c229cb4bc3ea8f1
4
- data.tar.gz: bd29485e129a54d667414d448d3efd34cc4cb0f6
3
+ metadata.gz: d3cd5631909a7f886d895de734e4f3ca542dac4b
4
+ data.tar.gz: 80b1a19961bbbd0f1db1bbcfc06f20674b115417
5
5
  SHA512:
6
- metadata.gz: da9c536bd82e23ed9aec7e156dbc8f6119f681bb86a8c94c13e59b7995981b925bf93701bad084ffbc7a3b93b822e2d26004d45dc262baabc4bfd3b08657aefa
7
- data.tar.gz: b42eaa83bcd737f87914c8d3b30bf02e65094f6e6a8bcdff03fe343f62546f12774880030e0a16f702d40dbb7d6e5540ba4a5e6002f2f6d52fa58db32397fbe4
6
+ metadata.gz: e7dda44c769f213e908745eba06e65d85252c8fbe9b54dd16b61ea9e4ceea2bbaaecf38c2db12c28e0186331fa36ec0a8303f2a5505610b881c90cc8e5e3ee71
7
+ data.tar.gz: 2b3df006e9d35f4dc1dd4543790d301a600bb80fabd059bd80b78b1c63f0111430bb3c308f3ef8976a2aa3329b4f1a07bffda79832a5f17a1c8503f989203d28
data/.gitignore CHANGED
@@ -6,10 +6,13 @@
6
6
  /doc/
7
7
  /pkg/
8
8
  /spec/reports/
9
+ /spec/work/
9
10
  /tmp/
10
11
  *.bundle
11
12
  *.so
12
13
  *.o
13
14
  *.a
14
15
  mkmf.log
15
- /.idea/
16
+ /.idea/
17
+ *.pyc
18
+ tools/pdf/
data/.travis.yml CHANGED
@@ -1,21 +1,17 @@
1
1
  language: ruby
2
+ sudo: false
2
3
  cache: bundler
3
4
  rvm:
4
- - 1.9.3
5
5
  - 2.1.0
6
6
  - 2.2.0
7
7
  - ruby-head
8
- - jruby-19mode
8
+ - jruby-9.0.1.0
9
9
  jdk:
10
10
  - openjdk7
11
11
  - oraclejdk7
12
12
  - oraclejdk8
13
13
  matrix:
14
14
  exclude:
15
- - rvm: 1.9.3
16
- jdk: oraclejdk7
17
- - rvm: 1.9.3
18
- jdk: oraclejdk8
19
15
  - rvm: 2.1.0
20
16
  jdk: oraclejdk7
21
17
  - rvm: 2.1.0
@@ -28,9 +24,18 @@ matrix:
28
24
  jdk: oraclejdk7
29
25
  - rvm: ruby-head
30
26
  jdk: oraclejdk8
27
+ allow_failures:
28
+ - rvm: 2.1.0
29
+ - rvm: 2.2.0
30
+ - rvm: ruby-head
31
+ - rvm: jruby-9.0.1.0
31
32
  branches:
32
33
  only:
33
34
  - master
34
- before_install:
35
- - sudo apt-get update -qq
36
- - sudo apt-get install -y python2.7
35
+ addons:
36
+ apt:
37
+ packages:
38
+ - python2.7
39
+ - imagemagick
40
+ - libreoffice
41
+ - ghostscript
data/README.md CHANGED
@@ -1,3 +1,4 @@
1
+ [![Gem Version](https://badge.fury.io/rb/libis-format.svg)](http://badge.fury.io/rb/libis-format)
1
2
  [![Build Status](https://travis-ci.org/Kris-LIBIS/LIBIS_Format.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/LIBIS_Format)
2
3
  [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/LIBIS_Format.svg)](https://coveralls.io/r/Kris-LIBIS/LIBIS_Format)
3
4
  [![Dependency Status](https://gemnasium.com/Kris-LIBIS/LIBIS_Format.svg)](https://gemnasium.com/Kris-LIBIS/LIBIS_Format)
data/bin/pdf_copy ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'libis-format'
4
+ require 'libis-tools'
5
+
6
+ ::Libis::Tools::Config.logger.level = ::Logger::Severity::WARN
7
+
8
+ source = ARGV.shift
9
+ target = ARGV.shift
10
+ options = ARGV
11
+
12
+ ::Libis::Format::PdfCopy.run source, target, options
13
+
Binary file
data/data/PDFA_def.ps CHANGED
@@ -1,23 +1,31 @@
1
1
  %!
2
- % $Id$
3
2
  % This is a sample prefix file for creating a PDF/A document.
4
3
  % Feel free to modify entries marked with "Customize".
5
-
6
4
  % This assumes an ICC profile to reside in the file (ISO Coated sb.icc),
7
5
  % unless the user modifies the corresponding line below.
8
6
 
9
7
  % Define entries in the document Info dictionary :
10
-
11
- /ICCProfile (ISOcoated.icc) % Customize.
8
+ /ICCProfile ([** Fill in ICC profile location **])
12
9
  def
13
10
 
14
- [ /Title (Title) % Customize.
11
+ [ /Title (Title)
15
12
  /DOCINFO pdfmark
16
13
 
17
14
  % Define an ICC profile :
18
15
 
19
16
  [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
20
- [{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {4} ifelse >> /PUT pdfmark
17
+ [{icc_PDFA}
18
+ <<
19
+ /N currentpagedevice /ProcessColorModel known {
20
+ currentpagedevice /ProcessColorModel get dup /DeviceGray eq
21
+ {pop 1} {
22
+ /DeviceRGB eq
23
+ {3}{4} ifelse
24
+ } ifelse
25
+ } {
26
+ (ERROR, unable to determine ProcessColorModel) == flush
27
+ } ifelse
28
+ >> /PUT pdfmark
21
29
  [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
22
30
 
23
31
  % Define the output intent dictionary :
@@ -27,6 +35,6 @@ def
27
35
  /Type /OutputIntent % Must be so (the standard requires).
28
36
  /S /GTS_PDFA1 % Must be so (the standard requires).
29
37
  /DestOutputProfile {icc_PDFA} % Must be so (see above).
30
- /OutputConditionIdentifier (CGATS TR001) % Customize
38
+ /OutputConditionIdentifier ([** Fill in ICC reference name **])
31
39
  >> /PUT pdfmark
32
40
  [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
Binary file
data/data/types.yml CHANGED
@@ -13,12 +13,12 @@ IMAGE:
13
13
  MIME: image/tiff
14
14
  EXTENSIONS: tif,tiff
15
15
 
16
- JPEG2000:
16
+ JP2:
17
17
  NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
18
18
  MIME: image/jp2
19
19
  EXTENSIONS: jp2
20
20
 
21
- JPEG:
21
+ JPG:
22
22
  NAME: Joint Photographic Experts Group (JPEG)
23
23
  MIME: image/jpeg
24
24
  EXTENSIONS: jpg,jpe,jpeg
@@ -120,7 +120,7 @@ VIDEO:
120
120
  MIME: video/x-flv
121
121
  EXTENSIONS: flv
122
122
 
123
- DOCUMENT:
123
+ TEXT:
124
124
 
125
125
  TXT:
126
126
  NAME: Unformatted text
@@ -150,28 +150,6 @@ DOCUMENT:
150
150
  MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
151
151
  EXTENSIONS: docx
152
152
 
153
- MSXLS:
154
- NAME: Microsoft Excel Spreadsheet (XLS)
155
- MIME: application/vnd.ms-excel,application/msexcel
156
- EXTENSIONS: xls
157
-
158
- MSXLSX:
159
- NAME: Microsoft Excel OpenXML Spreadheet (XSLX)
160
- PUID: fido-fmt/189.xl
161
- MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
162
- EXTENSIONS: xslx
163
-
164
- MSPPT:
165
- NAME: Microsoft Powerpoint Presentation (PPT)
166
- MIME: application/vnd.ms-powerpoint,application/mspowerpoint
167
- EXTENSIONS: ppt
168
-
169
- MSPPTX:
170
- NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
171
- PUID: fido-fmt/189.ppt
172
- MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
173
- EXTENSIONS: pptx
174
-
175
153
  PDF:
176
154
  NAME: Adobe Portable Document Format (PDF)
177
155
  PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
@@ -203,6 +181,32 @@ DOCUMENT:
203
181
  PUID: fmt/101
204
182
  EXTENSIONS: xml
205
183
 
184
+ TABULAR:
185
+
186
+ MSXLS:
187
+ NAME: Microsoft Excel Spreadsheet (XLS)
188
+ MIME: application/vnd.ms-excel,application/msexcel
189
+ EXTENSIONS: xls
190
+
191
+ MSXLSX:
192
+ NAME: Microsoft Excel OpenXML Spreadheet (XSLX)
193
+ PUID: fido-fmt/189.xl
194
+ MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
195
+ EXTENSIONS: xslx
196
+
197
+ PRESENTATION:
198
+
199
+ MSPPT:
200
+ NAME: Microsoft Powerpoint Presentation (PPT)
201
+ MIME: application/vnd.ms-powerpoint,application/mspowerpoint
202
+ EXTENSIONS: ppt
203
+
204
+ MSPPTX:
205
+ NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
206
+ PUID: fido-fmt/189.ppt
207
+ MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
208
+ EXTENSIONS: pptx
209
+
206
210
  ARCHIVE:
207
211
 
208
212
  EAD:
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+ require 'os'
3
+
4
+ module Libis
5
+ module Format
6
+
7
+ # noinspection RubyConstantNamingConvention
8
+ Config = ::Libis::Tools::Config
9
+
10
+ Config[:converter_chain_max_level] = 8
11
+
12
+ Config[:java_path] = 'java'
13
+ Config[:soffice_path] = 'soffice'
14
+ Config[:ghostscript_path] = 'gs'
15
+ # Config[:pdfa_path] =
16
+ # File.absolute_path(
17
+ # File.join(
18
+ # File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
19
+ # )
20
+ # )
21
+ Config[:droid_path] =
22
+ File.absolute_path(
23
+ File.join(
24
+ File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
25
+ )
26
+ )
27
+ Config[:fido_path] =
28
+ File.absolute_path(
29
+ File.join(
30
+ File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
31
+ )
32
+ )
33
+
34
+ end
35
+ end
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  ### require 'tools/string'
4
-
4
+ require 'tmpdir'
5
5
  require 'libis/tools/logger'
6
6
  require 'libis/format/type_database'
7
7
 
@@ -14,37 +14,36 @@ module Libis
14
14
  class Base
15
15
  include Libis::Tools::Logger
16
16
 
17
- def input_types
18
- raise RuntimeError, 'Method #input_types needs to be overridden in converter'
19
- end
20
-
21
- protected
17
+ attr_reader :options, :flags
22
18
 
23
- def output_types
24
- raise RuntimeError, 'Method #output_types needs to be overridden in converter'
19
+ def initialize
20
+ @options = {}
21
+ @flags = {}
25
22
  end
26
23
 
27
- attr_accessor :source, :options, :flags
28
-
29
- def init(_)
30
- raise RuntimeError, 'Method #init should be implemented in converter'
24
+ def convert(source, target, format, opts = {})
25
+ unless File.exist? source
26
+ error "Cannot find file '#{source}'."
27
+ return nil
28
+ end
29
+ @options.merge!(opts[:options]) if opts[:options]
30
+ @flags.merge!(opts[:flags]) if opts[:flags]
31
31
  end
32
32
 
33
- def do_convert(_, _)
34
- raise RuntimeError, 'Method #do_convert should be implemented in converter'
33
+ def self.input_types(_ = nil)
34
+ raise RuntimeError, 'Method #input_types needs to be overridden in converter'
35
35
  end
36
36
 
37
- public
38
-
39
- def initialize( source = nil, options = {}, flags = {} )
40
- @source = source
41
- @options = options ? options : {}
42
- @flags = flags ? flags : {}
43
- init(source.to_s rescue nil)
37
+ def self.output_types(_ = nil)
38
+ raise RuntimeError, 'Method #output_types needs to be overridden in converter'
44
39
  end
45
40
 
46
- def convert(target, format = nil)
47
- do_convert(target, format)
41
+ def using_temp(target)
42
+ tempfile = File.join(Dir.tmpdir, Dir::Tmpname.make_tmpname(['convert', File.extname(target)], File.basename(target, '.*')))
43
+ result = yield tempfile
44
+ return nil unless result
45
+ FileUtils.move result, target
46
+ target
48
47
  end
49
48
 
50
49
  def Base.inherited( klass )
@@ -54,7 +53,7 @@ module Libis
54
53
  class << self
55
54
 
56
55
  def conversions
57
- input_types.inject({}) do |input_type, hash|
56
+ input_types.inject({}) do |hash, input_type|
58
57
  hash[input_type] = output_types
59
58
  hash
60
59
  end
@@ -92,10 +91,8 @@ module Libis
92
91
 
93
92
  end
94
93
 
95
-
96
94
  end
97
95
 
98
-
99
96
  end
100
97
 
101
98
  end
@@ -1,6 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  require 'fileutils'
4
+ require 'deep_dive'
4
5
 
5
6
  require 'libis/tools/logger'
6
7
  require 'libis/format/type_database'
@@ -11,66 +12,164 @@ module Libis
11
12
 
12
13
  class Chain
13
14
  include ::Libis::Tools::Logger
15
+ include DeepDive
14
16
 
15
- def initialize(converter_chain)
16
- @converter_chain = converter_chain
17
+ def initialize(source_format, target_format, operations = {})
18
+ @source_format = source_format
19
+ @target_format = target_format
20
+ @operations = operations
21
+ @converter_chain = []
22
+ end
23
+
24
+ # @param [Libis::Format::Converter::Base.class] converter
25
+ # @return [Array[Hash]]
26
+ def append(converter)
27
+ return [] unless converter
28
+ valid_chain_nodes(converter).map do |node|
29
+ self.ddup.add_chain_node(node)
30
+ end.compact
31
+ end
32
+
33
+ def closed?
34
+ !@converter_chain.empty? &&
35
+ @converter_chain.first[:input] == @source_format &&
36
+ @converter_chain.last[:output] == @target_format
37
+ end
38
+
39
+ def valid?
40
+ closed? && apply_operations
17
41
  end
18
42
 
19
43
  def to_array
20
44
  @converter_chain
21
45
  end
22
46
 
23
- def convert(src_file, target_file, operations = [])
47
+ def size
48
+ @converter_chain.size
49
+ end
24
50
 
25
- chain = @converter_chain.clone
51
+ alias_method :length, :size
52
+
53
+ def to_s
54
+ # nodes_string = @converter_chain.map do |node|
55
+ # node_name = node[:converter].name.gsub(/^.*::/,'')
56
+ # node_operations = '(' + node[:operations].map do |operation|
57
+ # op = "#{operation[:method]}:#{operation[:argument]}"
58
+ # op
59
+ # end.join(', ') + ')' rescue ''
60
+ # node_string = "#{node_name}#{node_operations}->-#{node[:output]}"
61
+ # node_string
62
+ # end.join('->-')
63
+ # "#{@source_format}->-#{nodes_string}"
64
+ "#{@source_format}->-#{@converter_chain.map do |node|
65
+ "#{node[:converter].name.gsub(/^.*::/,'')}#{node[:operations] ?
66
+ "(#{node[:operations].each { |operation| "#{operation[:method]}:#{operation[:argument]}" }.join(',')})" :
67
+ ''
68
+ }->-#{node[:output]}"
69
+ end.join('->-')}"
70
+ end
26
71
 
27
- my_operations = {}
72
+ def convert(src_file, target_file)
28
73
 
29
- # sanity check: check if the required operations are supported by at least one converter in the chain
30
- operations.each do |k,v|
31
- method = k.to_s.downcase.to_sym
32
- chain_element = @converter_chain.reverse.detect { |c| c[:converter].new.respond_to? method }
33
- if chain_element
34
- my_operations[chain_element[:converter]] ||= {}
35
- my_operations[chain_element[:converter]][method] = v
36
- else
37
- error "No converter in the converter chain supports '#{method.to_s}'. Continuing conversion without this operation."
38
- end
74
+ unless valid?
75
+ error 'Converter chain is not valid'
76
+ return nil
39
77
  end
40
78
 
41
79
  temp_files = []
42
80
 
43
81
  # noinspection RubyParenthesesAroundConditionInspection
44
- while (chain_element = chain.shift)
82
+ result = @converter_chain.each_with_index do |node, i|
45
83
 
46
- target_type = chain_element[:target]
47
- converter_class = chain_element[:converter]
48
- converter = converter_class.new(src_file)
84
+ target_type = node[:output]
85
+ converter_class = node[:converter]
86
+ converter = converter_class.new
49
87
 
50
- my_operations[converter_class].each do |k,v|
51
- converter.send k, v
88
+ node[:operations].each do |operation|
89
+ converter.send operation[:method], operation[:argument]
52
90
  end
53
91
 
54
92
  target = target_file
55
93
 
56
- unless chain.empty?
57
- target += '.temp.' + TypeDatabase.instance.type2ext(target_type)
58
- target += '.' + TypeDatabase.instance.type2ext(target_type) while File.exist? target
94
+ if i < size
95
+ target += ".temp.#{TypeDatabase.type_extentions(target_type).first}"
96
+ target += ".#{TypeDatabase.type_extentions(target_type).first}" while File.exist? target
59
97
  temp_files << target
60
98
  end
61
99
 
62
100
  FileUtils.mkdir_p File.dirname(target)
63
101
 
64
- converter.convert(target, target_type)
102
+ src_file = converter.convert(src_file, target, target_type)
65
103
 
66
- src_file = target
104
+ break :failed unless src_file
67
105
 
68
106
  end
69
107
 
70
108
  temp_files.each do |f|
71
- File.delete(f)
109
+ FileUtils.rm(f, force: true)
110
+ end
111
+
112
+ result == :failed ? nil : target_file
113
+
114
+ end
115
+
116
+ def valid_chain_nodes(converter)
117
+ return [] if closed?
118
+ source_format = @converter_chain.last[:output] rescue @source_format
119
+ nodes = []
120
+ if converter.input_types.include? source_format
121
+ converter.output_types(source_format).each do |format|
122
+ node = {converter: converter, input: source_format, output: format}
123
+ next if node_exists?(node)
124
+ nodes << node
125
+ end
72
126
  end
127
+ nodes
128
+ end
73
129
 
130
+ def add_chain_node(node = {})
131
+ return nil if closed?
132
+ last_converter = @converter_chain.last
133
+ source_format = last_converter ? last_converter[:output] : @source_format
134
+ node[:input] ||= source_format
135
+ return nil unless node[:input] == source_format
136
+ return nil unless node[:output] && node[:converter].output_types(source_format).include?(node[:output])
137
+ return nil unless node[:converter].input_types.include? source_format
138
+ return nil if node_exists?(node)
139
+ @converter_chain << node
140
+ # debug "Chain: #{self}"
141
+ self
142
+ end
143
+
144
+ def apply_operations
145
+ return false unless closed?
146
+ temp_chain = @converter_chain.reverse.ddup
147
+ applied = true
148
+ operations = @operations.ddup
149
+ while (operation = operations.shift)
150
+ method = operation.first.to_s.downcase.to_sym
151
+ applied &&= :found == temp_chain.each do |node|
152
+ next unless node[:converter].instance_methods.include?(method)
153
+ node[:operations] ||= []
154
+ node[:operations] << {method: method, argument: operation.last}
155
+ break :found
156
+ end
157
+ end
158
+ if applied && operations.empty?
159
+ @converter_chain = temp_chain.reverse
160
+ @operations.clear
161
+ return true
162
+ end
163
+ false
164
+ end
165
+
166
+
167
+ private
168
+
169
+ def node_exists?(node)
170
+ @converter_chain.detect do |n|
171
+ n[:converter] == node[:converter] && n[:input] == node[:input] && n[:output] == node[:output]
172
+ end
74
173
  end
75
174
 
76
175
  end