libis-format 0.9.1 → 0.9.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (40)
  1. checksums.yaml +4 -4
  2. data/.gitignore +4 -1
  3. data/.travis.yml +14 -9
  4. data/README.md +1 -0
  5. data/bin/pdf_copy +13 -0
  6. data/data/ISOcoated_v2_eci.icc +0 -0
  7. data/data/PDFA_def.ps +15 -7
  8. data/data/eciRGB_v2.icc +0 -0
  9. data/data/types.yml +29 -25
  10. data/lib/libis/format/config.rb +35 -0
  11. data/lib/libis/format/converter/base.rb +23 -26
  12. data/lib/libis/format/converter/chain.rb +126 -27
  13. data/lib/libis/format/converter/image_converter.rb +211 -0
  14. data/lib/libis/format/converter/office_converter.rb +46 -0
  15. data/lib/libis/format/converter/pdf_converter.rb +110 -0
  16. data/lib/libis/format/converter/repository.rb +38 -50
  17. data/lib/libis/format/droid.rb +15 -8
  18. data/lib/libis/format/fido.rb +3 -10
  19. data/lib/libis/format/identifier.rb +18 -14
  20. data/lib/libis/format/office_to_pdf.rb +52 -0
  21. data/lib/libis/format/pdf_copy.rb +50 -0
  22. data/lib/libis/format/pdf_to_pdfa.rb +79 -0
  23. data/lib/libis/format/pdfa_validator.rb +61 -0
  24. data/lib/libis/format/type_database.rb +1 -1
  25. data/lib/libis/format/version.rb +1 -1
  26. data/lib/libis/format.rb +9 -0
  27. data/libis-format.gemspec +2 -0
  28. data/spec/converter_spec.rb +212 -0
  29. data/spec/data/test-options.jpg +0 -0
  30. data/spec/data/test.jpg +0 -0
  31. data/spec/data/test.pdf.tif +0 -0
  32. data/spec/data/test.png +0 -0
  33. data/spec/data/test_pdfa.pdf +0 -0
  34. data/spec/identifier_spec.rb +1 -0
  35. data/tools/PdfTool.jar +0 -0
  36. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  37. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  38. metadata +56 -5
  39. data/data/ISOcoated.icc +0 -0
  40. data/tools/fido/argparselocal.pyc +0 -0
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d75c38cec7b9bcbe8ec92de39c229cb4bc3ea8f1
4
- data.tar.gz: bd29485e129a54d667414d448d3efd34cc4cb0f6
3
+ metadata.gz: d3cd5631909a7f886d895de734e4f3ca542dac4b
4
+ data.tar.gz: 80b1a19961bbbd0f1db1bbcfc06f20674b115417
5
5
  SHA512:
6
- metadata.gz: da9c536bd82e23ed9aec7e156dbc8f6119f681bb86a8c94c13e59b7995981b925bf93701bad084ffbc7a3b93b822e2d26004d45dc262baabc4bfd3b08657aefa
7
- data.tar.gz: b42eaa83bcd737f87914c8d3b30bf02e65094f6e6a8bcdff03fe343f62546f12774880030e0a16f702d40dbb7d6e5540ba4a5e6002f2f6d52fa58db32397fbe4
6
+ metadata.gz: e7dda44c769f213e908745eba06e65d85252c8fbe9b54dd16b61ea9e4ceea2bbaaecf38c2db12c28e0186331fa36ec0a8303f2a5505610b881c90cc8e5e3ee71
7
+ data.tar.gz: 2b3df006e9d35f4dc1dd4543790d301a600bb80fabd059bd80b78b1c63f0111430bb3c308f3ef8976a2aa3329b4f1a07bffda79832a5f17a1c8503f989203d28
data/.gitignore CHANGED
@@ -6,10 +6,13 @@
6
6
  /doc/
7
7
  /pkg/
8
8
  /spec/reports/
9
+ /spec/work/
9
10
  /tmp/
10
11
  *.bundle
11
12
  *.so
12
13
  *.o
13
14
  *.a
14
15
  mkmf.log
15
- /.idea/
16
+ /.idea/
17
+ *.pyc
18
+ tools/pdf/
data/.travis.yml CHANGED
@@ -1,21 +1,17 @@
1
1
  language: ruby
2
+ sudo: false
2
3
  cache: bundler
3
4
  rvm:
4
- - 1.9.3
5
5
  - 2.1.0
6
6
  - 2.2.0
7
7
  - ruby-head
8
- - jruby-19mode
8
+ - jruby-9.0.1.0
9
9
  jdk:
10
10
  - openjdk7
11
11
  - oraclejdk7
12
12
  - oraclejdk8
13
13
  matrix:
14
14
  exclude:
15
- - rvm: 1.9.3
16
- jdk: oraclejdk7
17
- - rvm: 1.9.3
18
- jdk: oraclejdk8
19
15
  - rvm: 2.1.0
20
16
  jdk: oraclejdk7
21
17
  - rvm: 2.1.0
@@ -28,9 +24,18 @@ matrix:
28
24
  jdk: oraclejdk7
29
25
  - rvm: ruby-head
30
26
  jdk: oraclejdk8
27
+ allow_failures:
28
+ - rvm: 2.1.0
29
+ - rvm: 2.2.0
30
+ - rvm: ruby-head
31
+ - rvm: jruby-9.0.1.0
31
32
  branches:
32
33
  only:
33
34
  - master
34
- before_install:
35
- - sudo apt-get update -qq
36
- - sudo apt-get install -y python2.7
35
+ addons:
36
+ apt:
37
+ packages:
38
+ - python2.7
39
+ - imagemagick
40
+ - libreoffice
41
+ - ghostscript
data/README.md CHANGED
@@ -1,3 +1,4 @@
1
+ [![Gem Version](https://badge.fury.io/rb/libis-format.svg)](http://badge.fury.io/rb/libis-format)
1
2
  [![Build Status](https://travis-ci.org/Kris-LIBIS/LIBIS_Format.svg?branch=master)](https://travis-ci.org/Kris-LIBIS/LIBIS_Format)
2
3
  [![Coverage Status](https://img.shields.io/coveralls/Kris-LIBIS/LIBIS_Format.svg)](https://coveralls.io/r/Kris-LIBIS/LIBIS_Format)
3
4
  [![Dependency Status](https://gemnasium.com/Kris-LIBIS/LIBIS_Format.svg)](https://gemnasium.com/Kris-LIBIS/LIBIS_Format)
data/bin/pdf_copy ADDED
@@ -0,0 +1,13 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'libis-format'
4
+ require 'libis-tools'
5
+
6
+ ::Libis::Tools::Config.logger.level = ::Logger::Severity::WARN
7
+
8
+ source = ARGV.shift
9
+ target = ARGV.shift
10
+ options = ARGV
11
+
12
+ ::Libis::Format::PdfCopy.run source, target, options
13
+
data/data/ISOcoated_v2_eci.icc
Binary file
data/data/PDFA_def.ps CHANGED
@@ -1,23 +1,31 @@
1
1
  %!
2
- % $Id$
3
2
  % This is a sample prefix file for creating a PDF/A document.
4
3
  % Feel free to modify entries marked with "Customize".
5
-
6
4
  % This assumes an ICC profile to reside in the file (ISO Coated sb.icc),
7
5
  % unless the user modifies the corresponding line below.
8
6
 
9
7
  % Define entries in the document Info dictionary :
10
-
11
- /ICCProfile (ISOcoated.icc) % Customize.
8
+ /ICCProfile ([** Fill in ICC profile location **])
12
9
  def
13
10
 
14
- [ /Title (Title) % Customize.
11
+ [ /Title (Title)
15
12
  /DOCINFO pdfmark
16
13
 
17
14
  % Define an ICC profile :
18
15
 
19
16
  [/_objdef {icc_PDFA} /type /stream /OBJ pdfmark
20
- [{icc_PDFA} <</N systemdict /ProcessColorModel get /DeviceGray eq {1} {4} ifelse >> /PUT pdfmark
17
+ [{icc_PDFA}
18
+ <<
19
+ /N currentpagedevice /ProcessColorModel known {
20
+ currentpagedevice /ProcessColorModel get dup /DeviceGray eq
21
+ {pop 1} {
22
+ /DeviceRGB eq
23
+ {3}{4} ifelse
24
+ } ifelse
25
+ } {
26
+ (ERROR, unable to determine ProcessColorModel) == flush
27
+ } ifelse
28
+ >> /PUT pdfmark
21
29
  [{icc_PDFA} ICCProfile (r) file /PUT pdfmark
22
30
 
23
31
  % Define the output intent dictionary :
@@ -27,6 +35,6 @@ def
27
35
  /Type /OutputIntent % Must be so (the standard requires).
28
36
  /S /GTS_PDFA1 % Must be so (the standard requires).
29
37
  /DestOutputProfile {icc_PDFA} % Must be so (see above).
30
- /OutputConditionIdentifier (CGATS TR001) % Customize
38
+ /OutputConditionIdentifier ([** Fill in ICC reference name **])
31
39
  >> /PUT pdfmark
32
40
  [{Catalog} <</OutputIntents [ {OutputIntent_PDFA} ]>> /PUT pdfmark
data/data/eciRGB_v2.icc
Binary file
data/data/types.yml CHANGED
@@ -13,12 +13,12 @@ IMAGE:
13
13
  MIME: image/tiff
14
14
  EXTENSIONS: tif,tiff
15
15
 
16
- JPEG2000:
16
+ JP2:
17
17
  NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
18
18
  MIME: image/jp2
19
19
  EXTENSIONS: jp2
20
20
 
21
- JPEG:
21
+ JPG:
22
22
  NAME: Joint Photographic Experts Group (JPEG)
23
23
  MIME: image/jpeg
24
24
  EXTENSIONS: jpg,jpe,jpeg
@@ -120,7 +120,7 @@ VIDEO:
120
120
  MIME: video/x-flv
121
121
  EXTENSIONS: flv
122
122
 
123
- DOCUMENT:
123
+ TEXT:
124
124
 
125
125
  TXT:
126
126
  NAME: Unformatted text
@@ -150,28 +150,6 @@ DOCUMENT:
150
150
  MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
151
151
  EXTENSIONS: docx
152
152
 
153
- MSXLS:
154
- NAME: Microsoft Excel Spreadsheet (XLS)
155
- MIME: application/vnd.ms-excel,application/msexcel
156
- EXTENSIONS: xls
157
-
158
- MSXLSX:
159
- NAME: Microsoft Excel OpenXML Spreadheet (XSLX)
160
- PUID: fido-fmt/189.xl
161
- MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
162
- EXTENSIONS: xslx
163
-
164
- MSPPT:
165
- NAME: Microsoft Powerpoint Presentation (PPT)
166
- MIME: application/vnd.ms-powerpoint,application/mspowerpoint
167
- EXTENSIONS: ppt
168
-
169
- MSPPTX:
170
- NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
171
- PUID: fido-fmt/189.ppt
172
- MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
173
- EXTENSIONS: pptx
174
-
175
153
  PDF:
176
154
  NAME: Adobe Portable Document Format (PDF)
177
155
  PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
@@ -203,6 +181,32 @@ DOCUMENT:
203
181
  PUID: fmt/101
204
182
  EXTENSIONS: xml
205
183
 
184
+ TABULAR:
185
+
186
+ MSXLS:
187
+ NAME: Microsoft Excel Spreadsheet (XLS)
188
+ MIME: application/vnd.ms-excel,application/msexcel
189
+ EXTENSIONS: xls
190
+
191
+ MSXLSX:
192
+ NAME: Microsoft Excel OpenXML Spreadheet (XSLX)
193
+ PUID: fido-fmt/189.xl
194
+ MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
195
+ EXTENSIONS: xslx
196
+
197
+ PRESENTATION:
198
+
199
+ MSPPT:
200
+ NAME: Microsoft Powerpoint Presentation (PPT)
201
+ MIME: application/vnd.ms-powerpoint,application/mspowerpoint
202
+ EXTENSIONS: ppt
203
+
204
+ MSPPTX:
205
+ NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
206
+ PUID: fido-fmt/189.ppt
207
+ MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
208
+ EXTENSIONS: pptx
209
+
206
210
  ARCHIVE:
207
211
 
208
212
  EAD:
data/lib/libis/format/config.rb ADDED
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+ require 'os'
3
+
4
+ module Libis
5
+ module Format
6
+
7
+ # noinspection RubyConstantNamingConvention
8
+ Config = ::Libis::Tools::Config
9
+
10
+ Config[:converter_chain_max_level] = 8
11
+
12
+ Config[:java_path] = 'java'
13
+ Config[:soffice_path] = 'soffice'
14
+ Config[:ghostscript_path] = 'gs'
15
+ # Config[:pdfa_path] =
16
+ # File.absolute_path(
17
+ # File.join(
18
+ # File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
19
+ # )
20
+ # )
21
+ Config[:droid_path] =
22
+ File.absolute_path(
23
+ File.join(
24
+ File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
25
+ )
26
+ )
27
+ Config[:fido_path] =
28
+ File.absolute_path(
29
+ File.join(
30
+ File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
31
+ )
32
+ )
33
+
34
+ end
35
+ end
data/lib/libis/format/converter/base.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  ### require 'tools/string'
4
-
4
+ require 'tmpdir'
5
5
  require 'libis/tools/logger'
6
6
  require 'libis/format/type_database'
7
7
 
@@ -14,37 +14,36 @@ module Libis
14
14
  class Base
15
15
  include Libis::Tools::Logger
16
16
 
17
- def input_types
18
- raise RuntimeError, 'Method #input_types needs to be overridden in converter'
19
- end
20
-
21
- protected
17
+ attr_reader :options, :flags
22
18
 
23
- def output_types
24
- raise RuntimeError, 'Method #output_types needs to be overridden in converter'
19
+ def initialize
20
+ @options = {}
21
+ @flags = {}
25
22
  end
26
23
 
27
- attr_accessor :source, :options, :flags
28
-
29
- def init(_)
30
- raise RuntimeError, 'Method #init should be implemented in converter'
24
+ def convert(source, target, format, opts = {})
25
+ unless File.exist? source
26
+ error "Cannot find file '#{source}'."
27
+ return nil
28
+ end
29
+ @options.merge!(opts[:options]) if opts[:options]
30
+ @flags.merge!(opts[:flags]) if opts[:flags]
31
31
  end
32
32
 
33
- def do_convert(_, _)
34
- raise RuntimeError, 'Method #do_convert should be implemented in converter'
33
+ def self.input_types(_ = nil)
34
+ raise RuntimeError, 'Method #input_types needs to be overridden in converter'
35
35
  end
36
36
 
37
- public
38
-
39
- def initialize( source = nil, options = {}, flags = {} )
40
- @source = source
41
- @options = options ? options : {}
42
- @flags = flags ? flags : {}
43
- init(source.to_s rescue nil)
37
+ def self.output_types(_ = nil)
38
+ raise RuntimeError, 'Method #output_types needs to be overridden in converter'
44
39
  end
45
40
 
46
- def convert(target, format = nil)
47
- do_convert(target, format)
41
+ def using_temp(target)
42
+ tempfile = File.join(Dir.tmpdir, Dir::Tmpname.make_tmpname(['convert', File.extname(target)], File.basename(target, '.*')))
43
+ result = yield tempfile
44
+ return nil unless result
45
+ FileUtils.move result, target
46
+ target
48
47
  end
49
48
 
50
49
  def Base.inherited( klass )
@@ -54,7 +53,7 @@ module Libis
54
53
  class << self
55
54
 
56
55
  def conversions
57
- input_types.inject({}) do |input_type, hash|
56
+ input_types.inject({}) do |hash, input_type|
58
57
  hash[input_type] = output_types
59
58
  hash
60
59
  end
@@ -92,10 +91,8 @@ module Libis
92
91
 
93
92
  end
94
93
 
95
-
96
94
  end
97
95
 
98
-
99
96
  end
100
97
 
101
98
  end
data/lib/libis/format/converter/chain.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # coding: utf-8
2
2
 
3
3
  require 'fileutils'
4
+ require 'deep_dive'
4
5
 
5
6
  require 'libis/tools/logger'
6
7
  require 'libis/format/type_database'
@@ -11,66 +12,164 @@ module Libis
11
12
 
12
13
  class Chain
13
14
  include ::Libis::Tools::Logger
15
+ include DeepDive
14
16
 
15
- def initialize(converter_chain)
16
- @converter_chain = converter_chain
17
+ def initialize(source_format, target_format, operations = {})
18
+ @source_format = source_format
19
+ @target_format = target_format
20
+ @operations = operations
21
+ @converter_chain = []
22
+ end
23
+
24
+ # @param [Libis::Format::Converter::Base.class] converter
25
+ # @return [Array[Hash]]
26
+ def append(converter)
27
+ return [] unless converter
28
+ valid_chain_nodes(converter).map do |node|
29
+ self.ddup.add_chain_node(node)
30
+ end.compact
31
+ end
32
+
33
+ def closed?
34
+ !@converter_chain.empty? &&
35
+ @converter_chain.first[:input] == @source_format &&
36
+ @converter_chain.last[:output] == @target_format
37
+ end
38
+
39
+ def valid?
40
+ closed? && apply_operations
17
41
  end
18
42
 
19
43
  def to_array
20
44
  @converter_chain
21
45
  end
22
46
 
23
- def convert(src_file, target_file, operations = [])
47
+ def size
48
+ @converter_chain.size
49
+ end
24
50
 
25
- chain = @converter_chain.clone
51
+ alias_method :length, :size
52
+
53
+ def to_s
54
+ # nodes_string = @converter_chain.map do |node|
55
+ # node_name = node[:converter].name.gsub(/^.*::/,'')
56
+ # node_operations = '(' + node[:operations].map do |operation|
57
+ # op = "#{operation[:method]}:#{operation[:argument]}"
58
+ # op
59
+ # end.join(', ') + ')' rescue ''
60
+ # node_string = "#{node_name}#{node_operations}->-#{node[:output]}"
61
+ # node_string
62
+ # end.join('->-')
63
+ # "#{@source_format}->-#{nodes_string}"
64
+ "#{@source_format}->-#{@converter_chain.map do |node|
65
+ "#{node[:converter].name.gsub(/^.*::/,'')}#{node[:operations] ?
66
+ "(#{node[:operations].each { |operation| "#{operation[:method]}:#{operation[:argument]}" }.join(',')})" :
67
+ ''
68
+ }->-#{node[:output]}"
69
+ end.join('->-')}"
70
+ end
26
71
 
27
- my_operations = {}
72
+ def convert(src_file, target_file)
28
73
 
29
- # sanity check: check if the required operations are supported by at least one converter in the chain
30
- operations.each do |k,v|
31
- method = k.to_s.downcase.to_sym
32
- chain_element = @converter_chain.reverse.detect { |c| c[:converter].new.respond_to? method }
33
- if chain_element
34
- my_operations[chain_element[:converter]] ||= {}
35
- my_operations[chain_element[:converter]][method] = v
36
- else
37
- error "No converter in the converter chain supports '#{method.to_s}'. Continuing conversion without this operation."
38
- end
74
+ unless valid?
75
+ error 'Converter chain is not valid'
76
+ return nil
39
77
  end
40
78
 
41
79
  temp_files = []
42
80
 
43
81
  # noinspection RubyParenthesesAroundConditionInspection
44
- while (chain_element = chain.shift)
82
+ result = @converter_chain.each_with_index do |node, i|
45
83
 
46
- target_type = chain_element[:target]
47
- converter_class = chain_element[:converter]
48
- converter = converter_class.new(src_file)
84
+ target_type = node[:output]
85
+ converter_class = node[:converter]
86
+ converter = converter_class.new
49
87
 
50
- my_operations[converter_class].each do |k,v|
51
- converter.send k, v
88
+ node[:operations].each do |operation|
89
+ converter.send operation[:method], operation[:argument]
52
90
  end
53
91
 
54
92
  target = target_file
55
93
 
56
- unless chain.empty?
57
- target += '.temp.' + TypeDatabase.instance.type2ext(target_type)
58
- target += '.' + TypeDatabase.instance.type2ext(target_type) while File.exist? target
94
+ if i < size
95
+ target += ".temp.#{TypeDatabase.type_extentions(target_type).first}"
96
+ target += ".#{TypeDatabase.type_extentions(target_type).first}" while File.exist? target
59
97
  temp_files << target
60
98
  end
61
99
 
62
100
  FileUtils.mkdir_p File.dirname(target)
63
101
 
64
- converter.convert(target, target_type)
102
+ src_file = converter.convert(src_file, target, target_type)
65
103
 
66
- src_file = target
104
+ break :failed unless src_file
67
105
 
68
106
  end
69
107
 
70
108
  temp_files.each do |f|
71
- File.delete(f)
109
+ FileUtils.rm(f, force: true)
110
+ end
111
+
112
+ result == :failed ? nil : target_file
113
+
114
+ end
115
+
116
+ def valid_chain_nodes(converter)
117
+ return [] if closed?
118
+ source_format = @converter_chain.last[:output] rescue @source_format
119
+ nodes = []
120
+ if converter.input_types.include? source_format
121
+ converter.output_types(source_format).each do |format|
122
+ node = {converter: converter, input: source_format, output: format}
123
+ next if node_exists?(node)
124
+ nodes << node
125
+ end
72
126
  end
127
+ nodes
128
+ end
73
129
 
130
+ def add_chain_node(node = {})
131
+ return nil if closed?
132
+ last_converter = @converter_chain.last
133
+ source_format = last_converter ? last_converter[:output] : @source_format
134
+ node[:input] ||= source_format
135
+ return nil unless node[:input] == source_format
136
+ return nil unless node[:output] && node[:converter].output_types(source_format).include?(node[:output])
137
+ return nil unless node[:converter].input_types.include? source_format
138
+ return nil if node_exists?(node)
139
+ @converter_chain << node
140
+ # debug "Chain: #{self}"
141
+ self
142
+ end
143
+
144
+ def apply_operations
145
+ return false unless closed?
146
+ temp_chain = @converter_chain.reverse.ddup
147
+ applied = true
148
+ operations = @operations.ddup
149
+ while (operation = operations.shift)
150
+ method = operation.first.to_s.downcase.to_sym
151
+ applied &&= :found == temp_chain.each do |node|
152
+ next unless node[:converter].instance_methods.include?(method)
153
+ node[:operations] ||= []
154
+ node[:operations] << {method: method, argument: operation.last}
155
+ break :found
156
+ end
157
+ end
158
+ if applied && operations.empty?
159
+ @converter_chain = temp_chain.reverse
160
+ @operations.clear
161
+ return true
162
+ end
163
+ false
164
+ end
165
+
166
+
167
+ private
168
+
169
+ def node_exists?(node)
170
+ @converter_chain.detect do |n|
171
+ n[:converter] == node[:converter] && n[:input] == node[:input] && n[:output] == node[:output]
172
+ end
74
173
  end
75
174
 
76
175
  end