libis-format 0.9.5-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (207) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +18 -0
  4. data/.travis.yml +41 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +39 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/bin/pdf_copy +13 -0
  12. data/data/ISOcoated_v2_eci.icc +0 -0
  13. data/data/PDFA_def.ps +40 -0
  14. data/data/ead.xsd +2728 -0
  15. data/data/eciRGB_v2.icc +0 -0
  16. data/data/lias_formats.xml +106 -0
  17. data/data/types.yml +217 -0
  18. data/lib/libis/format/config.rb +35 -0
  19. data/lib/libis/format/converter/base.rb +101 -0
  20. data/lib/libis/format/converter/chain.rb +167 -0
  21. data/lib/libis/format/converter/image_converter.rb +214 -0
  22. data/lib/libis/format/converter/office_converter.rb +50 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +139 -0
  24. data/lib/libis/format/converter/repository.rb +98 -0
  25. data/lib/libis/format/converter.rb +11 -0
  26. data/lib/libis/format/droid.rb +45 -0
  27. data/lib/libis/format/fido.rb +102 -0
  28. data/lib/libis/format/identifier.rb +189 -0
  29. data/lib/libis/format/office_to_pdf.rb +52 -0
  30. data/lib/libis/format/pdf_copy.rb +40 -0
  31. data/lib/libis/format/pdf_merge.rb +41 -0
  32. data/lib/libis/format/pdf_split.rb +39 -0
  33. data/lib/libis/format/pdf_to_pdfa.rb +76 -0
  34. data/lib/libis/format/pdfa_validator.rb +61 -0
  35. data/lib/libis/format/type_database.rb +170 -0
  36. data/lib/libis/format/version.rb +5 -0
  37. data/lib/libis/format.rb +23 -0
  38. data/lib/libis-format.rb +1 -0
  39. data/libis-format.gemspec +34 -0
  40. data/spec/converter_spec.rb +212 -0
  41. data/spec/data/Cevennes2.bmp +0 -0
  42. data/spec/data/Cevennes2.jp2 +0 -0
  43. data/spec/data/Cevennes2.ppm +22492 -0
  44. data/spec/data/test-ead.xml +392 -0
  45. data/spec/data/test-jpg.tif +0 -0
  46. data/spec/data/test-lzw.tif +0 -0
  47. data/spec/data/test-options.jpg +0 -0
  48. data/spec/data/test.bmp +0 -0
  49. data/spec/data/test.doc +0 -0
  50. data/spec/data/test.docx +0 -0
  51. data/spec/data/test.gif +0 -0
  52. data/spec/data/test.jpg +0 -0
  53. data/spec/data/test.ods +0 -0
  54. data/spec/data/test.odt +0 -0
  55. data/spec/data/test.pdf +0 -0
  56. data/spec/data/test.pdf.tif +0 -0
  57. data/spec/data/test.png +0 -0
  58. data/spec/data/test.ps +8631 -0
  59. data/spec/data/test.psd +0 -0
  60. data/spec/data/test.rtf +1455 -0
  61. data/spec/data/test.tif +0 -0
  62. data/spec/data/test.txt +12 -0
  63. data/spec/data/test.xcf +0 -0
  64. data/spec/data/test.xls +0 -0
  65. data/spec/data/test.xlsx +0 -0
  66. data/spec/data/test.xml +4 -0
  67. data/spec/data/test_pdfa.pdf +0 -0
  68. data/spec/identifier_spec.rb +60 -0
  69. data/spec/spec_helper.rb +9 -0
  70. data/spec/test_types.yml +12 -0
  71. data/spec/type_database_spec.rb +140 -0
  72. data/tools/PdfTool.jar +0 -0
  73. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  74. data/tools/bcprov-jdk15on-1.49.jar +0 -0
  75. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  76. data/tools/droid/container-signature-20150307.xml +2235 -0
  77. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  78. data/tools/droid/droid.bat +154 -0
  79. data/tools/droid/droid.sh +138 -0
  80. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  81. data/tools/droid/lib/activation-1.1.jar +0 -0
  82. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  83. data/tools/droid/lib/antlr-3.2.jar +0 -0
  84. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  85. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  86. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  87. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  88. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  89. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  90. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  91. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  92. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  93. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  94. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  95. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  96. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  97. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  98. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  99. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  100. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  101. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  102. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  103. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  104. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  105. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  106. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  107. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  108. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  109. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  110. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  111. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  112. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  113. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  114. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  115. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  116. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  117. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  118. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  119. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  120. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  121. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  122. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  123. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  124. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  125. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  126. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  127. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  128. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  129. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  130. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  131. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  132. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  133. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  134. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  135. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  136. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  138. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  139. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  140. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  141. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  142. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  143. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  144. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  145. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  146. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  147. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  148. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  149. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  150. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  151. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  152. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  153. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  154. data/tools/droid/lib/jta-1.1.jar +0 -0
  155. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  156. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  157. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  158. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  159. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  160. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  161. data/tools/droid/lib/poi-3.7.jar +0 -0
  162. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  163. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  164. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  165. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  166. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  167. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  168. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  169. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  170. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  171. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  172. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  173. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  174. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  175. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  176. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  177. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  178. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  179. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  180. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  181. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  182. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  183. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  184. data/tools/droid/lib/xz-1.0.jar +0 -0
  185. data/tools/fido/__init__.py +0 -0
  186. data/tools/fido/argparselocal.py +2355 -0
  187. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  188. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  189. data/tools/fido/conf/dc.xsd +119 -0
  190. data/tools/fido/conf/dcmitype.xsd +53 -0
  191. data/tools/fido/conf/dcterms.xsd +383 -0
  192. data/tools/fido/conf/fido-formats.xsd +173 -0
  193. data/tools/fido/conf/format_extension_template.xml +105 -0
  194. data/tools/fido/conf/format_extensions.xml +498 -0
  195. data/tools/fido/conf/formats-v81.xml +38355 -0
  196. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  197. data/tools/fido/conf/versions.xml +8 -0
  198. data/tools/fido/fido.bat +4 -0
  199. data/tools/fido/fido.py +854 -0
  200. data/tools/fido/fido.sh +5 -0
  201. data/tools/fido/prepare.py +616 -0
  202. data/tools/fido/pronomutils.py +115 -0
  203. data/tools/fido/toxml.py +52 -0
  204. data/tools/fido/update_signatures.py +171 -0
  205. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  206. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  207. metadata +396 -0
@@ -0,0 +1,189 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
+ require 'libis-tools'
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/extend/empty'
8
+
9
+ require 'libis/format/type_database'
10
+
11
+ require_relative 'fido'
12
+ require_relative 'droid'
13
+
14
module Libis
  module Format

    # Singleton that determines the MIME type and PRONOM identifier (PUID) of a file.
    #
    # #get runs a chain of detectors - FIDO, DROID, the 'file' command and the file extension.
    # Every detector first checks the result gathered so far with #result_ok? and leaves it
    # untouched when it is already acceptable. Finally, XML files are validated against the
    # registered XML Schemas to refine their MIME type.
    class Identifier
      include ::Libis::Tools::Logger
      include Singleton

      # MIME types that are not accepted as a final answer when no TYPE is known (see #result_ok?).
      RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES
      # MIME types for which an earlier answer is rejected when DROID is asking (see #result_ok?).
      FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

      # Hash mapping a MIME type to the XML Schema file that identifies it.
      attr_reader :xml_validations

      protected

      # Registers the bundled FIDO format file and the bundled EAD schema.
      def initialize
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
        # noinspection RubyStringKeysInHashInspection
        @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
      end

      # Decides whether the identification gathered so far is good enough to stop searching.
      #
      # The result is first enriched through the TypeDatabase (which may add :TYPE, MIME type,
      # PUID, extensions and group).
      #
      # @param result [Hash] identification result so far
      # @param who_is_asking [Symbol, nil] :DROID when the DROID detector is asking
      # @return [Boolean] true when no further detector needs to run
      def result_ok?(result, who_is_asking = nil)
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
        return false if result.empty?
        # A known TYPE is always good enough.
        return true unless result[:TYPE].empty?
        return false if RETRY_MIMETYPES.include?(result[:mimetype])
        return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
        !(result[:mimetype].empty? && result[:puid].empty?)
      end

      # @param mimetype [String]
      # @return [String, nil] first PUID registered for the MIME type, nil when there is none
      def get_puid(mimetype)
        begin
          ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first
        rescue StandardError
          nil
        end
      end

      public

      # Forwards an extra format definition to FIDO.
      def self.add_fido_format(f)
        ::Libis::Format::Fido.add_format f
      end

      # Registers an XML Schema file; XML files validating against it get the given MIME type.
      def self.add_xml_validation(mimetype, xsd_file)
        instance.xml_validations[mimetype] = xsd_file
      end

      # @return [Hash] the registered MIME type => XSD file mappings
      def self.xml_validations
        instance.xml_validations
      end

      # Convenience shortcut for Identifier.instance.get.
      def self.get(file_path, options = nil)
        instance.get file_path, options
      end

      # Identifies a single file.
      #
      # @param file [String] path of the file to identify
      # @param options [Hash, nil] :droid - truthy to skip FIDO; :formats - extra formats passed to FIDO
      # @return [Hash, nil] identification result (with a :messages list of [severity, text]
      #   pairs), or nil when the file does not exist or is a directory
      def get(file, options = nil)

        # File.exists? was removed in Ruby 3.2; File.exist? works on every Ruby version.
        unless File.exist? file
          error 'File %s cannot be found.', file
          return nil
        end
        if File.directory? file
          error '%s is a directory.', file
          return nil
        end

        options ||= {}

        result = {messages: []}

        # use FIDO
        # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
        # NOTE(review): the original remark "FIDO misses should be registered in" was cut off;
        # presumably it refers to FIDO_FAILURES - confirm.
        result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

        # use DROID
        result = get_droid_identification file, result

        # use FILE
        result = get_file_identification(file, result)

        # Try file extension
        result = get_extension_identification(file, result)

        # determine XML type. Add custom types at runtime with
        # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
        result = validate_against_xml_schema(file, result)

        if result[:mimetype]
          log_msg(result, :info, "Identification of '#{file}': '#{result}'")
        else
          log_msg(result, :warn, "Could not identify MIME type of '#{file}'")
        end
      end

      # Runs FIDO unless the result is already acceptable and merges its answer into result.
      #
      # @return [Hash] the (possibly updated) result
      def get_fido_identification(file, result = {}, xtra_formats = nil)
        return result if result_ok? result

        fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

        return result unless fido_result.is_a? Hash

        result.merge! fido_result
        result[:method] = 'fido'

        log_msg(result, :debug, "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Runs DROID unless the result is already acceptable. On a match the previous
      # identification data is replaced by DROID's first match; the message log is kept.
      #
      # @return [Hash] the (possibly updated) result
      def get_droid_identification(file, result = {})
        return result if result_ok? result, :DROID
        result = {} unless result.is_a?(Hash)

        droid_output = ::Libis::Format::Droid.run file
        messages = (result[:messages] ||= [])
        messages << [:debug, "DROID: #{droid_output}"]

        # No match at all: keep what we had instead of failing on a nil match.
        return result if droid_output.nil? || droid_output.empty?

        warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
        match = droid_output.first

        # Drop the rejected identification data, but not the messages collected so far.
        result.clear
        result[:messages] = messages
        result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
        result[:matchtype] = match[:method]
        result[:puid] = match[:puid]
        result[:format_name] = match[:format_name]
        result[:format_version] = match[:format_version]
        result[:method] = 'droid'

        log_msg(result, :debug, "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Asks the 'file' command for the MIME type unless the result is already acceptable.
      # Failures of the command are logged in the result messages and otherwise ignored.
      #
      # @return [Hash] the (possibly updated) result
      def get_file_identification(file, result = {})
        return result if result_ok? result
        result = {} unless result.is_a?(Hash)
        begin
          # The path is passed as-is, like every other Command.run call in this library does.
          output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', file)[:out]
          # Command output is a list of lines; the MIME type is on the first one.
          mimetype = Array(output).first.to_s.strip
          # Only accept something that looks like 'type/subtype' (not e.g. an error text).
          if mimetype =~ /\A[^\s\/]+\/\S+\z/
            log_msg(result, :debug, "File result: '#{mimetype}'")
            result[:mimetype] = mimetype
            result[:puid] = get_puid(mimetype)
          end
          result[:method] = 'file'
        rescue StandardError => e
          # best effort: the extension based identification comes next
          log_msg(result, :debug, "File identification failed: #{e.message}")
        end
        result
      end

      # Looks up the file extension in the TypeDatabase unless the result is already acceptable.
      #
      # @return [Hash] the (possibly updated) result
      def get_extension_identification(file, result = {})
        return result if result_ok? result
        result = {} unless result.is_a?(Hash)
        info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
        log_msg result, :debug, "File extension info: #{info}"
        if info
          result[:mimetype] = Array(info[:MIME]).first
          result[:puid] = Array(info[:PUID]).first
        end
        result[:method] = 'extension'
        result
      end

      # For files identified as text/xml or application/xml: validates the document against
      # every registered XML Schema and switches the MIME type to the one whose schema matches.
      #
      # @return [Hash] the (possibly updated) result
      def validate_against_xml_schema(file, result)
        return result unless result[:mimetype] =~ /^(text|application)\/xml$/
        doc = ::Libis::Tools::XmlDocument.open file
        xml_validations.each do |mime, xsd_file|
          next unless xsd_file
          if doc.validates_against?(xsd_file)
            log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
            result[:mimetype] = mime
            result[:puid] = nil
            # NOTE(review): enrich only fills :TYPE when it is not set yet, so a :TYPE found by an
            # earlier detector is kept here - confirm this is intended.
            result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
          end
        end
        result
      end

      private

      # Appends a [severity, text] pair to the result's message list.
      #
      # @return [Hash] result itself, or an empty Hash when result is not a Hash
      def log_msg(result, severity, text)
        return {} unless result.is_a?(Hash)
        (result[:messages] ||= []) << [severity, text]
        result
      end

    end

  end
end
@@ -0,0 +1,52 @@
1
+ require 'fileutils'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format

    # Converts an office document to PDF (or another export filter) by running soffice headless.
    class OfficeToPdf
      include ::Libis::Tools::Logger

      # Convenience shortcut for OfficeToPdf.new.run.
      def self.run(source, target, options = {})
        self.new.run source, target, options
      end

      # @param source [String] path of the document to convert
      # @param target [String] path where the converted file is copied to
      # @param options [Hash] :export_filter - value for soffice's --convert-to (default 'pdf')
      # @return [Array, false] the command's output lines, or false when soffice reported a failure
      def run(source, target, options = {})
        options ||= {}

        # NOTE(review): '/...' looks like a placeholder for a preferred work location; when it
        # does not exist the system temp dir is used.
        workdir = '/...'
        workdir = Dir.tmpdir unless Dir.exist? workdir

        # Dir.mktmpdir creates a fresh, uniquely named directory (no clashes between runs).
        workdir = Dir.mktmpdir('office_to_pdf_', workdir)

        begin
          src_file = File.join(workdir, File.basename(source))
          begin
            FileUtils.link source, src_file
          rescue SystemCallError, NotImplementedError
            # hard links do not work across file systems (or everywhere): copy instead
            FileUtils.copy source, src_file
          end

          # soffice writes <basename>.pdf into the output directory
          tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

          export_filter = options[:export_filter] || 'pdf'

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:soffice_path], '--headless',
              '--convert-to', export_filter,
              '--outdir', workdir, src_file
          )

          unless result[:status] == 0
            warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
            return false
          end

          FileUtils.copy tgt_file, target, preserve: true

          result[:out]
        ensure
          # always clean up, also on failure or when an exception is raised
          FileUtils.rmtree workdir
        end
      end
    end

  end
end
@@ -0,0 +1,40 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format

    # Runs the 'CopyPdf' command of the bundled PdfTool.jar in a separate Java process.
    class PdfCopy
      include ::Libis::Tools::Logger

      # Convenience shortcut for PdfCopy.new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String] path passed as --file_input
      # @param target [String] path passed as --file_output
      # @param options [Array] extra command line arguments, appended as-is
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        here = File.dirname(__FILE__)
        tools_path = File.absolute_path(File.join(here, '..', '..', '..', 'tools'))
        jar_file = File.join(tools_path, 'PdfTool.jar')

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        java = Libis::Format::Config[:java_path]
        arguments = ['-cp', jar_file, 'CopyPdf', '--file_input', source, '--file_output', target, *options]
        Libis::Tools::Command.run(java, *arguments)
      end
    end

  end
end
@@ -0,0 +1,41 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format

    # Runs the 'MergePdf' command of the bundled PdfTool.jar in a separate Java process.
    class PdfMerge
      include ::Libis::Tools::Logger

      # Convenience shortcut for PdfMerge.new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String, Array] one input path or a list of input paths
      # @param target [String] path passed as --file_output
      # @param options [Array] extra command line arguments, placed before the input paths
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        here = File.dirname(__FILE__)
        tools_path = File.absolute_path(File.join(here, '..', '..', '..', 'tools'))
        jar_file = File.join(tools_path, 'PdfTool.jar')
        # a single input is treated as a one-element list
        inputs = source.is_a?(Array) ? source : [source]

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        java = Libis::Format::Config[:java_path]
        arguments = ['-cp', jar_file, 'MergePdf', '--file_output', target, *options, *inputs]
        Libis::Tools::Command.run(java, *arguments)
      end
    end

  end
end
@@ -0,0 +1,39 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format

    # Runs the 'SplitPdf' command of the bundled PdfTool.jar in a separate Java process.
    class PdfSplit
      include ::Libis::Tools::Logger

      # Convenience shortcut for PdfSplit.new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String] path passed as --file_input
      # @param target [String] path passed as --file_output
      # @param options [Array] extra command line arguments, appended as-is
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        here = File.dirname(__FILE__)
        tools_path = File.absolute_path(File.join(here, '..', '..', '..', 'tools'))
        jar_file = File.join(tools_path, 'PdfTool.jar')

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        java = Libis::Format::Config[:java_path]
        arguments = ['-cp', jar_file, 'SplitPdf', '--file_input', source, '--file_output', target, *options]
        Libis::Tools::Command.run(java, *arguments)
      end
    end

  end
end
@@ -0,0 +1,76 @@
1
+ require 'tempfile'
2
+ require 'csv'
3
+ require 'fileutils'
4
+
5
+ require 'libis/tools/extend/string'
6
+ require 'libis/tools/logger'
7
+ require 'libis/tools/command'
8
+
9
+ require 'libis/format'
10
+
11
module Libis
  module Format

    # Converts a PDF file to PDF/A with Ghostscript and validates the outcome with PdfaValidator.
    class PdfToPdfa
      include ::Libis::Tools::Logger

      # Convenience shortcut for PdfToPdfa.new.run.
      def self.run(source, target = nil, options = {})
        self.new.run source, target, options
      end

      # @param source [String] path of the PDF file to convert
      # @param target [String, nil] path of the PDF/A file to create; a uniquely named file in the
      #   system temp dir is used when nil
      # @param options [Hash, nil] :colorspace - 'cmyk' selects the CMYK profile, anything else RGB
      # @return [Hash] result of the Ghostscript command; when validation of the generated file
      #   fails, :status is set to -999 and a message is appended to :err
      def run(source, target = nil, options = nil)
        # the instance method defaults options to nil; treat that as 'no options'
        options ||= {}

        # Dir::Tmpname.make_tmpname no longer exists in current Rubies; build the name ourselves.
        target ||= File.join(Dir.tmpdir, "#{File.basename(source, '.*')}_#{random_token}.pdf")

        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))

        icc_info = icc_options(options[:colorspace])

        icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{random_token}.icc")
        def_filename = File.join(Dir.tmpdir, "PDFA_def_#{random_token}.ps")

        begin
          FileUtils.cp(File.join(data_dir, "#{icc_info[:icc_name]}.icc"), icc_file)

          # Fill in the PDF/A definition template. Block form of gsub: the replacement text is
          # inserted literally, also when the path contains backslashes.
          template = File.read(File.join(data_dir, 'PDFA_def.ps'))
          File.open(def_filename, 'w') do |f|
            f.puts template.
                       gsub('[** Fill in ICC profile location **]') { icc_file }.
                       gsub('[** Fill in ICC reference name **]') { icc_info[:icc_ref] }
          end

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:ghostscript_path],
              '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
              '-sColorConversionStrategy=/UseDeviceIndependentColor',
              "-sProcessColorModel=#{icc_info[:device]}",
              '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
              "-sOutputICCProfile=#{icc_file}",
              '-o', File.absolute_path(target),
              def_filename,
              source
          )
        ensure
          # remove the temporary files, also when the conversion raised an exception
          FileUtils.rm [icc_file, def_filename], force: true
        end

        unless PdfaValidator.run(target)
          result[:status] = -999
          result[:err] << 'Failed to validate generated PDF/A file.'
        end

        result
      end


      private

      # @return [Hash] ICC profile file name (without extension), ICC reference name and
      #   Ghostscript process color model for the requested colorspace
      def icc_options(colorspace)
        case colorspace.to_s.downcase
          when 'cmyk'
            {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
          else
            {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
        end
      end

      # @return [String] 24 random hexadecimal characters used to make temp file names unique
      def random_token
        Random.new.bytes(12).unpack('H*').first
      end

    end

  end
end
@@ -0,0 +1,61 @@
1
+ require 'fileutils'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
module Libis
  module Format

    # Checks a PDF file against the PDF/A-1B constraints, either with the tool configured as
    # :pdfa_path or, when none is configured, with the bundled PDFBox preflight application.
    class PdfaValidator
      include ::Libis::Tools::Logger

      # Convenience shortcut for PdfaValidator.new.run.
      def self.run(source)
        self.new.run source
      end

      # @param source [String] path of the PDF file to validate
      # @return [Boolean] true when the file passed validation; a failure is logged as a warning
      def run(source)

        src_file = File.absolute_path(source)

        if (pdfa = Libis::Format::Config[:pdfa_path])
          # Keep it clean: tool generates fontconfig/ cache dir in current working dir.
          # The block form of Dir.chdir restores the working dir even when the command raises.
          result = Dir.chdir(Dir.tmpdir) do
            Libis::Tools::Command.run(
                pdfa,
                '--noxml',
                '--level', 'B',
                '--verb', '0',
                src_file
            )
          end

          # the tool reports success with a line starting with VLD-[PASS]
          passed = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
        else
          jar = File.join(ROOT_DIR, 'tools', 'pdfbox', 'preflight-app-1.8.10.jar')
          result = Libis::Tools::Command.run(
              Libis::Format::Config[:java_path],
              '-jar', jar,
              src_file
          )
          # here the exit status is what tells success from failure
          passed = (result[:status] == 0)
        end

        unless passed
          warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
               result[:out].join("\n")
          return false
        end

        true
      end
    end

  end
end
@@ -0,0 +1,170 @@
1
+ # coding: utf-8
2
+
3
+ require 'singleton'
4
+ require 'yaml'
5
+
6
+ require 'backports/rails/hash'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/extend/string'
9
+
10
module Libis
  module Format

    # Singleton registry of format types. Each type has a :TYPE name, a :GROUP and lists of
    # :MIME types, :PUID identifiers and file :EXTENSIONS. The registry is filled from
    # data/types.yml on creation and can be extended with #load_types.
    class TypeDatabase
      include Singleton
      include ::Libis::Tools::Logger

      # @return [Hash] info of the given type name, empty Hash when unknown
      def self.typeinfo(t)
        self.instance.types[t.to_sym] || {}
      end

      # Completes an info Hash in place: derives the type from its PUID or MIME type and then
      # fills in missing MIME type / PUID plus the type's extensions and group.
      #
      # @param info [Hash] Hash to complete (modified and returned)
      # @param map_keys [Hash] maps the standard keys (:PUID, :MIME, :TYPE, :EXTENSIONS, :GROUP)
      #   to the keys used in info; unmapped keys are used as-is
      # @return [Hash] info, or an empty Hash when info is not a Hash
      def self.enrich(info, map_keys = {})
        return {} unless info.is_a? Hash
        # unmapped keys map to themselves
        mapper = Hash.new { |hash, key| hash[key] = key }
        mapper.merge! map_keys
        unless (puid = info[mapper[:PUID]]).blank?
          info[mapper[:TYPE]] ||= (self.puid_infos(puid).first || {})[:TYPE]
        end
        unless (mime = info[mapper[:MIME]]).blank?
          info[mapper[:TYPE]] ||= (self.mime_infos(mime).first || {})[:TYPE]
        end
        unless (type_name = info[mapper[:TYPE]]).nil?
          info[mapper[:MIME]] = self.type_mimetypes(type_name).first if info[mapper[:MIME]].blank?
          info[mapper[:PUID]] = self.type_puids(type_name).first if info[mapper[:PUID]].blank?
          info[mapper[:EXTENSIONS]] = self.type_extentions(type_name)
          info[mapper[:GROUP]] = self.type_group(type_name)
        end
        info
      end

      # @return [Symbol, nil] group of the given type
      def self.type_group(t)
        typeinfo(t)[:GROUP]
      end

      # @return [Array<String>] MIME types of the given type
      def self.type_mimetypes(t)
        typeinfo(t)[:MIME] || []
      end

      # @return [Array<String>] PUIDs of the given type
      def self.type_puids(t)
        typeinfo(t)[:PUID] || []
      end

      # @return [Array<String>] file extensions of the given type
      def self.type_extentions(t)
        typeinfo(t)[:EXTENSIONS] || []
      end

      # @return [Array<Symbol>] names of all types in the given group
      def self.group_types(group)
        wanted = group.to_sym
        self.instance.types.select { |_, v| v[:GROUP] == wanted }.keys
      end

      # @return [Array<Hash>] infos of all types that list the PUID
      def self.puid_infos(puid)
        matching_types(:PUID, puid).values
      end

      # @return [Array<Symbol>] names of all types that list the PUID
      def self.puid_types(puid)
        matching_types(:PUID, puid).keys
      end

      # @return [Array] groups of all types that list the PUID
      def self.puid_groups(puid)
        puid_types(puid).map { |t| type_group t }
      end

      # @return [Array<Hash>] infos of all types that list the MIME type
      def self.mime_infos(mime)
        matching_types(:MIME, mime).values
      end

      # @return [Array<Symbol>] names of all types that list the MIME type
      def self.mime_types(mime)
        matching_types(:MIME, mime).keys
      end

      # @return [Array] groups of all types that list the MIME type
      def self.mime_groups(mime)
        mime_types(mime).map { |t| type_group t }
      end

      # @param ext [String] file extension, with or without the leading dot
      # @return [Array<Hash>] infos of all types that list the extension
      def self.ext_infos(ext)
        matching_types(:EXTENSIONS, ext.sub(/\A\./, '')).values
      end

      # @param ext [String] file extension, with or without the leading dot
      # @return [Array<Symbol>] names of all types that list the extension
      def self.ext_types(ext)
        matching_types(:EXTENSIONS, ext.sub(/\A\./, '')).keys
      end

      # @return [Hash, nil] info of the first type that lists the PUID
      def self.puid_typeinfo(puid)
        matching_types(:PUID, puid).values.first
      end

      # @return [Boolean] true when at least one type lists the MIME type
      def self.known_mime?(mime)
        !matching_types(:MIME, mime).empty?
      end

      # Selects the types whose list under +field+ contains +value+; a missing list matches nothing.
      def self.matching_types(field, value)
        self.instance.types.select { |_, v| Array(v[field]).include?(value) }
      end
      private_class_method :matching_types

      # Hash of type name (Symbol) => type info Hash.
      attr_reader :types

      # Adds type definitions to the registry.
      #
      # The definitions are a Hash (or a YAML file containing one) of the form
      # group => {type name => {MIME:, EXTENSIONS:, PUID:, ...}}. List values may be given as a
      # whitespace/comma separated String or as an Array. Note that the given info Hashes are
      # modified in place.
      #
      # @param file_or_hash [Hash, String] definitions or path to a YAML file with definitions
      # @param append [Boolean] when a type already exists: true keeps the existing values first
      #   (new list entries are appended), false gives priority to the new values
      def load_types(file_or_hash = {}, append = true)
        hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
        # noinspection RubyResolve
        hash.each do |group, type_info|
          type_info.each do |type_name, info|
            type_key = type_name.to_sym
            info ||= {}
            info.symbolize_keys!
            info[:TYPE] = type_key
            info[:GROUP] = group.to_sym
            info[:MIME] = split_list(info[:MIME])
            info[:EXTENSIONS] = split_list(info[:EXTENSIONS])
            info[:PUID] = split_list(info[:PUID]) if info[:PUID]
            if @types.has_key?(type_key)
              warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
              # info holds the new values, @types[type_key] the existing ones
              info.merge!(@types[type_key]) do |_, v_new, v_old|
                case v_old
                  when Array
                    # uniq: loading the same definition twice must not duplicate entries
                    (append ? v_old + Array(v_new) : Array(v_new) + v_old).uniq
                  when Hash
                    append ? v_new.merge(v_old) : v_old.merge(v_new)
                  else
                    append ? v_old : v_new
                end
              end
            end
            @types[type_key] = info
          end
        end
      end

      protected

      # Loads the bundled type database (data/types.yml).
      def initialize
        @types = Hash.new
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        type_database = File.join(data_dir, 'types.yml')
        load_types(type_database)
      end

      # Turns a list value into an Array of Strings. Accepts an Array (e.g. an info Hash that
      # went through load_types before), a whitespace/comma separated String or nil.
      def split_list(value)
        case value
          when nil
            []
          when Array
            value.map { |v| v.to_s.strip }.reject { |v| v.empty? }
          else
            value.to_s.strip.split(/[\s,]+/)
        end
      end

    end

  end
end
@@ -0,0 +1,5 @@
1
module Libis
  module Format
    # Version of the libis-format gem; frozen so it cannot be modified by accident.
    VERSION = '0.9.5'.freeze
  end
end
@@ -0,0 +1,23 @@
1
+ require 'libis/format/version'
2
+
3
module Libis
  # Namespace of the libis-format library. The classes are autoloaded: their file is only
  # required the first time the constant is referenced.
  module Format
    autoload :Config, 'libis/format/config'
    autoload :TypeDatabase, 'libis/format/type_database'
    autoload :Identifier, 'libis/format/identifier'
    autoload :Fido, 'libis/format/fido'
    autoload :Droid, 'libis/format/droid'
    autoload :OfficeToPdf, 'libis/format/office_to_pdf'
    autoload :PdfCopy, 'libis/format/pdf_copy'
    autoload :PdfMerge, 'libis/format/pdf_merge'
    autoload :PdfSplit, 'libis/format/pdf_split'
    autoload :PdfToPdfa, 'libis/format/pdf_to_pdfa'
    autoload :PdfaValidator, 'libis/format/pdfa_validator'

    autoload :Converter, 'libis/format/converter'

    # Directory two levels above the directory of this file; frozen to prevent accidental changes.
    ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..')).freeze
    # The 'data' directory below ROOT_DIR.
    DATA_DIR = File.join(ROOT_DIR, 'data').freeze

  end
end