libis-format 0.9.5-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +18 -0
  4. data/.travis.yml +41 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +39 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/bin/pdf_copy +13 -0
  12. data/data/ISOcoated_v2_eci.icc +0 -0
  13. data/data/PDFA_def.ps +40 -0
  14. data/data/ead.xsd +2728 -0
  15. data/data/eciRGB_v2.icc +0 -0
  16. data/data/lias_formats.xml +106 -0
  17. data/data/types.yml +217 -0
  18. data/lib/libis/format/config.rb +35 -0
  19. data/lib/libis/format/converter/base.rb +101 -0
  20. data/lib/libis/format/converter/chain.rb +167 -0
  21. data/lib/libis/format/converter/image_converter.rb +214 -0
  22. data/lib/libis/format/converter/office_converter.rb +50 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +139 -0
  24. data/lib/libis/format/converter/repository.rb +98 -0
  25. data/lib/libis/format/converter.rb +11 -0
  26. data/lib/libis/format/droid.rb +45 -0
  27. data/lib/libis/format/fido.rb +102 -0
  28. data/lib/libis/format/identifier.rb +189 -0
  29. data/lib/libis/format/office_to_pdf.rb +52 -0
  30. data/lib/libis/format/pdf_copy.rb +40 -0
  31. data/lib/libis/format/pdf_merge.rb +41 -0
  32. data/lib/libis/format/pdf_split.rb +39 -0
  33. data/lib/libis/format/pdf_to_pdfa.rb +76 -0
  34. data/lib/libis/format/pdfa_validator.rb +61 -0
  35. data/lib/libis/format/type_database.rb +170 -0
  36. data/lib/libis/format/version.rb +5 -0
  37. data/lib/libis/format.rb +23 -0
  38. data/lib/libis-format.rb +1 -0
  39. data/libis-format.gemspec +34 -0
  40. data/spec/converter_spec.rb +212 -0
  41. data/spec/data/Cevennes2.bmp +0 -0
  42. data/spec/data/Cevennes2.jp2 +0 -0
  43. data/spec/data/Cevennes2.ppm +22492 -0
  44. data/spec/data/test-ead.xml +392 -0
  45. data/spec/data/test-jpg.tif +0 -0
  46. data/spec/data/test-lzw.tif +0 -0
  47. data/spec/data/test-options.jpg +0 -0
  48. data/spec/data/test.bmp +0 -0
  49. data/spec/data/test.doc +0 -0
  50. data/spec/data/test.docx +0 -0
  51. data/spec/data/test.gif +0 -0
  52. data/spec/data/test.jpg +0 -0
  53. data/spec/data/test.ods +0 -0
  54. data/spec/data/test.odt +0 -0
  55. data/spec/data/test.pdf +0 -0
  56. data/spec/data/test.pdf.tif +0 -0
  57. data/spec/data/test.png +0 -0
  58. data/spec/data/test.ps +8631 -0
  59. data/spec/data/test.psd +0 -0
  60. data/spec/data/test.rtf +1455 -0
  61. data/spec/data/test.tif +0 -0
  62. data/spec/data/test.txt +12 -0
  63. data/spec/data/test.xcf +0 -0
  64. data/spec/data/test.xls +0 -0
  65. data/spec/data/test.xlsx +0 -0
  66. data/spec/data/test.xml +4 -0
  67. data/spec/data/test_pdfa.pdf +0 -0
  68. data/spec/identifier_spec.rb +60 -0
  69. data/spec/spec_helper.rb +9 -0
  70. data/spec/test_types.yml +12 -0
  71. data/spec/type_database_spec.rb +140 -0
  72. data/tools/PdfTool.jar +0 -0
  73. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  74. data/tools/bcprov-jdk15on-1.49.jar +0 -0
  75. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  76. data/tools/droid/container-signature-20150307.xml +2235 -0
  77. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  78. data/tools/droid/droid.bat +154 -0
  79. data/tools/droid/droid.sh +138 -0
  80. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  81. data/tools/droid/lib/activation-1.1.jar +0 -0
  82. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  83. data/tools/droid/lib/antlr-3.2.jar +0 -0
  84. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  85. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  86. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  87. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  88. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  89. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  90. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  91. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  92. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  93. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  94. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  95. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  96. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  97. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  98. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  99. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  100. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  101. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  102. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  103. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  104. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  105. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  106. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  107. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  108. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  109. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  110. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  111. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  112. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  113. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  114. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  115. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  116. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  117. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  118. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  119. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  120. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  121. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  122. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  123. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  124. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  125. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  126. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  127. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  128. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  129. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  130. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  131. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  132. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  133. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  134. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  135. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  136. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  138. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  139. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  140. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  141. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  142. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  143. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  144. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  145. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  146. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  147. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  148. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  149. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  150. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  151. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  152. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  153. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  154. data/tools/droid/lib/jta-1.1.jar +0 -0
  155. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  156. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  157. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  158. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  159. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  160. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  161. data/tools/droid/lib/poi-3.7.jar +0 -0
  162. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  163. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  164. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  165. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  166. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  167. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  168. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  169. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  170. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  171. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  172. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  173. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  174. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  175. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  176. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  177. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  178. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  179. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  180. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  181. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  182. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  183. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  184. data/tools/droid/lib/xz-1.0.jar +0 -0
  185. data/tools/fido/__init__.py +0 -0
  186. data/tools/fido/argparselocal.py +2355 -0
  187. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  188. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  189. data/tools/fido/conf/dc.xsd +119 -0
  190. data/tools/fido/conf/dcmitype.xsd +53 -0
  191. data/tools/fido/conf/dcterms.xsd +383 -0
  192. data/tools/fido/conf/fido-formats.xsd +173 -0
  193. data/tools/fido/conf/format_extension_template.xml +105 -0
  194. data/tools/fido/conf/format_extensions.xml +498 -0
  195. data/tools/fido/conf/formats-v81.xml +38355 -0
  196. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  197. data/tools/fido/conf/versions.xml +8 -0
  198. data/tools/fido/fido.bat +4 -0
  199. data/tools/fido/fido.py +854 -0
  200. data/tools/fido/fido.sh +5 -0
  201. data/tools/fido/prepare.py +616 -0
  202. data/tools/fido/pronomutils.py +115 -0
  203. data/tools/fido/toxml.py +52 -0
  204. data/tools/fido/update_signatures.py +171 -0
  205. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  206. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  207. metadata +396 -0
@@ -0,0 +1,189 @@
1
+ # encoding: utf-8
2
+
3
+ require 'singleton'
4
+
5
+ require 'libis-tools'
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/extend/empty'
8
+
9
+ require 'libis/format/type_database'
10
+
11
+ require_relative 'fido'
12
+ require_relative 'droid'
13
+
14
+ module Libis
15
+ module Format
16
+
17
# Singleton that determines the MIME type and PRONOM PUID of a file.
#
# Identification is attempted with FIDO, DROID, the `file` command and finally the
# file extension; each step is skipped as soon as a previous step produced an
# acceptable result (see #result_ok?). XML files are afterwards matched against the
# registered XML Schemas to derive a more specific MIME type.
class Identifier
  include ::Libis::Tools::Logger
  include Singleton

  # MIME types for which an identification is never accepted as final.
  RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES
  # MIME types for which an earlier result is rejected when the DROID step asks.
  FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

  # Hash mapping a MIME type to the path of the XSD file that recognises it.
  attr_reader :xml_validations

  protected

  # Registers the bundled FIDO format file and the EAD schema from the gem's data dir.
  def initialize
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
    # noinspection RubyStringKeysInHashInspection
    @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
  end

  # Decides whether +result+ is good enough to stop trying other tools.
  #
  # @param result [Hash, nil] identification result so far (enriched in place)
  # @param who_is_asking [Symbol, nil] :DROID when called from the DROID step
  # @return [Boolean]
  def result_ok?(result, who_is_asking = nil)
    result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
    return false if result.empty?
    return true unless result[:TYPE].empty?
    return false if RETRY_MIMETYPES.include? result[:mimetype]
    return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
    !(result[:mimetype].empty? && result[:puid].empty?)
  end

  # @return [String, nil] first PUID registered for +mimetype+, nil when unknown
  def get_puid(mimetype)
    ::Libis::Format::TypeDatabase.mime_infos(mimetype).first[:PUID].first rescue nil
  end

  public

  # Forwards an extra format definition to Fido.
  def self.add_fido_format(f)
    ::Libis::Format::Fido.add_format f
  end

  # Registers +xsd_file+ as the schema that identifies XML files of +mimetype+.
  def self.add_xml_validation(mimetype, xsd_file)
    instance.xml_validations[mimetype] = xsd_file
  end

  # @return [Hash] the registered MIME type => XSD file mapping
  def self.xml_validations
    instance.xml_validations
  end

  # Class-level shortcut for #get on the singleton instance.
  def self.get(file_path, options = nil)
    instance.get file_path, options
  end

  # Identifies +file+.
  #
  # @param file [String] path of the file to identify
  # @param options [Hash, nil] :droid => skip FIDO; :formats => extra formats passed to FIDO
  # @return [Hash, nil] result hash (with :messages), or nil when +file+ is missing
  #   or is a directory
  def get(file, options = nil)

    # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
    unless File.exist? file
      error 'File %s cannot be found.', file
      return nil
    end
    if File.directory? file
      error '%s is a directory.', file
      return nil
    end

    options ||= {}

    result = { messages: [] }

    # use FIDO
    # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
    # NOTE(review): the original comment ended mid-sentence ("FIDO misses should be
    # registered in"); presumably it refers to FIDO_FAILURES - confirm.
    result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

    # use DROID
    result = get_droid_identification file, result

    # use FILE
    result = get_file_identification(file, result)

    # Try file extension
    result = get_extension_identification(file, result)

    # determine XML type. Add custom types at runtime with
    # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
    result = validate_against_xml_schema(file, result)

    if result[:mimetype]
      log_msg(result, :info, "Identification of '#{file}': '#{result}'")
    else
      log_msg(result, :warn, "Could not identify MIME type of '#{file}'")
    end
  end

  # Runs FIDO unless +result+ is already acceptable and merges its output into +result+.
  def get_fido_identification(file, result = {}, xtra_formats = nil)
    return result if result_ok? result

    fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

    return result unless fido_result.is_a? Hash

    result.merge! fido_result
    result[:method] = 'fido'

    log_msg(result, :debug, "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
  end

  # Runs DROID unless +result+ is already acceptable and replaces the identification
  # fields of +result+ with DROID's first match.
  def get_droid_identification(file, result = {})
    return result if result_ok? result, :DROID
    droid_output = ::Libis::Format::Droid.run file
    messages = (result[:messages] ||= [])
    messages << [:debug, "DROID: #{droid_output}"]
    # No match at all: keep what we have so the later fallbacks can still run.
    return result if droid_output.nil? || droid_output.empty?
    warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
    droid_match = droid_output.first
    # Drop the fields of the rejected identification, but keep the collected messages
    # (clearing them would also discard the DROID message logged just above).
    result.clear
    result[:messages] = messages
    result[:mimetype] = droid_match[:mime_type].to_s.split(/[\s,]+/).find {|x| x =~ /.*\/.*/}
    result[:matchtype] = droid_match[:method]
    result[:puid] = droid_match[:puid]
    result[:format_name] = droid_match[:format_name]
    result[:format_version] = droid_match[:format_version]
    result[:method] = 'droid'

    log_msg(result, :debug, "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
  end

  # Falls back to the `file` command unless +result+ is already acceptable.
  # Failures of the external command are logged as debug messages, never raised.
  def get_file_identification(file, result = nil)
    return result if result_ok? result
    result = {} unless result.is_a?(Hash)
    begin
      # The path is passed as its own argument without extra quoting, like every other
      # Command.run call in this gem. `file` prints the MIME type on stdout, and the
      # command output is an array of lines.
      output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', file)
      mimetype = Array(output[:out]).first.to_s.split.first
      if mimetype
        log_msg(result, :debug, "File result: '#{mimetype}'")
        result[:mimetype] = mimetype
        result[:puid] = get_puid(mimetype)
      end
      result[:method] = 'file'
    rescue StandardError => e
      # best effort only: the extension based fallback still follows
      log_msg(result, :debug, "File command failed: #{e.message}")
    end
    result
  end

  # Last resort: looks the file extension up in the type database.
  def get_extension_identification(file, result = nil)
    return result if result_ok? result
    result = {} unless result.is_a?(Hash)
    info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
    log_msg result, :debug, "File extension info: #{info}"
    if info
      result[:mimetype] = info[:MIME].first rescue nil
      result[:puid] = info[:PUID].first rescue nil
    end
    result[:method] = 'extension'
    result
  end

  # For generic XML results, replaces the MIME type with the one registered for each
  # schema the document validates against (a later match overrides an earlier one).
  def validate_against_xml_schema(file, result)
    return result unless result[:mimetype] =~ /^(text|application)\/xml$/
    doc = ::Libis::Tools::XmlDocument.open file
    xml_validations.each do |mime, xsd_file|
      next unless xsd_file
      if doc.validates_against?(xsd_file)
        log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
        result[:mimetype] = mime
        result[:puid] = nil
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
      end
    end
    result
  end

  private

  # Appends [severity, text] to result[:messages] and returns +result+;
  # returns an empty Hash when +result+ is not a Hash.
  def log_msg(result, severity, text)
    return {} unless result.is_a?(Hash)
    (result[:messages] ||= []) << [severity, text]
    result
  end

end
187
+
188
+ end
189
+ end
@@ -0,0 +1,52 @@
1
+ require 'fileutils'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+
12
# Converts office documents with a headless LibreOffice/OpenOffice (`soffice`).
class OfficeToPdf
  include ::Libis::Tools::Logger

  # Shortcut for OfficeToPdf.new.run(source, target, options).
  def self.run(source, target, options = {})
    new.run source, target, options
  end

  # Converts +source+ inside a private work directory and copies the outcome to +target+.
  #
  # @param source [String] path of the document to convert
  # @param target [String] path where the converted file is written
  # @param options [Hash] :export_filter => value for soffice's --convert-to (default 'pdf')
  # @return [Array<String>, false] stdout lines of soffice, or false when soffice
  #   exits with a non-zero status
  def run(source, target, options = {})
    base_dir = '/...'
    base_dir = Dir.tmpdir unless Dir.exist? base_dir

    # mktmpdir guarantees a unique directory; a rand-based name can collide.
    workdir = Dir.mktmpdir('office_to_pdf', base_dir)

    begin
      src_file = File.join(workdir, File.basename(source))
      begin
        FileUtils.link source, src_file
      rescue SystemCallError
        # hard links do not work across file systems; fall back to a copy
        FileUtils.copy source, src_file
      end

      tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

      export_filter = options[:export_filter] || 'pdf'

      result = Libis::Tools::Command.run(
          Libis::Format::Config[:soffice_path], '--headless',
          '--convert-to', export_filter,
          '--outdir', workdir, src_file
      )

      unless result[:status] == 0
        warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
        return false
      end

      FileUtils.copy tgt_file, target, preserve: true

      result[:out]
    ensure
      # always clean up, also on failure or when an exception is raised
      FileUtils.rmtree workdir
    end
  end
end
50
+
51
+ end
52
+ end
@@ -0,0 +1,40 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+
12
# Thin wrapper around the CopyPdf entry point of the bundled PdfTool.jar.
class PdfCopy
  include ::Libis::Tools::Logger

  # Shortcut for PdfCopy.new.run(source, target, options).
  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Runs CopyPdf in a separate Java process.
  #
  # @param source [String] value passed as --file_input
  # @param target [String] value passed as --file_output
  # @param options [Array<String>] extra command line arguments, appended verbatim
  # @return whatever Libis::Tools::Command.run returns
  def run(source, target, options = [])
    jar_file = File.join(
        File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools')),
        'PdfTool.jar'
    )

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = [
        Libis::Format::Config[:java_path],
        '-cp', jar_file,
        'CopyPdf',
        '--file_input', source,
        '--file_output', target
    ]
    Libis::Tools::Command.run(*command_line, *options)
  end
end
38
+
39
+ end
40
+ end
@@ -0,0 +1,41 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+
12
# Thin wrapper around the MergePdf entry point of the bundled PdfTool.jar.
class PdfMerge
  include ::Libis::Tools::Logger

  # Shortcut for PdfMerge.new.run(source, target, options).
  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Runs MergePdf in a separate Java process.
  #
  # @param source [String, Array<String>] one or more input files, appended as the
  #   last command line arguments
  # @param target [String] value passed as --file_output
  # @param options [Array<String>] extra command line arguments, placed before the inputs
  # @return whatever Libis::Tools::Command.run returns
  def run(source, target, options = [])
    jar_file = File.join(
        File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools')),
        'PdfTool.jar'
    )
    input_files = source.is_a?(Array) ? source : [source]

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = [
        Libis::Format::Config[:java_path],
        '-cp', jar_file,
        'MergePdf',
        '--file_output', target
    ]
    Libis::Tools::Command.run(*command_line, *options, *input_files)
  end
end
39
+
40
+ end
41
+ end
@@ -0,0 +1,39 @@
1
+ require 'os'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+
12
# Thin wrapper around the SplitPdf entry point of the bundled PdfTool.jar.
class PdfSplit
  include ::Libis::Tools::Logger

  # Shortcut for PdfSplit.new.run(source, target, options).
  def self.run(source, target, options = [])
    new.run(source, target, options)
  end

  # Runs SplitPdf in a separate Java process.
  #
  # @param source [String] value passed as --file_input
  # @param target [String] value passed as --file_output
  # @param options [Array<String>] extra command line arguments, appended verbatim
  # @return whatever Libis::Tools::Command.run returns
  def run(source, target, options = [])
    jar_file = File.join(
        File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools')),
        'PdfTool.jar'
    )

    if OS.java?
      # TODO: import library and execute in current VM. For now do exactly as in MRI.
    end

    command_line = [
        Libis::Format::Config[:java_path],
        '-cp', jar_file,
        'SplitPdf',
        '--file_input', source,
        '--file_output', target
    ]
    Libis::Tools::Command.run(*command_line, *options)
  end
end
37
+
38
+ end
39
+ end
@@ -0,0 +1,76 @@
1
+ require 'tempfile'
2
+ require 'csv'
3
+ require 'fileutils'
4
+
5
+ require 'libis/tools/extend/string'
6
+ require 'libis/tools/logger'
7
+ require 'libis/tools/command'
8
+
9
+ require 'libis/format'
10
+
11
+ module Libis
12
+ module Format
13
+
14
+ class PdfToPdfa
15
+ include ::Libis::Tools::Logger
16
+
17
# Class-level shortcut: builds a converter and delegates to its #run.
def self.run(source, target = nil, options = {})
  new.run(source, target, options)
end
20
+
21
# Converts +source+ to PDF/A with Ghostscript and validates the outcome.
#
# @param source [String] path of the PDF to convert
# @param target [String, nil] output path; a unique file in Dir.tmpdir when nil
# @param options [Hash, nil] :colorspace => 'cmyk' selects the CMYK profile,
#   anything else (or nil) the RGB profile
# @return [Hash] result of the Ghostscript command; :status is set to -999 and a
#   message is appended to :err when the generated file fails PDF/A validation
def run(source, target = nil, options = nil)
  # the signature allows nil, so normalise before indexing
  options ||= {}

  # Dir::Tmpname.make_tmpname no longer exists since Ruby 2.5; build the unique
  # name the same way as the other temp files below.
  target ||= File.join(
      Dir.tmpdir,
      "#{File.basename(source, '.*')}_#{Random.new.bytes(12).unpack('H*').first}.pdf"
  )

  data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))

  icc_info = icc_options(options[:colorspace])

  icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{Random.new.bytes(12).unpack('H*').first}.icc")
  def_filename = File.join(Dir.tmpdir, "PDFA_def_#{Random.new.bytes(12).unpack('H*').first}.ps")

  begin
    FileUtils.cp(File.join(data_dir, "#{icc_info[:icc_name]}.icc"), icc_file)

    # Fill the PostScript template with the ICC profile location and reference name.
    File.open(def_filename, 'w') do |f|
      f.puts File.read(File.join(data_dir, 'PDFA_def.ps')).
                 gsub('[** Fill in ICC profile location **]', icc_file).
                 gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
    end

    result = Libis::Tools::Command.run(
        Libis::Format::Config[:ghostscript_path],
        '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
        '-sColorConversionStrategy=/UseDeviceIndependentColor',
        "-sProcessColorModel=#{icc_info[:device]}",
        '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
        "-sOutputICCProfile=#{icc_file}",
        '-o', File.absolute_path(target),
        def_filename,
        source
    )
  ensure
    # remove the temp files even when copying or Ghostscript raised
    FileUtils.rm [icc_file, def_filename], force: true
  end

  unless PdfaValidator.run(target)
    result[:status] = -999
    result[:err] << 'Failed to validate generated PDF/A file.'
  end

  result
end
60
+
61
+
62
+ private
63
+
64
# Selects the ICC profile settings for a colorspace name.
#
# @param colorspace [#to_s] 'cmyk' (any letter case) selects the CMYK profile;
#   every other value, including nil, selects the RGB profile
# @return [Hash] with the keys :icc_name, :icc_ref and :device
def icc_options(colorspace)
  if colorspace.to_s.downcase == 'cmyk'
    return {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
  end

  {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
end
72
+
73
+ end
74
+
75
+ end
76
+ end
@@ -0,0 +1,61 @@
1
+ require 'fileutils'
2
+
3
+ require 'libis/tools/extend/string'
4
+ require 'libis/tools/logger'
5
+ require 'libis/tools/command'
6
+
7
+ require 'libis/format/config'
8
+
9
+ module Libis
10
+ module Format
11
+
12
# Validates a PDF file against the PDF/A-1B constraints, either with the tool
# configured as Config[:pdfa_path] or, when that is not set, with the bundled
# PDFBox preflight jar.
class PdfaValidator
  include ::Libis::Tools::Logger

  # Shortcut for PdfaValidator.new.run(source).
  def self.run(source)
    new.run source
  end

  # @param source [String] path of the PDF file to validate
  # @return [Boolean] true when the validator accepts the file; false otherwise
  #   (a warning with the validator output is logged in that case)
  def run(source)

    # resolve before changing directory so relative paths keep working
    src_file = File.absolute_path(source)

    if (pdfa = Libis::Format::Config[:pdfa_path])
      # Keep it clean: tool generates fontconfig/ cache dir in current working dir.
      # The block form of chdir restores the previous working directory even when
      # the command raises.
      result = Dir.chdir(Dir.tmpdir) do
        Libis::Tools::Command.run(
            pdfa,
            '--noxml',
            '--level', 'B',
            '--verb', '0',
            src_file
        )
      end
      passed = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
    else
      jar = File.join(Libis::Format::ROOT_DIR, 'tools', 'pdfbox', 'preflight-app-1.8.10.jar')
      result = Libis::Tools::Command.run(
          Libis::Format::Config[:java_path],
          '-jar', jar,
          src_file
      )
      passed = result[:status] == 0
    end

    return true if passed

    warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
         result[:out].join("\n")
    false
  end
end
59
+
60
+ end
61
+ end
@@ -0,0 +1,170 @@
1
+ # coding: utf-8
2
+
3
+ require 'singleton'
4
+ require 'yaml'
5
+
6
+ require 'backports/rails/hash'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/extend/string'
9
+
10
+ module Libis
11
+ module Format
12
+
13
# Singleton registry of known format types. Each type has a name (:TYPE), a group
# (:GROUP) and lists of MIME types (:MIME), PRONOM ids (:PUID) and file extensions
# (:EXTENSIONS). The registry is filled from data/types.yml when first used and can
# be extended with #load_types.
class TypeDatabase
  include Singleton
  include ::Libis::Tools::Logger

  class << self

    # @return [Hash] info of type +t+, or an empty Hash when the type is unknown
    def typeinfo(t)
      instance.types[t.to_sym] || {}
    end

    # Completes +info+ in place with whatever the database knows about it.
    #
    # @param info [Hash] hash holding a PUID and/or MIME type and/or type name
    # @param map_keys [Hash] maps the database keys (:PUID, :MIME, :TYPE, ...) to
    #   the keys used in +info+; unmapped keys are used as they are
    # @return [Hash] +info+ itself, or an empty Hash when +info+ is not a Hash
    def enrich(info, map_keys = {})
      return {} unless info.is_a? Hash
      mapper = Hash.new { |hash, key| hash[key] = key }
      mapper.merge! map_keys
      puid_key = mapper[:PUID]
      mime_key = mapper[:MIME]
      type_key = mapper[:TYPE]

      puid = info[puid_key]
      unless puid.blank?
        info[type_key] ||= begin
          puid_infos(puid).first[:TYPE]
        rescue StandardError
          nil
        end
      end

      mime = info[mime_key]
      unless mime.blank?
        info[type_key] ||= begin
          mime_infos(mime).first[:TYPE]
        rescue StandardError
          nil
        end
      end

      type_name = info[type_key]
      unless type_name.nil?
        info[mime_key] = type_mimetypes(type_name).first if info[mime_key].blank?
        info[puid_key] = type_puids(type_name).first if info[puid_key].blank?
        info[mapper[:EXTENSIONS]] = type_extentions(type_name)
        info[mapper[:GROUP]] = type_group(type_name)
      end
      info
    end

    # @return group of type +t+ (nil when unknown)
    def type_group(t)
      typeinfo(t)[:GROUP]
    end

    # @return [Array] MIME types of type +t+
    def type_mimetypes(t)
      typeinfo(t)[:MIME] || []
    end

    # @return [Array] PUIDs of type +t+
    def type_puids(t)
      typeinfo(t)[:PUID] || []
    end

    # @return [Array] file extensions of type +t+
    def type_extentions(t)
      typeinfo(t)[:EXTENSIONS] || []
    end

    # @return [Array] names of all types in +group+
    def group_types(group)
      instance.types.select { |_, v| v[:GROUP] == group.to_sym }.keys
    end

    # @return [Array<Hash>] infos of all types listing +puid+
    def puid_infos(puid)
      types_listing(:PUID, puid).values
    end

    # @return [Array] names of all types listing +puid+
    def puid_types(puid)
      types_listing(:PUID, puid).keys
    end

    # @return [Array] group of each type listing +puid+
    def puid_groups(puid)
      puid_types(puid).map { |t| type_group t }
    end

    # @return [Array<Hash>] infos of all types listing +mime+
    def mime_infos(mime)
      types_listing(:MIME, mime).values
    end

    # @return [Array] names of all types listing +mime+
    def mime_types(mime)
      types_listing(:MIME, mime).keys
    end

    # @return [Array] group of each type listing +mime+
    def mime_groups(mime)
      mime_types(mime).map { |t| type_group t }
    end

    # @param ext [String] file extension, with or without the leading dot
    # @return [Array<Hash>] infos of all types listing that extension
    def ext_infos(ext)
      types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).values
    end

    # @param ext [String] file extension, with or without the leading dot
    # @return [Array] names of all types listing that extension
    def ext_types(ext)
      types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).keys
    end

    # @return [Hash, nil] info of the first type listing +puid+
    def puid_typeinfo(puid)
      instance.types.values.find { |v| v[:PUID] && v[:PUID].include?(puid) }
    end

    # @return [Boolean] whether any type lists +mime+
    def known_mime?(mime)
      instance.types.values.any? { |v| v[:MIME].include? mime }
    end

    private

    # Types whose +field+ list contains +value+; entries on which the lookup
    # raises (e.g. the field is missing) are treated as non-matching.
    def types_listing(field, value)
      instance.types.select do |_, v|
        begin
          v[field].include?(value)
        rescue StandardError
          false
        end
      end
    end

  end

  # Hash of type name (Symbol) => type info Hash.
  attr_reader :types

  # Adds type definitions to the database.
  #
  # @param file_or_hash [Hash, String] nested hash {group => {type_name => info}},
  #   or the path of a YAML file with that structure
  # @param append [Boolean] when a type already exists: true keeps the existing
  #   scalar values and puts the existing list entries first, false gives the new
  #   definition precedence
  # @return [Hash] the loaded definitions
  def load_types(file_or_hash = {}, append = true)
    hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
    # noinspection RubyResolve
    hash.each do |group, type_info|
      type_info.each do |type_name, info|
        type_key = type_name.to_sym
        info.symbolize_keys!
        info[:TYPE] = type_key
        info[:GROUP] = group.to_sym
        # MIME and EXTENSIONS always become arrays, PUID only when it is present
        [:MIME, :EXTENSIONS].each do |field|
          info[field] = begin
            split_list(info[field])
          rescue StandardError
            []
          end
        end
        info[:PUID] = split_list(info[:PUID]) if info[:PUID]
        if @types.has_key?(type_key)
          warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
          info.merge!(@types[type_key]) do |_, v_new, v_old|
            combine(v_new, v_old, append)
          end
        end
        @types[type_key] = info
      end
    end
  end

  protected

  # Loads the type definitions shipped in the gem's data directory.
  def initialize
    @types = Hash.new
    data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
    type_database = File.join(data_dir, 'types.yml')
    load_types(type_database)
  end

  private

  # Splits a whitespace and/or comma separated string into its stripped items.
  def split_list(text)
    text.strip.split(/[\s,]+/).map { |v| v.strip }
  end

  # Merges a new value with the value already stored for the same key.
  def combine(v_new, v_old, append)
    case v_old
    when Array
      append ? v_old + v_new : v_new + v_old
    when Hash
      append ? v_new.merge(v_old) : v_old.merge(v_new)
    else
      append ? v_old : v_new
    end
  end

end
168
+
169
+ end
170
+ end
@@ -0,0 +1,5 @@
1
# Defines Libis::Format::VERSION, the version string of this library.
+ module Libis
2
+ module Format
3
+ VERSION = '0.9.5'
4
+ end
5
+ end
@@ -0,0 +1,23 @@
1
+ require 'libis/format/version'
2
+
3
module Libis
  # Namespace of the format library. The classes below are loaded lazily, on first
  # reference, from the listed files.
  module Format
    {
        Config: 'libis/format/config',
        TypeDatabase: 'libis/format/type_database',
        Identifier: 'libis/format/identifier',
        Fido: 'libis/format/fido',
        Droid: 'libis/format/droid',
        OfficeToPdf: 'libis/format/office_to_pdf',
        PdfCopy: 'libis/format/pdf_copy',
        PdfMerge: 'libis/format/pdf_merge',
        PdfSplit: 'libis/format/pdf_split',
        PdfToPdfa: 'libis/format/pdf_to_pdfa',
        PdfaValidator: 'libis/format/pdfa_validator',
        Converter: 'libis/format/converter'
    }.each do |const_name, feature|
      autoload const_name, feature
    end

    # Directory two levels above the directory of this file.
    ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
    # The 'data' directory below ROOT_DIR.
    DATA_DIR = File.join(ROOT_DIR, 'data')

  end
end