libis-format 0.9.5-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +18 -0
- data/.travis.yml +41 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +40 -0
- data/data/ead.xsd +2728 -0
- data/data/eciRGB_v2.icc +0 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +217 -0
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +101 -0
- data/lib/libis/format/converter/chain.rb +167 -0
- data/lib/libis/format/converter/image_converter.rb +214 -0
- data/lib/libis/format/converter/office_converter.rb +50 -0
- data/lib/libis/format/converter/pdf_converter.rb +139 -0
- data/lib/libis/format/converter/repository.rb +98 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +45 -0
- data/lib/libis/format/fido.rb +102 -0
- data/lib/libis/format/identifier.rb +189 -0
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +40 -0
- data/lib/libis/format/pdf_merge.rb +41 -0
- data/lib/libis/format/pdf_split.rb +39 -0
- data/lib/libis/format/pdf_to_pdfa.rb +76 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +23 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +34 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +60 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +396 -0
@@ -0,0 +1,189 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
|
5
|
+
require 'libis-tools'
|
6
|
+
require 'libis/tools/extend/string'
|
7
|
+
require 'libis/tools/extend/empty'
|
8
|
+
|
9
|
+
require 'libis/format/type_database'
|
10
|
+
|
11
|
+
require_relative 'fido'
|
12
|
+
require_relative 'droid'
|
13
|
+
|
14
|
+
module Libis
  module Format

    # Singleton that identifies the format of a file.
    #
    # #get runs a chain of identification steps -- FIDO, DROID, the +file+
    # command and the file extension -- and finally checks XML files against
    # the registered XML Schemas. Every step starts by asking #result_ok?
    # whether the result collected so far is already good enough and, if so,
    # returns it untouched.
    #
    # Results are Hashes. The steps fill in keys such as :mimetype, :puid,
    # :method and :messages (an Array of [severity, text] pairs).
    class Identifier
      include ::Libis::Tools::Logger
      include Singleton

      # MIME types that #result_ok? rejects when the type database does not
      # know the format, so that the next identification step gets a chance.
      RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES

      # MIME types that #result_ok? rejects when DROID is the one asking, which
      # makes DROID run even though an earlier step produced this MIME type.
      FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

      # Hash mapping a MIME type to the path of the XML Schema that detects it.
      attr_reader :xml_validations

      protected

      # Sets up the paths of the data files shipped in the gem's data directory.
      def initialize
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
        # noinspection RubyStringKeysInHashInspection
        @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
      end

      # Tells whether +result+ is good enough to stop the identification chain.
      #
      # The result is first enriched from the type database. A non-Hash result
      # is never OK; a result whose :TYPE is known always is.
      #
      # @param result the result Hash collected so far
      # @param who_is_asking :DROID when called from the DROID step
      # @return [Boolean]
      def result_ok?(result, who_is_asking = nil)
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
        return false if result.empty?
        return true unless result[:TYPE].empty?
        return false if RETRY_MIMETYPES.include?(result[:mimetype])
        return false if FIDO_FAILURES.include?(result[:mimetype]) && who_is_asking == :DROID
        !(result[:mimetype].empty? && result[:puid].empty?)
      end

      # Looks up the first PUID the type database lists for +mimetype+.
      #
      # @return the PUID, or nil when none is found or the lookup fails
      def get_puid(mimetype)
        info = ::Libis::Format::TypeDatabase.mime_infos(mimetype).first
        info && Array(info[:PUID]).first
      rescue StandardError
        nil
      end

      public

      # Registers an extra format with FIDO (delegates to Fido.add_format).
      def self.add_fido_format(f)
        ::Libis::Format::Fido.add_format f
      end

      # Registers +xsd_file+ as the XML Schema that identifies +mimetype+.
      def self.add_xml_validation(mimetype, xsd_file)
        instance.xml_validations[mimetype] = xsd_file
      end

      # @return [Hash] the registered MIME type => XML Schema file mappings
      def self.xml_validations
        instance.xml_validations
      end

      # Class-level shortcut for #get on the singleton instance.
      def self.get(file_path, options = nil)
        instance.get file_path, options
      end

      # Identifies +file+.
      #
      # @param file path of the file to identify
      # @param options [Hash, nil] :droid (truthy) skips the FIDO step;
      #   :formats is passed to FIDO as extra formats
      # @return [Hash, nil] the result Hash, or nil (after logging an error)
      #   when +file+ does not exist or is a directory
      def get(file, options = nil)
        # File.exists? was removed in Ruby 3.2; File.exist? works on every version.
        unless File.exist? file
          error 'File %s cannot be found.', file
          return nil
        end
        if File.directory? file
          error '%s is a directory.', file
          return nil
        end

        options ||= {}

        result = { messages: [] }

        # use FIDO
        # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
        # NOTE(review): the original comment ("FIDO misses should be registered in") was
        # truncated; presumably it referred to FIDO_FAILURES -- confirm.
        result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

        # use DROID
        result = get_droid_identification file, result

        # use FILE
        result = get_file_identification(file, result)

        # Try file extension
        result = get_extension_identification(file, result)

        # determine XML type. Add custom types at runtime with
        #    Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
        result = validate_against_xml_schema(file, result)

        if result[:mimetype]
          log_msg(result, :info, "Identification of '#{file}': '#{result}'")
        else
          log_msg(result, :warn, "Could not identify MIME type of '#{file}'")
        end
      end

      # Runs FIDO on +file+ unless +result+ is already OK and merges FIDO's
      # answer into +result+.
      def get_fido_identification(file, result = {}, xtra_formats = nil)
        return result if result_ok? result

        fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

        return result unless fido_result.is_a? Hash

        result.merge! fido_result
        result[:method] = 'fido'

        log_msg(result, :debug, "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Runs DROID on +file+ unless +result+ is already OK.
      #
      # When DROID reports at least one match, everything collected so far
      # except the :messages is replaced by the first match. When DROID
      # reports nothing, +result+ is returned as it was (plus a debug message)
      # so the remaining steps can still run.
      def get_droid_identification(file, result = {})
        return result if result_ok? result, :DROID

        droid_output = ::Libis::Format::Droid.run file
        log_msg(result, :debug, "DROID: #{droid_output}")
        return result if droid_output.nil? || droid_output.empty?

        warn 'Droid found multiple matches; using first match only' if droid_output.size > 1
        match = droid_output.first

        # Drop the data of earlier steps but keep the message log.
        messages = result[:messages]
        result.clear
        result[:messages] = messages if messages

        result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find { |x| x =~ /.*\/.*/ }
        result[:matchtype] = match[:method]
        result[:puid] = match[:puid]
        result[:format_name] = match[:format_name]
        result[:format_version] = match[:format_version]
        result[:method] = 'droid'

        log_msg(result, :debug, "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Asks the +file+ command for the MIME type unless +result+ is already OK.
      #
      # The MIME type is read from the command's standard output; output that
      # does not look like "type/subtype" (e.g. an error text) is ignored.
      # Failures are recorded as a debug message instead of being raised.
      def get_file_identification(file, result = nil)
        return result if result_ok? result
        result ||= {}
        begin
          # NOTE(review): the path is wrapped in literal quotes exactly as before;
          # verify that Libis::Tools::Command.run expects shell-style quoting.
          run = ::Libis::Tools::Command.run('file', '-b', '--mime-type', "\"#{file.escape_for_string}\"")
          # :out is handled both as an Array of lines and as a plain String.
          mimetype = Array(run[:out]).join("\n").strip.split.first
          if mimetype && mimetype =~ %r{\A[^/\s]+/[^/\s]+\z}
            log_msg(result, :debug, "File result: '#{mimetype}'")
            result[:mimetype] = mimetype
            result[:puid] = get_puid(mimetype)
          end
          result[:method] = 'file'
        rescue StandardError => e
          log_msg(result, :debug, "File identification failed: #{e.message}")
        end
        result
      end

      # Derives MIME type and PUID from the file extension unless +result+ is
      # already OK, using the first matching entry of the type database.
      def get_extension_identification(file, result = nil)
        return result if result_ok? result
        result ||= {}
        info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
        log_msg result, :debug, "File extension info: #{info}"
        if info
          result[:mimetype] = Array(info[:MIME]).first
          result[:puid] = Array(info[:PUID]).first
        end
        result[:method] = 'extension'
        result
      end

      # For generic XML results, validates the document against every
      # registered XML Schema; on success the schema's MIME type replaces the
      # generic one, the PUID is reset and the result is enriched again.
      # When several schemas validate, the last one wins.
      def validate_against_xml_schema(file, result)
        return result unless result[:mimetype] =~ /^(text|application)\/xml$/
        doc = ::Libis::Tools::XmlDocument.open file
        xml_validations.each do |mime, xsd_file|
          next unless xsd_file
          if doc.validates_against?(xsd_file)
            log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
            result[:mimetype] = mime
            result[:puid] = nil
            result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
          end
        end
        result
      end

      private

      # Appends [severity, text] to result[:messages] and returns +result+;
      # returns an empty Hash when +result+ is not a Hash.
      def log_msg(result, severity, text)
        return {} unless result.is_a?(Hash)
        (result[:messages] ||= []) << [severity, text]
        result
      end

    end

  end
end
|
@@ -0,0 +1,52 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format

    # Converts an office document by running the executable configured as
    # Libis::Format::Config[:soffice_path] in headless mode.
    class OfficeToPdf
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source, target, options = {})
        self.new.run source, target, options
      end

      # Converts +source+ and copies the converted file to +target+.
      #
      # The conversion happens in a scratch directory that holds a hard link
      # to +source+; the directory is always removed again, also when the
      # conversion fails or raises.
      #
      # @param source path of the document to convert
      # @param target path the converted file is copied to
      # @param options [Hash] :export_filter is passed to --convert-to
      #   (default 'pdf')
      # @return the command's :out value on success, false (after a warning)
      #   when the command's exit status is not 0
      def run(source, target, options = {})
        workdir = '/...'
        workdir = Dir.tmpdir unless Dir.exist? workdir

        workdir = File.join(workdir, rand(1000000).to_s)
        FileUtils.mkpath(workdir)

        begin
          src_file = File.join(workdir, File.basename(source))
          FileUtils.link source, src_file

          # The converted file is expected next to the source, with a .pdf extension.
          tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

          export_filter = options[:export_filter] || 'pdf'

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:soffice_path], '--headless',
              '--convert-to', export_filter,
              '--outdir', workdir, src_file
          )

          unless result[:status] == 0
            warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
            return false
          end

          FileUtils.copy tgt_file, target, preserve: true

          result[:out]
        ensure
          # Runs on success, on the early `return false` and on exceptions.
          FileUtils.rmtree workdir
        end
      end
    end

  end
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format

    # Runs the CopyPdf class from the PdfTool.jar found in the tools directory
    # three levels above this file.
    class PdfCopy
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # Starts the Java executable configured as Config[:java_path] with
      # +source+ as --file_input and +target+ as --file_output; +options+ are
      # appended to the command line as-is.
      #
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        tools_path = File.absolute_path('../../../tools', File.dirname(__FILE__))
        jar_file = File.join(tools_path, 'PdfTool.jar')

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        arguments = [
          '-cp', jar_file,
          'CopyPdf',
          '--file_input', source,
          '--file_output', target,
          *options
        ]
        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *arguments)
      end
    end

  end
end
|
@@ -0,0 +1,41 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format

    # Runs the MergePdf class from the PdfTool.jar found in the tools directory
    # three levels above this file.
    class PdfMerge
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # Starts the Java executable configured as Config[:java_path] with
      # +target+ as --file_output, followed by +options+ and then the source
      # file(s).
      #
      # @param source a single value or an Array of values; a non-Array is
      #   wrapped in a one-element Array
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        tools_path = File.absolute_path('../../../tools', File.dirname(__FILE__))
        jar_file = File.join(tools_path, 'PdfTool.jar')
        sources = source.is_a?(Array) ? source : [source]

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        arguments = [
          '-cp', jar_file,
          'MergePdf',
          '--file_output', target,
          *options,
          *sources
        ]
        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *arguments)
      end
    end

  end
end
|
@@ -0,0 +1,39 @@
|
|
1
|
+
require 'os'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format

    # Runs the SplitPdf class from the PdfTool.jar found in the tools directory
    # three levels above this file.
    class PdfSplit
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # Starts the Java executable configured as Config[:java_path] with
      # +source+ as --file_input and +target+ as --file_output; +options+ are
      # appended to the command line as-is.
      #
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        tools_path = File.absolute_path('../../../tools', File.dirname(__FILE__))
        jar_file = File.join(tools_path, 'PdfTool.jar')

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        arguments = [
          '-cp', jar_file,
          'SplitPdf',
          '--file_input', source,
          '--file_output', target,
          *options
        ]
        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *arguments)
      end
    end

  end
end
|
@@ -0,0 +1,76 @@
|
|
1
|
+
require 'tempfile'
|
2
|
+
require 'csv'
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
require 'libis/tools/extend/string'
|
6
|
+
require 'libis/tools/logger'
|
7
|
+
require 'libis/tools/command'
|
8
|
+
|
9
|
+
require 'libis/format'
|
10
|
+
|
11
|
+
module Libis
  module Format

    # Converts a PDF file to PDF/A by running the executable configured as
    # Libis::Format::Config[:ghostscript_path], then checks the output with
    # PdfaValidator.
    class PdfToPdfa
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source, target = nil, options = {})
        self.new.run source, target, options
      end

      # Converts +source+ to PDF/A.
      #
      # Temporary copies of the ICC profile and of the PDFA_def.ps template
      # (with the profile location and reference name filled in) are written
      # to the temp directory and removed again afterwards, also when the
      # command raises.
      #
      # @param source path of the PDF to convert
      # @param target output path; when nil a unique path in the temp
      #   directory is generated (the chosen path is not returned)
      # @param options [Hash, nil] :colorspace -- 'cmyk' selects the CMYK
      #   profile, anything else the RGB profile
      # @return [Hash] the result of the command; when validation of the
      #   generated file fails, :status is set to -999 and a line is appended
      #   to :err
      def run(source, target = nil, options = nil)
        # The signature defaults options to nil; treat that as "no options".
        options ||= {}

        # Dir::Tmpname.make_tmpname no longer exists since Ruby 2.5;
        # Dir::Tmpname.create yields and returns a unique path without creating it.
        target ||= Dir::Tmpname.create([File.basename(source, '.*'), '.pdf']) {}

        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))

        icc_info = icc_options(options[:colorspace])

        icc_file = nil
        def_filename = nil

        begin
          icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{Random.new.bytes(12).unpack('H*').first}.icc")
          FileUtils.cp(File.join(data_dir, "#{icc_info[:icc_name]}.icc"), icc_file)

          def_filename = File.join(Dir.tmpdir, "PDFA_def_#{Random.new.bytes(12).unpack('H*').first}.ps")
          File.open(def_filename, 'w') do |f|
            f.puts File.read(File.join(data_dir, 'PDFA_def.ps')).
                       gsub('[** Fill in ICC profile location **]', icc_file).
                       gsub('[** Fill in ICC reference name **]', icc_info[:icc_ref])
          end

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:ghostscript_path],
              '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
              '-sColorConversionStrategy=/UseDeviceIndependentColor',
              "-sProcessColorModel=#{icc_info[:device]}",
              '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
              "-sOutputICCProfile=#{icc_file}",
              '-o', File.absolute_path(target),
              def_filename,
              source
          )
        ensure
          FileUtils.rm [icc_file, def_filename].compact, force: true
        end

        unless PdfaValidator.run(target)
          result[:status] = -999
          result[:err] << 'Failed to validate generated PDF/A file.'
        end

        result
      end


      private

      # Maps a colorspace name (case-insensitive) to the ICC profile name,
      # ICC reference name and process color model to use.
      def icc_options(colorspace)
        case colorspace.to_s.downcase
          when 'cmyk'
            {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
          else
            {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
        end
      end

    end

  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
require 'fileutils'
|
2
|
+
|
3
|
+
require 'libis/tools/extend/string'
|
4
|
+
require 'libis/tools/logger'
|
5
|
+
require 'libis/tools/command'
|
6
|
+
|
7
|
+
require 'libis/format/config'
|
8
|
+
|
9
|
+
module Libis
  module Format

    # Checks whether a PDF file passes PDF/A validation, using either the
    # tool configured as Libis::Format::Config[:pdfa_path] or, when that is
    # not set, the preflight jar under ROOT_DIR/tools/pdfbox.
    class PdfaValidator
      include ::Libis::Tools::Logger

      # Shortcut for new.run.
      def self.run(source)
        self.new.run source
      end

      # Validates +source+.
      #
      # @param source path of the PDF file
      # @return [Boolean] true when the file validates; false (after a
      #   warning that includes the tool's output) otherwise
      def run(source)

        src_file = File.absolute_path(source)

        if (pdfa = Libis::Format::Config[:pdfa_path])
          # Keep it clean: tool generates fontconfig/ cache dir in current working dir
          previous_wd = Dir.getwd
          begin
            Dir.chdir(Dir.tmpdir)

            result = Libis::Tools::Command.run(
                pdfa,
                '--noxml',
                '--level', 'B',
                '--verb', '0',
                src_file
            )
          ensure
            # Restore the working directory even when the command raises.
            Dir.chdir(previous_wd)
          end

          # This tool signals success with a VLD-[PASS] line in its output.
          passed = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
        else
          jar = File.join(ROOT_DIR, 'tools', 'pdfbox', 'preflight-app-1.8.10.jar')
          result = Libis::Tools::Command.run(
              Libis::Format::Config[:java_path],
              '-jar', jar,
              src_file
          )
          # The preflight jar is judged by its exit status.
          passed = result[:status] == 0
        end

        return true if passed

        report_failure(source, result)
        false
      end

      private

      # Logs the validation failure together with the tool's output lines.
      def report_failure(source, result)
        warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
             result[:out].join("\n")
      end
    end

  end
end
|
@@ -0,0 +1,170 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'singleton'
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
require 'backports/rails/hash'
|
7
|
+
require 'libis/tools/logger'
|
8
|
+
require 'libis/tools/extend/string'
|
9
|
+
|
10
|
+
module Libis
  module Format

    # Singleton registry of known file types.
    #
    # The registry is a Hash keyed by type name (Symbol). Each entry is a Hash
    # with the keys :TYPE, :GROUP, :MIME (Array), :EXTENSIONS (Array) and,
    # when the definition provides it, :PUID (Array). It is filled from
    # data/types.yml on first use; more definitions can be added with
    # #load_types.
    class TypeDatabase
      include Singleton
      include ::Libis::Tools::Logger

      # @return [Hash] the entry for type +t+, or an empty Hash when unknown
      def self.typeinfo(t)
        instance.types[t.to_sym] || {}
      end

      # Completes +info+ (in place) with what the registry knows.
      #
      # The type is looked up from the PUID first and then from the MIME type,
      # unless +info+ already has one. When a type is known, missing MIME type
      # and PUID are filled in and the type's extensions and group are set.
      #
      # @param info [Hash] the data to enrich
      # @param map_keys [Hash] maps the logical keys :PUID, :MIME, :TYPE,
      #   :EXTENSIONS and :GROUP to the keys actually used in +info+;
      #   unmapped keys are used as-is
      # @return [Hash] +info+ itself, or an empty Hash when +info+ is not a Hash
      def self.enrich(info, map_keys = {})
        return {} unless info.is_a? Hash
        mapper = Hash.new { |hash, key| hash[key] = key }
        mapper.merge! map_keys
        puid_key = mapper[:PUID]
        mime_key = mapper[:MIME]
        type_key = mapper[:TYPE]

        unless (puid = info[puid_key]).blank?
          info[type_key] ||= (puid_infos(puid).first || {})[:TYPE]
        end
        unless (mime = info[mime_key]).blank?
          info[type_key] ||= (mime_infos(mime).first || {})[:TYPE]
        end
        unless (type_name = info[type_key]).nil?
          info[mime_key] = type_mimetypes(type_name).first if info[mime_key].blank?
          info[puid_key] = type_puids(type_name).first if info[puid_key].blank?
          info[mapper[:EXTENSIONS]] = type_extensions(type_name)
          info[mapper[:GROUP]] = type_group(type_name)
        end
        info
      end

      # @return the group of type +t+, or nil when unknown
      def self.type_group(t)
        typeinfo(t)[:GROUP]
      end

      # @return [Array] the MIME types of type +t+
      def self.type_mimetypes(t)
        typeinfo(t)[:MIME] || []
      end

      # @return [Array] the PUIDs of type +t+
      def self.type_puids(t)
        typeinfo(t)[:PUID] || []
      end

      # @return [Array] the file extensions of type +t+
      def self.type_extensions(t)
        typeinfo(t)[:EXTENSIONS] || []
      end

      # Misspelled original name of .type_extensions, kept so existing callers keep working.
      def self.type_extentions(t)
        type_extensions(t)
      end

      # @return [Array<Symbol>] the names of all types in +group+
      def self.group_types(group)
        instance.types.select do |_, v|
          v[:GROUP] == group.to_sym
        end.keys
      end

      # @return [Array<Hash>] the entries of all types that list +puid+
      def self.puid_infos(puid)
        matching_types(:PUID, puid).values
      end

      # @return [Array<Symbol>] the names of all types that list +puid+
      def self.puid_types(puid)
        matching_types(:PUID, puid).keys
      end

      # @return [Array] the group of every type that lists +puid+
      def self.puid_groups(puid)
        puid_types(puid).map { |t| type_group t }
      end

      # @return [Array<Hash>] the entries of all types that list +mime+
      def self.mime_infos(mime)
        matching_types(:MIME, mime).values
      end

      # @return [Array<Symbol>] the names of all types that list +mime+
      def self.mime_types(mime)
        matching_types(:MIME, mime).keys
      end

      # @return [Array] the group of every type that lists +mime+
      def self.mime_groups(mime)
        mime_types(mime).map { |t| type_group t }
      end

      # @param ext file extension, with or without the leading dot
      # @return [Array<Hash>] the entries of all types that list +ext+
      def self.ext_infos(ext)
        matching_types(:EXTENSIONS, ext.gsub(/^\./, '')).values
      end

      # @param ext file extension, with or without the leading dot
      # @return [Array<Symbol>] the names of all types that list +ext+
      def self.ext_types(ext)
        matching_types(:EXTENSIONS, ext.gsub(/^\./, '')).keys
      end

      # @return [Hash, nil] the entry of the first type that lists +puid+
      def self.puid_typeinfo(puid)
        puid_infos(puid).first
      end

      # @return [Boolean] whether any type lists +mime+
      def self.known_mime?(mime)
        instance.types.any? { |_, v| Array(v[:MIME]).include?(mime) }
      end

      # Selects the registry entries whose list under +key+ contains +value+.
      # Entries without that key simply do not match.
      def self.matching_types(key, value)
        instance.types.select { |_, v| Array(v[key]).include?(value) }
      end
      private_class_method :matching_types

      # The registry Hash (type name => entry).
      attr_reader :types

      # Adds type definitions to the registry.
      #
      # The input is a two-level Hash: group name => { type name => info }.
      # MIME, EXTENSIONS and PUID may be given as whitespace/comma separated
      # Strings or as Arrays. The given info Hashes are modified in place and
      # stored in the registry.
      #
      # When a type is already defined, a warning is logged and both
      # definitions are merged: lists are concatenated without duplicates,
      # Hashes are merged and for other values one side wins.
      #
      # @param file_or_hash a Hash, or the path of a YAML file holding one
      # @param append [Boolean] true: existing list entries come first and
      #   existing values win; false: the new ones do
      # @return [Hash] the loaded definitions
      def load_types(file_or_hash = {}, append = true)
        hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
        # noinspection RubyResolve
        hash.each do |group, type_info|
          type_info.each do |type_name, info|
            type_key = type_name.to_sym
            info.symbolize_keys!
            info[:TYPE] = type_key
            info[:GROUP] = group.to_sym
            info[:MIME] = split_list(info[:MIME])
            info[:EXTENSIONS] = split_list(info[:EXTENSIONS])
            # :PUID stays absent when the definition has none.
            info[:PUID] = split_list(info[:PUID]) if info[:PUID]
            if @types.has_key?(type_key)
              warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
              # The receiver of merge! is the new info, so the block gets (key, new, old).
              info.merge!(@types[type_key]) do |_, v_new, v_old|
                case v_old
                  when Array
                    (append ? v_old + v_new : v_new + v_old).uniq
                  when Hash
                    append ? v_new.merge(v_old) : v_old.merge(v_new)
                  else
                    append ? v_old : v_new
                end
              end
            end
            @types[type_key] = info
          end
        end
      end

      private

      # Loads the type definitions shipped in the gem's data directory.
      def initialize
        @types = Hash.new
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        type_database = File.join(data_dir, 'types.yml')
        load_types(type_database)
      end

      # Turns a whitespace/comma separated String (or an Array of such
      # Strings) into a flat Array of entries; nil gives an empty Array.
      def split_list(value)
        Array(value).flat_map { |v| v.to_s.strip.split(/[\s,]+/) }
      end

    end

  end
end
|
data/lib/libis/format.rb
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
require 'libis/format/version'
|
2
|
+
|
3
|
+
module Libis
  # Top-level namespace of the gem: registers its components for lazy loading
  # and defines the directory constants.
  module Format
    # Constant name => file that defines it; each file is only loaded when the
    # constant is first referenced.
    {
      Config: 'libis/format/config',
      TypeDatabase: 'libis/format/type_database',
      Identifier: 'libis/format/identifier',
      Fido: 'libis/format/fido',
      Droid: 'libis/format/droid',
      OfficeToPdf: 'libis/format/office_to_pdf',
      PdfCopy: 'libis/format/pdf_copy',
      PdfMerge: 'libis/format/pdf_merge',
      PdfSplit: 'libis/format/pdf_split',
      PdfToPdfa: 'libis/format/pdf_to_pdfa',
      PdfaValidator: 'libis/format/pdfa_validator',
      Converter: 'libis/format/converter'
    }.each do |const_name, feature|
      autoload const_name, feature
    end

    # Absolute path of the directory two levels above this file's directory.
    ROOT_DIR = File.absolute_path('../..', File.dirname(__FILE__))
    # The data directory below ROOT_DIR.
    DATA_DIR = File.join(ROOT_DIR, 'data')
  end
end
|