libis-format 0.9.5-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +18 -0
- data/.travis.yml +41 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +40 -0
- data/data/ead.xsd +2728 -0
- data/data/eciRGB_v2.icc +0 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +217 -0
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +101 -0
- data/lib/libis/format/converter/chain.rb +167 -0
- data/lib/libis/format/converter/image_converter.rb +214 -0
- data/lib/libis/format/converter/office_converter.rb +50 -0
- data/lib/libis/format/converter/pdf_converter.rb +139 -0
- data/lib/libis/format/converter/repository.rb +98 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +45 -0
- data/lib/libis/format/fido.rb +102 -0
- data/lib/libis/format/identifier.rb +189 -0
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +40 -0
- data/lib/libis/format/pdf_merge.rb +41 -0
- data/lib/libis/format/pdf_split.rb +39 -0
- data/lib/libis/format/pdf_to_pdfa.rb +76 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +23 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +34 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +60 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +396 -0
|
@@ -0,0 +1,189 @@
|
|
|
1
|
+
# encoding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
|
|
5
|
+
require 'libis-tools'
|
|
6
|
+
require 'libis/tools/extend/string'
|
|
7
|
+
require 'libis/tools/extend/empty'
|
|
8
|
+
|
|
9
|
+
require 'libis/format/type_database'
|
|
10
|
+
|
|
11
|
+
require_relative 'fido'
|
|
12
|
+
require_relative 'droid'
|
|
13
|
+
|
|
14
|
+
module Libis
  module Format

    # Singleton that determines the format of a file. #get consults, in order,
    # FIDO, DROID, the +file+ command and finally the file extension; each step
    # is skipped as soon as an earlier step produced an acceptable result (see
    # #result_ok?). XML files are additionally matched against the registered
    # XML Schemas.
    #
    # Results are plain Hashes with (when known) the keys +:mimetype+, +:puid+,
    # +:method+ and a +:messages+ list of <tt>[severity, text]</tt> pairs.
    class Identifier
      include ::Libis::Tools::Logger
      include Singleton

      # A result carrying one of these MIME types is not accepted as final
      # (unless a TYPE could be resolved for it), so the next tool is tried.
      RETRY_MIMETYPES = %w(application/zip) + ::Libis::Format::Fido::BAD_MIMETYPES

      # MIME types for which an existing result is rejected when DROID asks,
      # so that DROID gets to re-identify the file.
      FIDO_FAILURES = %w(application/vnd.oasis.opendocument.text application/vnd.oasis.opendocument.spreadsheet)

      # Hash of MIME type => path of the XML Schema that identifies it.
      attr_reader :xml_validations

      protected

      # Sets up the default FIDO format file and the default XML validations,
      # both located in the gem's +data+ directory.
      def initialize
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        # NOTE(review): @fido_formats is not read anywhere in this class.
        @fido_formats = [(File.join(data_dir, 'lias_formats.xml'))]
        # noinspection RubyStringKeysInHashInspection
        @xml_validations = {'archive/ead' => File.join(data_dir, 'ead.xsd')}
      end

      # Decides whether +result+ is good enough to stop trying further tools.
      #
      # The result is first enriched (in place) through the TypeDatabase.
      #
      # @param result [Hash] identification result gathered so far
      # @param who_is_asking [Symbol, nil] +:DROID+ activates the FIDO_FAILURES check
      # @return [Boolean]
      def result_ok?(result, who_is_asking = nil)
        result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
        return false if result.empty?
        # NOTE(review): the empty? calls below may receive nil; presumably the
        # required 'libis/tools/extend/empty' makes nil respond to it - confirm.
        return true unless result[:TYPE].empty?
        return false if RETRY_MIMETYPES.include?(result[:mimetype])
        return false if who_is_asking == :DROID && FIDO_FAILURES.include?(result[:mimetype])
        !(result[:mimetype].empty? && result[:puid].empty?)
      end

      # @param mimetype [String]
      # @return [String, nil] first PUID of the first type registered for +mimetype+
      def get_puid(mimetype)
        info = ::Libis::Format::TypeDatabase.mime_infos(mimetype).first
        first_or_nil(info && info[:PUID])
      end

      public

      # Registers an extra format definition with FIDO.
      def self.add_fido_format(f)
        ::Libis::Format::Fido.add_format f
      end

      # Registers an XML Schema; XML files validating against it get +mimetype+.
      def self.add_xml_validation(mimetype, xsd_file)
        instance.xml_validations[mimetype] = xsd_file
      end

      # @return [Hash] the registered MIME type => XSD file mappings
      def self.xml_validations
        instance.xml_validations
      end

      # Class-level shortcut for #get on the singleton instance.
      def self.get(file_path, options = nil)
        instance.get file_path, options
      end

      # Identifies a single file.
      #
      # @param file [String] path of the file to identify
      # @param options [Hash, nil] +:droid+ (truthy skips FIDO) and +:formats+
      #   (passed to FIDO as extra formats)
      # @return [Hash, nil] the result hash, or nil when +file+ does not exist
      #   or is a directory (an error is logged in both cases)
      def get(file, options = nil)

        # File.exist? instead of File.exists?: the latter is deprecated and was
        # removed in Ruby 3.2.
        unless File.exist? file
          error 'File %s cannot be found.', file
          return nil
        end
        if File.directory? file
          error '%s is a directory.', file
          return nil
        end

        options ||= {}

        result = { messages: [] }

        # use FIDO
        # Note: FIDO does not always do a good job, mainly due to lacking container inspection.
        # FIDO misses should be registered (the original note is cut off here;
        # presumably it meant FIDO_FAILURES - confirm).
        result = get_fido_identification(file, result, options[:formats]) unless options[:droid]

        # use DROID
        result = get_droid_identification file, result

        # use FILE
        result = get_file_identification(file, result)

        # Try file extension
        result = get_extension_identification(file, result)

        # determine XML type. Add custom types at runtime with
        # Libis::Format::Identifier.add_xml_validation('my_type', '/path/to/my_type.xsd')
        result = validate_against_xml_schema(file, result)

        if result[:mimetype]
          log_msg(result, :info, "Identification of '#{file}': '#{result}'")
        else
          log_msg(result, :warn, "Could not identify MIME type of '#{file}'")
        end
      end

      # Runs FIDO unless +result+ is already acceptable and merges its output
      # into +result+.
      #
      # @return [Hash] the (possibly updated) result
      def get_fido_identification(file, result = {}, xtra_formats = nil)
        return result if result_ok? result

        fido_result = ::Libis::Format::Fido.run(file, xtra_formats)

        return result unless fido_result.is_a? Hash

        result.merge! fido_result
        result[:method] = 'fido'

        log_msg(result, :debug, "Fido MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Runs DROID unless +result+ is already acceptable. When DROID reports at
      # least one match, every previously gathered field is replaced by the
      # first match; the collected messages are kept.
      #
      # @return [Hash] the (possibly updated) result
      def get_droid_identification(file, result = {})
        return result if result_ok? result, :DROID
        droid_output = ::Libis::Format::Droid.run file
        # log_msg creates the :messages list when the caller did not supply one.
        log_msg(result, :debug, "DROID: #{droid_output}")
        # Nothing to use: keep whatever was known before instead of crashing.
        return result if droid_output.nil? || droid_output.empty?
        warn 'Droid found multiple matches; using first match only' if droid_output.size > 1

        # Wipe the fields of earlier tools, but not the message log.
        messages = result[:messages]
        result.clear
        result[:messages] = messages

        match = droid_output.first
        # DROID may report several values; keep the first that looks like type/subtype.
        result[:mimetype] = match[:mime_type].to_s.split(/[\s,]+/).find {|x| x =~ /.*\/.*/}
        result[:matchtype] = match[:method]
        result[:puid] = match[:puid]
        result[:format_name] = match[:format_name]
        result[:format_version] = match[:format_version]
        result[:method] = 'droid'

        log_msg(result, :debug, "Droid MIME-type: #{result[:mimetype]} (PRONOM UID: #{result[:puid]})")
      end

      # Asks the +file+ command for the MIME type unless +result+ is already
      # acceptable. Failures of the external command are logged in the result
      # and otherwise ignored (best effort).
      #
      # @return [Hash] the (possibly updated) result
      def get_file_identification(file, result = nil)
        return result if result_ok? result
        result = {} unless result.is_a?(Hash)
        begin
          # The path is passed as a plain argument, like every other
          # Command.run call in this gem.
          # NOTE(review): confirm Command.run does not go through a shell.
          output = ::Libis::Tools::Command.run('file', '-b', '--mime-type', file)
          # The command output is a list of lines on stdout; only accept a line
          # that really looks like a MIME type so that error text is not stored.
          mimetype = output[:out].map { |line| line.strip }.find { |line| line =~ %r{\A[^\s/]+/\S+\z} }
          if mimetype
            log_msg(result, :debug, "File result: '#{mimetype}'")
            result[:mimetype] = mimetype
            result[:puid] = get_puid(mimetype)
            result[:method] = 'file'
          end
        rescue StandardError => e
          log_msg(result, :debug, "File identification failed: #{e.message}")
        end
        result
      end

      # Looks the file extension up in the TypeDatabase unless +result+ is
      # already acceptable.
      #
      # @return [Hash] the (possibly updated) result; +:method+ is set to
      #   'extension' whether or not the extension was known
      def get_extension_identification(file, result = nil)
        return result if result_ok? result
        result = {} unless result.is_a?(Hash)
        info = ::Libis::Format::TypeDatabase.ext_infos(File.extname(file)).first
        log_msg result, :debug, "File extension info: #{info}"
        if info
          result[:mimetype] = first_or_nil(info[:MIME])
          result[:puid] = first_or_nil(info[:PUID])
        end
        result[:method] = 'extension'
        result
      end

      # For files identified as text/xml or application/xml: validates the
      # document against every registered XML Schema and, on success, replaces
      # the MIME type by the one registered for that schema and re-enriches the
      # result. When several schemas validate, the last one wins.
      #
      # @return [Hash] the (possibly updated) result
      def validate_against_xml_schema(file, result)
        return result unless result[:mimetype] =~ /^(text|application)\/xml$/
        doc = ::Libis::Tools::XmlDocument.open file
        xml_validations.each do |mime, xsd_file|
          next unless xsd_file
          if doc.validates_against?(xsd_file)
            log_msg result, :debug, "XML file validated against XML Schema: #{xsd_file}"
            result[:mimetype] = mime
            result[:puid] = nil
            result = ::Libis::Format::TypeDatabase.enrich(result, PUID: :puid, MIME: :mimetype)
          end
        end
        result
      end

      private

      # Appends <tt>[severity, text]</tt> to the messages of +result+.
      #
      # @return [Hash] +result+, or an empty Hash when +result+ is not a Hash
      def log_msg(result, severity, text)
        return {} unless result.is_a?(Hash)
        (result[:messages] ||= []) << [severity, text]
        result
      end

      # @return [Object, nil] first element of +list+, nil when +list+ has none
      #   or is not a list at all
      def first_or_nil(list)
        list.respond_to?(:first) ? list.first : nil
      end

    end

  end
end
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
|
|
3
|
+
require 'libis/tools/extend/string'
|
|
4
|
+
require 'libis/tools/logger'
|
|
5
|
+
require 'libis/tools/command'
|
|
6
|
+
|
|
7
|
+
require 'libis/format/config'
|
|
8
|
+
|
|
9
|
+
require 'tmpdir' # Dir.tmpdir / Dir.mktmpdir are not available without it

module Libis
  module Format

    # Converts an office document to PDF by running +soffice+ headless in a
    # private scratch directory.
    class OfficeToPdf
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source, target, options = {})
        self.new.run source, target, options
      end

      # Converts +source+ and copies the generated file to +target+.
      #
      # @param source [String] path of the document to convert
      # @param target [String] path where the converted file is stored
      # @param options [Hash] +:export_filter+ selects the soffice output
      #   filter (default 'pdf')
      # @return [Array<String>, false] the converter's stdout lines, or false
      #   when soffice exits with a non-zero status (a warning is logged)
      def run(source, target, options = {})
        options ||= {}

        # Preferred scratch location; the system temp dir is used when absent.
        base_dir = '/...'
        base_dir = Dir.tmpdir unless Dir.exist? base_dir

        # mktmpdir guarantees a fresh, uniquely named directory (a random
        # number could collide with a concurrent or earlier run).
        workdir = Dir.mktmpdir('office_to_pdf', base_dir)

        begin
          src_file = File.join(workdir, File.basename(source))
          stage_source source, src_file

          # soffice names its output after the input file.
          tgt_file = File.join(workdir, File.basename(source, '.*') + '.pdf')

          export_filter = options[:export_filter] || 'pdf'

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:soffice_path], '--headless',
              '--convert-to', export_filter,
              '--outdir', workdir, src_file
          )

          unless result[:status] == 0
            warn "PdfConvert errors: #{(result[:err] + result[:out]).join("\n")}"
            return false
          end

          FileUtils.copy tgt_file, target, preserve: true

          result[:out]
        ensure
          # Runs on success, on the early `return false` and on exceptions.
          FileUtils.rmtree workdir
        end
      end

      private

      # Makes +source+ available as +destination+: hard link when possible,
      # plain copy otherwise (e.g. scratch dir on another filesystem).
      def stage_source(source, destination)
        FileUtils.link source, destination
      rescue SystemCallError, NotImplementedError
        FileUtils.copy source, destination
      end

    end

  end
end
|
|
@@ -0,0 +1,40 @@
|
|
|
1
|
+
require 'os'
|
|
2
|
+
|
|
3
|
+
require 'libis/tools/extend/string'
|
|
4
|
+
require 'libis/tools/logger'
|
|
5
|
+
require 'libis/tools/command'
|
|
6
|
+
|
|
7
|
+
require 'libis/format/config'
|
|
8
|
+
|
|
9
|
+
module Libis
  module Format

    # Runs the +CopyPdf+ class of the PdfTool.jar found in the gem's +tools+
    # directory through the configured Java executable.
    class PdfCopy
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String] value for the tool's --file_input argument
      # @param target [String] value for the tool's --file_output argument
      # @param options [Array] extra command line arguments, appended as-is
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        jar_file = File.absolute_path(
            File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'PdfTool.jar')
        )

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        fixed_arguments = ['-cp', jar_file, 'CopyPdf', '--file_input', source, '--file_output', target]

        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *fixed_arguments, *options)
      end
    end

  end
end
|
|
@@ -0,0 +1,41 @@
|
|
|
1
|
+
require 'os'
|
|
2
|
+
|
|
3
|
+
require 'libis/tools/extend/string'
|
|
4
|
+
require 'libis/tools/logger'
|
|
5
|
+
require 'libis/tools/command'
|
|
6
|
+
|
|
7
|
+
require 'libis/format/config'
|
|
8
|
+
|
|
9
|
+
module Libis
  module Format

    # Runs the +MergePdf+ class of the PdfTool.jar found in the gem's +tools+
    # directory through the configured Java executable.
    class PdfMerge
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String, Array<String>] one input or a list of inputs;
      #   they are passed after all other arguments
      # @param target [String] value for the tool's --file_output argument
      # @param options [Array] extra command line arguments, placed before the inputs
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        jar_file = File.absolute_path(
            File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'PdfTool.jar')
        )
        inputs = source.is_a?(Array) ? source : [source]

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        fixed_arguments = ['-cp', jar_file, 'MergePdf', '--file_output', target]

        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *fixed_arguments, *options, *inputs)
      end
    end

  end
end
|
|
@@ -0,0 +1,39 @@
|
|
|
1
|
+
require 'os'
|
|
2
|
+
|
|
3
|
+
require 'libis/tools/extend/string'
|
|
4
|
+
require 'libis/tools/logger'
|
|
5
|
+
require 'libis/tools/command'
|
|
6
|
+
|
|
7
|
+
require 'libis/format/config'
|
|
8
|
+
|
|
9
|
+
module Libis
  module Format

    # Runs the +SplitPdf+ class of the PdfTool.jar found in the gem's +tools+
    # directory through the configured Java executable.
    class PdfSplit
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source, target, options = [])
        new.run(source, target, options)
      end

      # @param source [String] value for the tool's --file_input argument
      # @param target [String] value for the tool's --file_output argument
      # @param options [Array] extra command line arguments, appended as-is
      # @return whatever Libis::Tools::Command.run returns
      def run(source, target, options = [])
        jar_file = File.absolute_path(
            File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'PdfTool.jar')
        )

        if OS.java?
          # TODO: import library and execute in current VM. For now do exactly as in MRI.
        end

        fixed_arguments = ['-cp', jar_file, 'SplitPdf', '--file_input', source, '--file_output', target]

        Libis::Tools::Command.run(Libis::Format::Config[:java_path], *fixed_arguments, *options)
      end
    end

  end
end
|
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'tempfile'
|
|
2
|
+
require 'csv'
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
|
|
5
|
+
require 'libis/tools/extend/string'
|
|
6
|
+
require 'libis/tools/logger'
|
|
7
|
+
require 'libis/tools/command'
|
|
8
|
+
|
|
9
|
+
require 'libis/format'
|
|
10
|
+
|
|
11
|
+
module Libis
  module Format

    # Converts a PDF to PDF/A with Ghostscript and validates the outcome with
    # PdfaValidator.
    class PdfToPdfa
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source, target = nil, options = {})
        self.new.run source, target, options
      end

      # @param source [String] path of the PDF to convert
      # @param target [String, nil] output path; a uniquely named file in the
      #   system temp dir is used when nil
      # @param options [Hash, nil] +:colorspace+ - 'cmyk' selects the CMYK
      #   profile, anything else the RGB profile
      # @return [Hash] the Ghostscript command result; when validation of the
      #   generated file fails, +:status+ is set to -999 and a line is added
      #   to +:err+
      def run(source, target = nil, options = nil)
        # The instance method defaults options to nil, which used to crash on
        # options[:colorspace].
        options ||= {}

        # Dir::Tmpname.make_tmpname no longer exists in current Rubies, so the
        # unique name is built the same way as for the other temp files here.
        target ||= File.join(Dir.tmpdir, "#{File.basename(source, '.*')}_#{random_token}.pdf")

        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))

        icc_info = icc_options(options[:colorspace])

        icc_file = File.join(Dir.tmpdir, "#{icc_info[:icc_name]}#{random_token}.icc")
        def_filename = File.join(Dir.tmpdir, "PDFA_def_#{random_token}.ps")

        begin
          FileUtils.cp(File.join(data_dir, "#{icc_info[:icc_name]}.icc"), icc_file)

          # Fill in the PDFA definition template. Block form of gsub: the
          # replacement is inserted literally, even if a path holds backslashes.
          template = File.read(File.join(data_dir, 'PDFA_def.ps'))
          File.open(def_filename, 'w') do |f|
            f.puts template.
                gsub('[** Fill in ICC profile location **]') { icc_file }.
                gsub('[** Fill in ICC reference name **]') { icc_info[:icc_ref] }
          end

          result = Libis::Tools::Command.run(
              Libis::Format::Config[:ghostscript_path],
              '-dBATCH', '-dNOPAUSE', '-dNOOUTERSAVE',
              '-sColorConversionStrategy=/UseDeviceIndependentColor',
              "-sProcessColorModel=#{icc_info[:device]}",
              '-sDEVICE=pdfwrite', '-dPDFA', '-dPDFACompatibilityPolicy=1',
              "-sOutputICCProfile=#{icc_file}",
              '-o', File.absolute_path(target),
              def_filename,
              source
          )
        ensure
          # Remove the temporary inputs even when something above raised.
          FileUtils.rm [icc_file, def_filename], force: true
        end

        unless PdfaValidator.run(target)
          result[:status] = -999
          result[:err] << 'Failed to validate generated PDF/A file.'
        end

        result
      end


      private

      # @param colorspace [#to_s] requested colour space (case-insensitive)
      # @return [Hash] +:icc_name+ (profile file name without extension),
      #   +:icc_ref+ and +:device+ for the chosen colour space
      def icc_options(colorspace)
        case colorspace.to_s.downcase
          when 'cmyk'
            {icc_name: 'ISOcoated_v2_eci', icc_ref: 'FOGRA39L', device: 'DeviceCMYK'}
          else
            {icc_name: 'eciRGB_v2', icc_ref: 'sRGB', device: 'DeviceRGB'}
        end
      end

      # @return [String] 24 random hexadecimal characters for unique file names
      def random_token
        Random.new.bytes(12).unpack('H*').first
      end

    end

  end
end
|
|
@@ -0,0 +1,61 @@
|
|
|
1
|
+
require 'fileutils'
|
|
2
|
+
|
|
3
|
+
require 'libis/tools/extend/string'
|
|
4
|
+
require 'libis/tools/logger'
|
|
5
|
+
require 'libis/tools/command'
|
|
6
|
+
|
|
7
|
+
require 'libis/format/config'
|
|
8
|
+
|
|
9
|
+
require 'tmpdir' # Dir.tmpdir is not available without it

module Libis
  module Format

    # Checks a PDF file against the PDF/A-1B constraints, either with the
    # external tool configured as +:pdfa_path+ or, when none is configured,
    # with the PDFBox preflight jar found in the gem's +tools+ directory.
    class PdfaValidator
      include ::Libis::Tools::Logger

      # Class-level shortcut for #run on a fresh instance.
      def self.run(source)
        self.new.run source
      end

      # @param source [String] path of the PDF file to validate
      # @return [Boolean] true when the file passes; false otherwise (the
      #   validator output is logged as a warning)
      def run(source)

        # Resolved before changing directory so relative paths keep working.
        src_file = File.absolute_path(source)

        if (pdfa = Libis::Format::Config[:pdfa_path])
          # Keep it clean: tool generates fontconfig/ cache dir in current working dir.
          # The block form restores the previous working dir even on exceptions.
          result = Dir.chdir(Dir.tmpdir) do
            Libis::Tools::Command.run(
                pdfa,
                '--noxml',
                '--level', 'B',
                '--verb', '0',
                src_file
            )
          end
          passed = result[:out].any? { |line| line =~ /^VLD-\[PASS\]/ }
        else
          # Located relative to this file, like the other tool wrappers do, so
          # that this class does not depend on constants defined elsewhere.
          tool_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'tools'))
          jar = File.join(tool_dir, 'pdfbox', 'preflight-app-1.8.10.jar')
          result = Libis::Tools::Command.run(
              Libis::Format::Config[:java_path],
              '-jar', jar,
              src_file
          )
          passed = result[:status] == 0
        end

        return true if passed

        warn "Validator failed to validate the PDF file '%s' against PDF/A-1B constraints:\n%s", source,
             result[:out].join("\n")
        false
      end
    end

  end
end
|
|
@@ -0,0 +1,170 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'singleton'
|
|
4
|
+
require 'yaml'
|
|
5
|
+
|
|
6
|
+
require 'backports/rails/hash'
|
|
7
|
+
require 'libis/tools/logger'
|
|
8
|
+
require 'libis/tools/extend/string'
|
|
9
|
+
|
|
10
|
+
module Libis
  module Format

    # Singleton registry of known file types. Each type is a Hash with at
    # least +:TYPE+, +:GROUP+, +:MIME+ and +:EXTENSIONS+ and optionally
    # +:PUID+; the three list-valued entries are Arrays of Strings. The
    # registry is filled from +data/types.yml+ and can be extended with
    # #load_types.
    class TypeDatabase
      include Singleton
      include ::Libis::Tools::Logger

      # @return [Hash] info of type +t+, or an empty Hash when unknown
      def self.typeinfo(t)
        self.instance.types[t.to_sym] || {}
      end

      # Completes +info+ (in place) with whatever can be derived from the
      # registry: the type from a known PUID or MIME type, and then MIME type,
      # PUID, extensions and group from the type.
      #
      # @param info [Hash] data to enrich
      # @param map_keys [Hash] maps the registry key names (+:PUID+, +:MIME+,
      #   +:TYPE+, +:EXTENSIONS+, +:GROUP+) to the keys used in +info+;
      #   unmapped names are used as-is
      # @return [Hash] +info+, or an empty Hash when +info+ is not a Hash
      def self.enrich(info, map_keys = {})
        return {} unless info.is_a? Hash
        mapper = Hash.new {|hash,key| hash[key] = key}
        mapper.merge! map_keys
        # NOTE(review): blank? is presumably supplied by one of the required
        # extension libraries - confirm.
        unless (puid = info[mapper[:PUID]]).blank?
          info[mapper[:TYPE]] ||= first_type(self.puid_infos(puid))
        end
        unless (mime = info[mapper[:MIME]]).blank?
          info[mapper[:TYPE]] ||= first_type(self.mime_infos(mime))
        end
        unless (type_name = info[mapper[:TYPE]]).nil?
          info[mapper[:MIME]] = self.type_mimetypes(type_name).first if info[mapper[:MIME]].blank?
          info[mapper[:PUID]] = self.type_puids(type_name).first if info[mapper[:PUID]].blank?
          info[mapper[:EXTENSIONS]] = self.type_extentions(type_name)
          info[mapper[:GROUP]] = self.type_group(type_name)
        end
        info
      end

      # @return [Symbol, nil] group of type +t+
      def self.type_group(t)
        typeinfo(t)[:GROUP]
      end

      # @return [Array<String>] MIME types of type +t+
      def self.type_mimetypes(t)
        typeinfo(t)[:MIME] || []
      end

      # @return [Array<String>] PUIDs of type +t+
      def self.type_puids(t)
        typeinfo(t)[:PUID] || []
      end

      # @return [Array<String>] file extensions of type +t+
      def self.type_extentions(t)
        typeinfo(t)[:EXTENSIONS] || []
      end

      class << self
        # Correctly spelled alias; the original name is kept for existing callers.
        alias_method :type_extensions, :type_extentions
      end

      # @return [Array<Symbol>] names of the types belonging to +group+
      def self.group_types(group)
        self.instance.types.select do |_, v|
          v[:GROUP] == group.to_sym
        end.keys
      end

      # @return [Array<Hash>] infos of the types listing +puid+
      def self.puid_infos(puid)
        types_listing(:PUID, puid).values
      end

      # @return [Array<Symbol>] names of the types listing +puid+
      def self.puid_types(puid)
        types_listing(:PUID, puid).keys
      end

      # @return [Array<Symbol>] groups of the types listing +puid+
      def self.puid_groups(puid)
        puid_types(puid).map { |t| type_group t }
      end

      # @return [Array<Hash>] infos of the types listing +mime+
      def self.mime_infos(mime)
        types_listing(:MIME, mime).values
      end

      # @return [Array<Symbol>] names of the types listing +mime+
      def self.mime_types(mime)
        types_listing(:MIME, mime).keys
      end

      # @return [Array<Symbol>] groups of the types listing +mime+
      def self.mime_groups(mime)
        mime_types(mime).map { |t| type_group t }
      end

      # @param ext [String] file extension, with or without leading dot
      # @return [Array<Hash>] infos of the types listing +ext+
      def self.ext_infos(ext)
        types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).values
      end

      # @param ext [String] file extension, with or without leading dot
      # @return [Array<Symbol>] names of the types listing +ext+
      def self.ext_types(ext)
        types_listing(:EXTENSIONS, ext.gsub(/^\./, '')).keys
      end

      # @return [Hash, nil] info of the first type listing +puid+
      def self.puid_typeinfo(puid)
        self.instance.types.values.find { |v| v[:PUID] and v[:PUID].include?(puid) }
      end

      # @return [Boolean] whether any type lists +mime+
      def self.known_mime?(mime)
        self.instance.types.values.any? { |v| v[:MIME].include? mime }
      end

      # Registered types whose +key+ list contains +value+.
      #
      # @return [Hash] type name => info
      def self.types_listing(key, value)
        self.instance.types.select do |_, v|
          list = v[key]
          list ? list.include?(value) : false
        end
      end
      private_class_method :types_listing

      # @return [Symbol, nil] +:TYPE+ of the first entry of +infos+
      def self.first_type(infos)
        entry = infos.first
        entry && entry[:TYPE]
      end
      private_class_method :first_type

      # Hash of type name (Symbol) => type info Hash.
      attr_reader :types

      # Adds type definitions to the registry.
      #
      # The input is organised as group => type name => info. List-valued
      # entries (MIME, EXTENSIONS, PUID) may be given as a comma and/or
      # whitespace separated String or as an Array. The input itself is not
      # modified, so the same data can safely be loaded more than once.
      #
      # A type that already exists is merged with the new info: lists are
      # concatenated and Hashes merged, and for other values the existing one
      # is kept when +append+ is true and replaced otherwise.
      #
      # @param file_or_hash [Hash, String] the definitions or the path of a
      #   YAML file holding them
      # @param append [Boolean] true puts existing list entries first and lets
      #   existing single values win; false does the opposite
      # @return [Hash] the definitions as they were read
      def load_types(file_or_hash = {}, append = true)
        hash = file_or_hash.is_a?(Hash) ? file_or_hash : YAML::load_file(file_or_hash)
        # noinspection RubyResolve
        hash.each do |group, type_info|
          type_info.each do |type_name, raw_info|
            type_key = type_name.to_sym
            # Work on a copy with symbol keys; the caller's data stays untouched.
            info = raw_info.each_with_object({}) do |(key, value), copy|
              copy[key.respond_to?(:to_sym) ? key.to_sym : key] = value
            end
            info[:TYPE] = type_key
            info[:GROUP] = group.to_sym
            info[:MIME] = to_list(info[:MIME])
            info[:EXTENSIONS] = to_list(info[:EXTENSIONS])
            info[:PUID] = to_list(info[:PUID]) if info[:PUID]
            if @types.has_key?(type_key)
              warn 'Type %s already defined; merging with info from %s.', type_name.to_s, file_or_hash
              info.merge!(@types[type_key]) do |_,v_new,v_old|
                case v_old
                  when Array
                    append ? v_old + v_new : v_new + v_old
                  when Hash
                    append ? v_new.merge(v_old) : v_old.merge(v_new)
                  else
                    append ? v_old : v_new
                end
              end
            end
            @types[type_key] = info
          end
        end
      end

      protected

      # Loads the default type definitions from the gem's +data/types.yml+.
      def initialize
        @types = Hash.new
        data_dir = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..', '..', 'data'))
        type_database = File.join(data_dir, 'types.yml')
        load_types(type_database)
      end

      private

      # Normalises a list-valued entry.
      #
      # @param value [String, Array, Object] separated String, Array or anything else
      # @return [Array<String>] the entries; empty for unsupported input
      def to_list(value)
        case value
          when Array
            value.map { |v| v.to_s.strip }.reject { |v| v.empty? }
          when String
            value.strip.split(/[\s,]+/)
          else
            []
        end
      end

    end

  end
end
|
data/lib/libis/format.rb
ADDED
|
@@ -0,0 +1,23 @@
|
|
|
1
|
+
require 'libis/format/version'
|
|
2
|
+
|
|
3
|
+
module Libis
  module Format

    # Constant name => file defining it. The constants are registered for
    # autoloading, i.e. each file is only required on first use of its constant.
    autoloaded_constants = {
        Config: 'libis/format/config',
        TypeDatabase: 'libis/format/type_database',
        Identifier: 'libis/format/identifier',
        Fido: 'libis/format/fido',
        Droid: 'libis/format/droid',
        OfficeToPdf: 'libis/format/office_to_pdf',
        PdfCopy: 'libis/format/pdf_copy',
        PdfMerge: 'libis/format/pdf_merge',
        PdfSplit: 'libis/format/pdf_split',
        PdfToPdfa: 'libis/format/pdf_to_pdfa',
        PdfaValidator: 'libis/format/pdfa_validator',
        Converter: 'libis/format/converter'
    }
    autoloaded_constants.each { |constant, path| autoload constant, path }

    # Directory two levels above the directory holding this file.
    ROOT_DIR = File.absolute_path(File.join(File.dirname(__FILE__), '..', '..'))
    # The +data+ directory below ROOT_DIR.
    DATA_DIR = File.join(ROOT_DIR, 'data')

  end
end
|