libis-format 0.9.5-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +18 -0
- data/.travis.yml +41 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +39 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/bin/pdf_copy +13 -0
- data/data/ISOcoated_v2_eci.icc +0 -0
- data/data/PDFA_def.ps +40 -0
- data/data/ead.xsd +2728 -0
- data/data/eciRGB_v2.icc +0 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +217 -0
- data/lib/libis/format/config.rb +35 -0
- data/lib/libis/format/converter/base.rb +101 -0
- data/lib/libis/format/converter/chain.rb +167 -0
- data/lib/libis/format/converter/image_converter.rb +214 -0
- data/lib/libis/format/converter/office_converter.rb +50 -0
- data/lib/libis/format/converter/pdf_converter.rb +139 -0
- data/lib/libis/format/converter/repository.rb +98 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +45 -0
- data/lib/libis/format/fido.rb +102 -0
- data/lib/libis/format/identifier.rb +189 -0
- data/lib/libis/format/office_to_pdf.rb +52 -0
- data/lib/libis/format/pdf_copy.rb +40 -0
- data/lib/libis/format/pdf_merge.rb +41 -0
- data/lib/libis/format/pdf_split.rb +39 -0
- data/lib/libis/format/pdf_to_pdfa.rb +76 -0
- data/lib/libis/format/pdfa_validator.rb +61 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +23 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +34 -0
- data/spec/converter_spec.rb +212 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test-options.jpg +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.jpg +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.pdf.tif +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/data/test_pdfa.pdf +0 -0
- data/spec/identifier_spec.rb +60 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/PdfTool.jar +0 -0
- data/tools/bcpkix-jdk15on-1.49.jar +0 -0
- data/tools/bcprov-jdk15on-1.49.jar +0 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
- data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
- metadata +396 -0
data/data/eciRGB_v2.icc
ADDED
Binary file
|
@@ -0,0 +1,106 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
|
3
|
+
<format>
|
4
|
+
<puid>lias-fmt/101</puid>
|
5
|
+
<mime>text/xml</mime>
|
6
|
+
<name>Extensible Markup Language</name>
|
7
|
+
<version>1.0</version>
|
8
|
+
<alias>XML (1.0)</alias>
|
9
|
+
<pronom_id>638</pronom_id>
|
10
|
+
<extension>xml</extension>
|
11
|
+
<has_priority_over>fmt/101</has_priority_over>
|
12
|
+
<signature>
|
13
|
+
<name>XML 1.0</name>
|
14
|
+
<pattern>
|
15
|
+
<position>BOF</position>
|
16
|
+
<regex>(?s)\A.{0,3}\x3c\?xml\s+version\s+=\s+(?:"|')1\.0(?:"|')</regex>
|
17
|
+
</pattern>
|
18
|
+
</signature>
|
19
|
+
</format>
|
20
|
+
<format>
|
21
|
+
<puid>lias-fmt/001</puid>
|
22
|
+
<name>Textura TS print file</name>
|
23
|
+
<mime>text/plain</mime>
|
24
|
+
<signature>
|
25
|
+
<name>Textura TS print file</name>
|
26
|
+
<pattern>
|
27
|
+
<position>BOF</position>
|
28
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
|
29
|
+
</pattern>
|
30
|
+
<pattern>
|
31
|
+
<position>VAR</position>
|
32
|
+
<regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
|
33
|
+
</pattern>
|
34
|
+
<pattern>
|
35
|
+
<position>VAR</position>
|
36
|
+
<regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
|
37
|
+
</pattern>
|
38
|
+
<pattern>
|
39
|
+
<position>VAR</position>
|
40
|
+
<regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
|
41
|
+
</pattern>
|
42
|
+
<pattern>
|
43
|
+
<position>VAR</position>
|
44
|
+
<regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
|
45
|
+
</pattern>
|
46
|
+
<pattern>
|
47
|
+
<position>VAR</position>
|
48
|
+
<regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
|
49
|
+
</pattern>
|
50
|
+
<pattern>
|
51
|
+
<position>VAR</position>
|
52
|
+
<regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
|
53
|
+
</pattern>
|
54
|
+
<!--
|
55
|
+
-->
|
56
|
+
<pattern>
|
57
|
+
<position>VAR</position>
|
58
|
+
<regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
|
59
|
+
</pattern>
|
60
|
+
<pattern>
|
61
|
+
<position>EOF</position>
|
62
|
+
<regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
|
63
|
+
</pattern>
|
64
|
+
</signature>
|
65
|
+
</format>
|
66
|
+
<format>
|
67
|
+
<puid>lias-fmt/002</puid>
|
68
|
+
<name>Textura VP/S print file</name>
|
69
|
+
<mime>text/plain</mime>
|
70
|
+
<has_priority_over>lias-fmt/001</has_priority_over>
|
71
|
+
<signature>
|
72
|
+
<name>Textura VP/S print file</name>
|
73
|
+
<pattern>
|
74
|
+
<position>BOF</position>
|
75
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
|
76
|
+
</pattern>
|
77
|
+
<pattern>
|
78
|
+
<position>EOF</position>
|
79
|
+
<regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
|
80
|
+
</pattern>
|
81
|
+
</signature>
|
82
|
+
</format>
|
83
|
+
<format>
|
84
|
+
<puid>lias-fmt/189.word</puid>
|
85
|
+
<name>Microsoft Office Open XML - Word</name>
|
86
|
+
<extension>docx</extension>
|
87
|
+
<has_priority_over>x-fmt/263</has_priority_over>
|
88
|
+
<has_priority_over>fmt/189</has_priority_over>
|
89
|
+
<has_priority_over>fido-fmt/189.word</has_priority_over>
|
90
|
+
<signature>
|
91
|
+
<name>Microsoft Office Open XML - Word</name>
|
92
|
+
<pattern>
|
93
|
+
<position>BOF</position>
|
94
|
+
<regex>(?s)\APK\x03\x04</regex>
|
95
|
+
</pattern>
|
96
|
+
<pattern>
|
97
|
+
<position>BOF</position>
|
98
|
+
<regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
|
99
|
+
</pattern>
|
100
|
+
<pattern>
|
101
|
+
<position>EOF</position>
|
102
|
+
<regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
|
103
|
+
</pattern>
|
104
|
+
</signature>
|
105
|
+
</format>
|
106
|
+
</formats>
|
data/data/types.yml
ADDED
@@ -0,0 +1,217 @@
|
|
1
|
+
---
|
2
|
+
# This lists all the types the converters know about along with the mime types and file extensions.
|
3
|
+
# The first file extension in the list is the default one that will be used when a file of that type is created.
|
4
|
+
# The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
|
5
|
+
# class should take care of that.
|
6
|
+
# Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
|
7
|
+
# reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
|
8
|
+
# to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
|
9
|
+
|
10
|
+
IMAGE:
|
11
|
+
TIFF:
|
12
|
+
NAME: Tagged Image File Format (TIFF)
|
13
|
+
MIME: image/tiff
|
14
|
+
EXTENSIONS: tif,tiff
|
15
|
+
|
16
|
+
JP2:
|
17
|
+
NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
|
18
|
+
MIME: image/jp2
|
19
|
+
EXTENSIONS: jp2
|
20
|
+
|
21
|
+
JPG:
|
22
|
+
NAME: Joint Photographic Experts Group (JPEG)
|
23
|
+
MIME: image/jpeg
|
24
|
+
EXTENSIONS: jpg,jpe,jpeg
|
25
|
+
|
26
|
+
PNG:
|
27
|
+
NAME: Portable Network Graphics (PNG)
|
28
|
+
MIME: image/png
|
29
|
+
EXTENSIONS: png
|
30
|
+
|
31
|
+
BMP:
|
32
|
+
NAME: Device Independent Bitmap (DIP/BMP)
|
33
|
+
MIME: image/bmp,image/x-ms-bmp
|
34
|
+
EXTENSIONS: bmp
|
35
|
+
|
36
|
+
GIF:
|
37
|
+
NAME: Graphics Interchange Format (GIF)
|
38
|
+
MIME: image/gif
|
39
|
+
EXTENSIONS: gif
|
40
|
+
|
41
|
+
PBM:
|
42
|
+
NAME: Portable Bitmap Format (PBM)
|
43
|
+
PUID: fmt/409
|
44
|
+
MIME: image/x-portable-bitmap
|
45
|
+
EXTENSIONS: pbm
|
46
|
+
|
47
|
+
PGM:
|
48
|
+
NAME: Portable GrayMap Format (PGM)
|
49
|
+
PUID: fmt/406
|
50
|
+
MIME: image/x-portable-graymap
|
51
|
+
EXTENSIONS: pgm
|
52
|
+
|
53
|
+
PPM:
|
54
|
+
NAME: Portable Pixel Map (PPM)
|
55
|
+
PUID: fmt/408
|
56
|
+
MIME: image/x-portable-pixmap
|
57
|
+
EXTENSIONS: ppm
|
58
|
+
|
59
|
+
AUDIO:
|
60
|
+
|
61
|
+
WAV:
|
62
|
+
NAME: Waveform Audio File Format (WAVE)
|
63
|
+
MIME: audio/x-wav
|
64
|
+
EXTENSIONS: wav
|
65
|
+
|
66
|
+
MP3:
|
67
|
+
NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
|
68
|
+
MIME: audio/mpeg
|
69
|
+
EXTENSIONS: mp3
|
70
|
+
|
71
|
+
FLAC:
|
72
|
+
NAME: Free Lossless Audio Codec (FLAC)
|
73
|
+
MIME: audio/flac
|
74
|
+
EXTENSIONS: flac
|
75
|
+
|
76
|
+
VIDEO:
|
77
|
+
|
78
|
+
MPEG:
|
79
|
+
NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
|
80
|
+
MIME: video/mpeg
|
81
|
+
EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
|
82
|
+
|
83
|
+
MPEG4:
|
84
|
+
NAME: Moving Picture Experts Group (MPEG-4)
|
85
|
+
MIME: video/mp4
|
86
|
+
EXTENSIONS: mp4,mpeg4
|
87
|
+
|
88
|
+
MJP2:
|
89
|
+
NAME: Motion JPEG 2000 (MJP2)
|
90
|
+
MIME: video/mj2
|
91
|
+
EXTENSIONS: mj2, mjp2
|
92
|
+
|
93
|
+
QTFF:
|
94
|
+
NAME: QuickTime File Format (QTFF)
|
95
|
+
MIME: video/quicktime
|
96
|
+
EXTENSIONS: mov, qt
|
97
|
+
|
98
|
+
AVI:
|
99
|
+
NAME: Audio Video Interleave (AVI)
|
100
|
+
MIME: video/x-msvideo
|
101
|
+
EXTENSIONS: avi
|
102
|
+
|
103
|
+
OGGV:
|
104
|
+
NAME: OGG Video (OGGV)
|
105
|
+
MIME: video/ogg
|
106
|
+
EXTENSIONS: ogv
|
107
|
+
|
108
|
+
WMV:
|
109
|
+
NAME: Windows Media Video (WMV)
|
110
|
+
MIME: video/x-ms-wmv
|
111
|
+
EXTENSIONS: wmv
|
112
|
+
|
113
|
+
DV:
|
114
|
+
NAME: Digital Video (DV)
|
115
|
+
MIME: video/dv
|
116
|
+
EXTENSIONS: dv
|
117
|
+
|
118
|
+
FLASH:
|
119
|
+
NAME: Flash video (FLV)
|
120
|
+
MIME: video/x-flv
|
121
|
+
EXTENSIONS: flv
|
122
|
+
|
123
|
+
TEXT:
|
124
|
+
|
125
|
+
TXT:
|
126
|
+
NAME: Unformatted text
|
127
|
+
MIME: text/plain
|
128
|
+
EXTENSIONS: txt
|
129
|
+
|
130
|
+
RTF:
|
131
|
+
NAME: Rich Text Format (RTF)
|
132
|
+
PUID: fmt/45
|
133
|
+
MIME: text/rtf application/rtf
|
134
|
+
EXTENSIONS: rtf
|
135
|
+
|
136
|
+
HTML:
|
137
|
+
NAME: HyperText Markup Language (HTML)
|
138
|
+
MIME: text/html
|
139
|
+
EXTENSIONS: html, htm
|
140
|
+
|
141
|
+
MSDOC:
|
142
|
+
NAME: Microsoft Word Document (DOC)
|
143
|
+
PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
|
144
|
+
MIME: application/vnd.ms-word application/msword
|
145
|
+
EXTENSIONS: doc
|
146
|
+
|
147
|
+
MSDOCX:
|
148
|
+
NAME: Microsoft Word OpenXML Document (DOCX)
|
149
|
+
PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
|
150
|
+
MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
151
|
+
EXTENSIONS: docx
|
152
|
+
|
153
|
+
PDF:
|
154
|
+
NAME: Adobe Portable Document Format (PDF)
|
155
|
+
PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
|
156
|
+
MIME: application/pdf
|
157
|
+
EXTENSIONS: pdf
|
158
|
+
|
159
|
+
PDFA:
|
160
|
+
NAME: Adobe Portable Document Format for Archives (PDFA)
|
161
|
+
PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
|
162
|
+
MIME: application/pdf
|
163
|
+
EXTENSIONS: pdf
|
164
|
+
|
165
|
+
WORDPERFECT:
|
166
|
+
NAME: WordPerfect Document (WPD)
|
167
|
+
PUID: x-fmt/44 x-fmt/394
|
168
|
+
MIME: application/vnd.wordperfect
|
169
|
+
EXTENSIONS: wpd
|
170
|
+
|
171
|
+
XML:
|
172
|
+
NAME: Extensible Markup Language (XML)
|
173
|
+
MIME: text/xml
|
174
|
+
PUID: fmt/101
|
175
|
+
EXTENSIONS: xml
|
176
|
+
|
177
|
+
SHAREPOINT_MAP:
|
178
|
+
NAME: Sharepoint mapping file
|
179
|
+
# This is again an invented mime type. It's actually XML ...
|
180
|
+
MIME: text/xml/sharepoint_map
|
181
|
+
PUID: fmt/101
|
182
|
+
EXTENSIONS: xml
|
183
|
+
|
184
|
+
TABULAR:
|
185
|
+
|
186
|
+
MSXLS:
|
187
|
+
NAME: Microsoft Excel Spreadsheet (XLS)
|
188
|
+
MIME: application/vnd.ms-excel,application/msexcel
|
189
|
+
EXTENSIONS: xls
|
190
|
+
|
191
|
+
MSXLSX:
|
192
|
+
NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
|
193
|
+
PUID: fido-fmt/189.xl
|
194
|
+
MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
195
|
+
EXTENSIONS: xlsx
|
196
|
+
|
197
|
+
PRESENTATION:
|
198
|
+
|
199
|
+
MSPPT:
|
200
|
+
NAME: Microsoft Powerpoint Presentation (PPT)
|
201
|
+
MIME: application/vnd.ms-powerpoint,application/mspowerpoint
|
202
|
+
EXTENSIONS: ppt
|
203
|
+
|
204
|
+
MSPPTX:
|
205
|
+
NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
|
206
|
+
PUID: fido-fmt/189.ppt
|
207
|
+
MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
|
208
|
+
EXTENSIONS: pptx
|
209
|
+
|
210
|
+
ARCHIVE:
|
211
|
+
|
212
|
+
EAD:
|
213
|
+
NAME: Encoded Archival Description (EAD)
|
214
|
+
# This is again an invented mime type. It's actually XML ...
|
215
|
+
MIME: archive/ead
|
216
|
+
PUID: fmt/101
|
217
|
+
EXTENSIONS: ead,xml
|
@@ -0,0 +1,35 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'os'
|
3
|
+
|
4
|
+
module Libis
|
5
|
+
module Format
|
6
|
+
|
7
|
+
# noinspection RubyConstantNamingConvention
|
8
|
+
Config = ::Libis::Tools::Config
|
9
|
+
|
10
|
+
Config[:converter_chain_max_level] = 8
|
11
|
+
|
12
|
+
Config[:java_path] = 'java'
|
13
|
+
Config[:soffice_path] = 'soffice'
|
14
|
+
Config[:ghostscript_path] = 'gs'
|
15
|
+
# Config[:pdfa_path] =
|
16
|
+
# File.absolute_path(
|
17
|
+
# File.join(
|
18
|
+
# File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
|
19
|
+
# )
|
20
|
+
# )
|
21
|
+
Config[:droid_path] =
|
22
|
+
File.absolute_path(
|
23
|
+
File.join(
|
24
|
+
File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid', OS.windows? ? 'droid.bat' : 'droid.sh'
|
25
|
+
)
|
26
|
+
)
|
27
|
+
Config[:fido_path] =
|
28
|
+
File.absolute_path(
|
29
|
+
File.join(
|
30
|
+
File.dirname(__FILE__), '..', '..', '..', 'tools', 'fido', OS.windows? ? 'fido.bat' : 'fido.sh'
|
31
|
+
)
|
32
|
+
)
|
33
|
+
|
34
|
+
end
|
35
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
### require 'tools/string'
|
4
|
+
require 'tmpdir'
|
5
|
+
require 'libis/tools/logger'
|
6
|
+
require 'libis/format/type_database'
|
7
|
+
|
8
|
+
require_relative 'repository'
|
9
|
+
|
10
|
+
module Libis
|
11
|
+
module Format
|
12
|
+
module Converter
|
13
|
+
|
14
|
+
class Base
|
15
|
+
include Libis::Tools::Logger
|
16
|
+
|
17
|
+
attr_reader :options, :flags
|
18
|
+
|
19
|
+
def initialize
|
20
|
+
@options = {}
|
21
|
+
@flags = {}
|
22
|
+
end
|
23
|
+
|
24
|
+
def convert(source, target, format, opts = {})
|
25
|
+
unless File.exist? source
|
26
|
+
error "Cannot find file '#{source}'."
|
27
|
+
return nil
|
28
|
+
end
|
29
|
+
@options.merge!(opts[:options]) if opts[:options]
|
30
|
+
@flags.merge!(opts[:flags]) if opts[:flags]
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.input_types(_ = nil)
|
34
|
+
raise RuntimeError, 'Method #input_types needs to be overridden in converter'
|
35
|
+
end
|
36
|
+
|
37
|
+
def self.output_types(_ = nil)
|
38
|
+
raise RuntimeError, 'Method #output_types needs to be overridden in converter'
|
39
|
+
end
|
40
|
+
|
41
|
+
|
42
|
+
# Runs a conversion against a scratch file and moves the outcome to +target+.
#
# A unique path inside Dir.tmpdir is generated from the prefix 'convert', the
# base name of +target+ (whitespace replaced by underscores) and the extension
# of +target+. That path is yielded to the block, which must return the path
# of the file it produced, or a falsy value when the conversion failed.
#
# @param [String] target path where the converted file should end up
# @yieldparam [String] tempfile scratch path the block should write to
# @yieldreturn [String, nil] path of the produced file, or nil/false on failure
# @return [String, nil] +target+ on success, nil when the block reported failure
def using_temp(target)
  label = File.basename(target, '.*').gsub(/\s/, '_')
  # Dir::Tmpname.make_tmpname no longer exists in Ruby >= 2.5. Dir::Tmpname.create
  # exists in both old and new versions and hands back a unique candidate path.
  tempfile = Dir::Tmpname.create(["convert-#{label}-", File.extname(target)], Dir.tmpdir) { |path| path }
  result = yield tempfile
  unless result
    # Do not leave a half-written scratch file behind when the conversion failed.
    FileUtils.rm_f(tempfile)
    return nil
  end
  FileUtils.move result, target
  target
end
|
49
|
+
|
50
|
+
def Base.inherited( klass )
|
51
|
+
|
52
|
+
Repository.register klass
|
53
|
+
|
54
|
+
class << self
|
55
|
+
|
56
|
+
def conversions
|
57
|
+
input_types.inject({}) do |hash, input_type|
|
58
|
+
hash[input_type] = output_types
|
59
|
+
hash
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def input_type?(type_id)
|
64
|
+
input_types.include? type_id
|
65
|
+
end
|
66
|
+
|
67
|
+
def output_type?(type_id)
|
68
|
+
output_types.include? type_id
|
69
|
+
end
|
70
|
+
|
71
|
+
def input_mimetype?(mimetype)
|
72
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
73
|
+
input_type? type_id
|
74
|
+
end
|
75
|
+
|
76
|
+
def output_mimetype?(mimetype)
|
77
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
78
|
+
output_type? type_id
|
79
|
+
end
|
80
|
+
|
81
|
+
def conversion?(input_type, output_type)
|
82
|
+
conversions[input_type] and conversions[input_type].any? { |t| t == output_type }
|
83
|
+
end
|
84
|
+
|
85
|
+
def output_for(input_type)
|
86
|
+
conversions[input_type]
|
87
|
+
end
|
88
|
+
|
89
|
+
def extension?(extension)
|
90
|
+
!TypeDatabase.ext_types(extension).first.nil?
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
end
|
96
|
+
|
97
|
+
end
|
98
|
+
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
require 'deep_dive'
|
5
|
+
|
6
|
+
require 'libis/tools/logger'
|
7
|
+
require 'libis/tools/extend/hash'
|
8
|
+
require 'libis/format/type_database'
|
9
|
+
|
10
|
+
module Libis
|
11
|
+
module Format
|
12
|
+
module Converter
|
13
|
+
|
14
|
+
class Chain
|
15
|
+
include ::Libis::Tools::Logger
|
16
|
+
include DeepDive
|
17
|
+
|
18
|
+
def initialize(source_format, target_format, operations = {})
|
19
|
+
@source_format = source_format.to_sym
|
20
|
+
@target_format = target_format.to_sym
|
21
|
+
@operations = operations || {}
|
22
|
+
@converter_chain = []
|
23
|
+
end
|
24
|
+
|
25
|
+
# @param [Libis::Format::Converter::Base.class] converter
|
26
|
+
# @return [Array[Hash]]
|
27
|
+
def append(converter)
|
28
|
+
return [] unless converter
|
29
|
+
valid_chain_nodes(converter).map do |node|
|
30
|
+
self.ddup.add_chain_node(node)
|
31
|
+
end.compact
|
32
|
+
end
|
33
|
+
|
34
|
+
def closed?
|
35
|
+
!@converter_chain.empty? &&
|
36
|
+
@converter_chain.first[:input].to_sym == @source_format &&
|
37
|
+
@converter_chain.last[:output].to_sym == @target_format
|
38
|
+
end
|
39
|
+
|
40
|
+
def valid?
|
41
|
+
closed? && apply_operations
|
42
|
+
end
|
43
|
+
|
44
|
+
def to_array
|
45
|
+
@converter_chain
|
46
|
+
end
|
47
|
+
|
48
|
+
def size
|
49
|
+
@converter_chain.size
|
50
|
+
end
|
51
|
+
|
52
|
+
alias_method :length, :size
|
53
|
+
|
54
|
+
# Renders the chain as a readable string of the form
#   "<source>->-<Converter>(<method>:<argument>,...)->-<output>->-..."
# The converter is shown by its class name without the module prefix, and the
# parenthesised operation list is omitted for nodes that have no operations.
#
# @return [String] textual representation of the converter chain
def to_s
  steps = @converter_chain.map do |node|
    converter_name = node[:converter].name.gsub(/^.*::/, '')
    # :operations is only filled in by apply_operations, so it may still be nil.
    # map (not each) is required to get the formatted strings rather than the hashes.
    operations = (node[:operations] || []).map { |op| "#{op[:method]}:#{op[:argument]}" }
    arguments = operations.empty? ? '' : "(#{operations.join(',')})"
    "#{converter_name}#{arguments}->-#{node[:output]}"
  end
  "#{@source_format}->-#{steps.join('->-')}"
end
|
62
|
+
|
63
|
+
# Runs every converter of the chain in order, feeding the output of one step
# into the next one.
#
# Intermediate results are written next to +target_file+ as
# "<target_file>.temp.<ext>" files and are always removed afterwards, also
# when a converter raises.
#
# @param [String] src_file path of the file to convert
# @param [String] target_file path where the final result must be written
# @return [String, nil] +target_file+ on success; nil when the chain is not
#   valid or one of the converters returned a falsy result
def convert(src_file, target_file)
  unless valid?
    error 'Converter chain is not valid'
    return nil
  end

  temp_files = []
  current = src_file

  begin
    @converter_chain.each_with_index do |node, i|
      target_type = node[:output]
      converter = node[:converter].new

      # Configure the converter with the operations assigned to this node.
      (node[:operations] || []).each do |operation|
        converter.send operation[:method], operation[:argument]
      end

      target = target_file

      # Every step but the last writes to a temp file that does not exist yet.
      if i < size - 1
        extension = TypeDatabase.type_extentions(target_type).first
        target += ".temp.#{extension}"
        target += ".#{extension}" while File.exist? target
        temp_files << target
      end

      FileUtils.mkdir_p File.dirname(target)

      current = converter.convert(current, target, target_type)
      return nil unless current
    end

    target_file
  ensure
    # Runs on success, on failure and on exceptions, so no temp files are leaked.
    temp_files.each { |f| FileUtils.rm(f, force: true) }
  end
end
|
106
|
+
|
107
|
+
# Lists the nodes that +converter+ could contribute as the next step of this chain.
#
# The input format of the next step is the output of the last node in the
# chain, or the chain's source format when the chain is still empty. One
# candidate node is built for each output type the converter reports for that
# input format; candidates already present in the chain are skipped.
#
# @param [Class] converter converter class responding to input_types and output_types
# @return [Array<Hash>] candidate nodes with :converter, :input and :output keys;
#   empty when the converter does not accept the current format
def valid_chain_nodes(converter)
  # Explicit nil check instead of a rescue modifier, so real errors are not swallowed.
  last_node = @converter_chain.last
  source_format = last_node ? last_node[:output] : @source_format
  return [] unless converter.input_types.include? source_format

  converter.output_types(source_format).each_with_object([]) do |format, nodes|
    node = {converter: converter, input: source_format, output: format}
    nodes << node unless node_exists?(node)
  end
end
|
119
|
+
|
120
|
+
def add_chain_node(node = {})
|
121
|
+
last_converter = @converter_chain.last
|
122
|
+
source_format = last_converter ? last_converter[:output] : @source_format
|
123
|
+
node[:input] ||= source_format
|
124
|
+
return nil unless node[:input] == source_format
|
125
|
+
return nil unless node[:output] && node[:converter].output_types(source_format).include?(node[:output])
|
126
|
+
return nil unless node[:converter].input_types.include? source_format
|
127
|
+
return nil if node_exists?(node)
|
128
|
+
@converter_chain << node
|
129
|
+
# debug "Chain: #{self}"
|
130
|
+
self
|
131
|
+
end
|
132
|
+
|
133
|
+
def apply_operations
|
134
|
+
temp_chain = @converter_chain.reverse.ddup
|
135
|
+
applied = true
|
136
|
+
operations = @operations && @operations.ddup || {}
|
137
|
+
while (operation = operations.shift)
|
138
|
+
method = operation.first.to_s.to_sym
|
139
|
+
applied &&= :found == temp_chain.each do |node|
|
140
|
+
next unless node[:converter].instance_methods.include?(method)
|
141
|
+
node[:operations] ||= []
|
142
|
+
node[:operations] << {method: method, argument: operation.last}
|
143
|
+
break :found
|
144
|
+
end
|
145
|
+
end
|
146
|
+
if applied && operations.empty?
|
147
|
+
@converter_chain = temp_chain.reverse
|
148
|
+
@operations.clear
|
149
|
+
return true
|
150
|
+
end
|
151
|
+
false
|
152
|
+
end
|
153
|
+
|
154
|
+
|
155
|
+
private
|
156
|
+
|
157
|
+
def node_exists?(node)
|
158
|
+
@converter_chain.detect do |n|
|
159
|
+
n[:converter] == node[:converter] && n[:input] == node[:input] && n[:output] == node[:output]
|
160
|
+
end
|
161
|
+
end
|
162
|
+
|
163
|
+
end
|
164
|
+
|
165
|
+
end
|
166
|
+
end
|
167
|
+
end
|