libis-format 0.9.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +15 -0
- data/.travis.yml +36 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/data/ISOcoated.icc +0 -0
- data/data/PDFA_def.ps +32 -0
- data/data/ead.xsd +2728 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +213 -0
- data/lib/libis/format/converter/base.rb +103 -0
- data/lib/libis/format/converter/chain.rb +80 -0
- data/lib/libis/format/converter/repository.rb +110 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +38 -0
- data/lib/libis/format/fido.rb +109 -0
- data/lib/libis/format/identifier.rb +185 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +12 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +30 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/identifier_spec.rb +59 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/argparselocal.pyc +0 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- metadata +342 -0
|
@@ -0,0 +1,106 @@
|
|
|
1
|
+
<?xml version="1.0"?>
|
|
2
|
+
<formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
|
|
3
|
+
<format>
|
|
4
|
+
<puid>lias-fmt/101</puid>
|
|
5
|
+
<mime>text/xml</mime>
|
|
6
|
+
<name>Extensible Markup Language</name>
|
|
7
|
+
<version>1.0</version>
|
|
8
|
+
<alias>XML (1.0)</alias>
|
|
9
|
+
<pronom_id>638</pronom_id>
|
|
10
|
+
<extension>xml</extension>
|
|
11
|
+
<has_priority_over>fmt/101</has_priority_over>
|
|
12
|
+
<signature>
|
|
13
|
+
<name>XML 1.0</name>
|
|
14
|
+
<pattern>
|
|
15
|
+
<position>BOF</position>
|
|
16
|
+
<regex>(?s)\A.{0,3}\x3c\?xml\s+version\s+=\s+(?:"|')1\.0(?:"|')</regex>
|
|
17
|
+
</pattern>
|
|
18
|
+
</signature>
|
|
19
|
+
</format>
|
|
20
|
+
<format>
|
|
21
|
+
<puid>lias-fmt/001</puid>
|
|
22
|
+
<name>Textura TS print file</name>
|
|
23
|
+
<mime>text/plain</mime>
|
|
24
|
+
<signature>
|
|
25
|
+
<name>Textura TS print file</name>
|
|
26
|
+
<pattern>
|
|
27
|
+
<position>BOF</position>
|
|
28
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
|
|
29
|
+
</pattern>
|
|
30
|
+
<pattern>
|
|
31
|
+
<position>VAR</position>
|
|
32
|
+
<regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
|
|
33
|
+
</pattern>
|
|
34
|
+
<pattern>
|
|
35
|
+
<position>VAR</position>
|
|
36
|
+
<regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
|
|
37
|
+
</pattern>
|
|
38
|
+
<pattern>
|
|
39
|
+
<position>VAR</position>
|
|
40
|
+
<regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
|
|
41
|
+
</pattern>
|
|
42
|
+
<pattern>
|
|
43
|
+
<position>VAR</position>
|
|
44
|
+
<regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
|
|
45
|
+
</pattern>
|
|
46
|
+
<pattern>
|
|
47
|
+
<position>VAR</position>
|
|
48
|
+
<regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
|
|
49
|
+
</pattern>
|
|
50
|
+
<pattern>
|
|
51
|
+
<position>VAR</position>
|
|
52
|
+
<regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
|
|
53
|
+
</pattern>
|
|
54
|
+
<!--
|
|
55
|
+
-->
|
|
56
|
+
<pattern>
|
|
57
|
+
<position>VAR</position>
|
|
58
|
+
<regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
|
|
59
|
+
</pattern>
|
|
60
|
+
<pattern>
|
|
61
|
+
<position>EOF</position>
|
|
62
|
+
<regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
|
|
63
|
+
</pattern>
|
|
64
|
+
</signature>
|
|
65
|
+
</format>
|
|
66
|
+
<format>
|
|
67
|
+
<puid>lias-fmt/002</puid>
|
|
68
|
+
<name>Textura VP/S print file</name>
|
|
69
|
+
<mime>text/plain</mime>
|
|
70
|
+
<has_priority_over>lias-fmt/001</has_priority_over>
|
|
71
|
+
<signature>
|
|
72
|
+
<name>Textura VP/S print file</name>
|
|
73
|
+
<pattern>
|
|
74
|
+
<position>BOF</position>
|
|
75
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
|
|
76
|
+
</pattern>
|
|
77
|
+
<pattern>
|
|
78
|
+
<position>EOF</position>
|
|
79
|
+
<regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
|
|
80
|
+
</pattern>
|
|
81
|
+
</signature>
|
|
82
|
+
</format>
|
|
83
|
+
<format>
|
|
84
|
+
<puid>lias-fmt/189.word</puid>
|
|
85
|
+
<name>Microsoft Office Open XML - Word</name>
|
|
86
|
+
<extension>docx</extension>
|
|
87
|
+
<has_priority_over>x-fmt/263</has_priority_over>
|
|
88
|
+
<has_priority_over>fmt/189</has_priority_over>
|
|
89
|
+
<has_priority_over>fido-fmt/189.word</has_priority_over>
|
|
90
|
+
<signature>
|
|
91
|
+
<name>Microsoft Office Open XML - Word</name>
|
|
92
|
+
<pattern>
|
|
93
|
+
<position>BOF</position>
|
|
94
|
+
<regex>(?s)\APK\x03\x04</regex>
|
|
95
|
+
</pattern>
|
|
96
|
+
<pattern>
|
|
97
|
+
<position>BOF</position>
|
|
98
|
+
<regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
|
|
99
|
+
</pattern>
|
|
100
|
+
<pattern>
|
|
101
|
+
<position>EOF</position>
|
|
102
|
+
<regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
|
|
103
|
+
</pattern>
|
|
104
|
+
</signature>
|
|
105
|
+
</format>
|
|
106
|
+
</formats>
|
data/data/types.yml
ADDED
|
@@ -0,0 +1,213 @@
|
|
|
1
|
+
---
|
|
2
|
+
# This lists all the types the converters know about along with the mime types and file extensions.
|
|
3
|
+
# The first file extension in the list is the default one that will be used when a file of that type is created.
|
|
4
|
+
# The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
|
|
5
|
+
# class should take care of that.
|
|
6
|
+
# Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
|
|
7
|
+
# reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
|
|
8
|
+
# to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
|
|
9
|
+
|
|
10
|
+
IMAGE:
|
|
11
|
+
TIFF:
|
|
12
|
+
NAME: Tagged Image File Format (TIFF)
|
|
13
|
+
MIME: image/tiff
|
|
14
|
+
EXTENSIONS: tif,tiff
|
|
15
|
+
|
|
16
|
+
JPEG2000:
|
|
17
|
+
NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
|
|
18
|
+
MIME: image/jp2
|
|
19
|
+
EXTENSIONS: jp2
|
|
20
|
+
|
|
21
|
+
JPEG:
|
|
22
|
+
NAME: Joint Photographic Experts Group (JPEG)
|
|
23
|
+
MIME: image/jpeg
|
|
24
|
+
EXTENSIONS: jpg,jpe,jpeg
|
|
25
|
+
|
|
26
|
+
PNG:
|
|
27
|
+
NAME: Portable Network Graphics (PNG)
|
|
28
|
+
MIME: image/png
|
|
29
|
+
EXTENSIONS: png
|
|
30
|
+
|
|
31
|
+
BMP:
|
|
32
|
+
NAME: Device Independent Bitmap (DIB/BMP)
|
|
33
|
+
MIME: image/bmp,image/x-ms-bmp
|
|
34
|
+
EXTENSIONS: bmp
|
|
35
|
+
|
|
36
|
+
GIF:
|
|
37
|
+
NAME: Graphics Interchange Format (GIF)
|
|
38
|
+
MIME: image/gif
|
|
39
|
+
EXTENSIONS: gif
|
|
40
|
+
|
|
41
|
+
PBM:
|
|
42
|
+
NAME: Portable Bitmap Format (PBM)
|
|
43
|
+
PUID: fmt/409
|
|
44
|
+
MIME: image/x-portable-bitmap
|
|
45
|
+
EXTENSIONS: pbm
|
|
46
|
+
|
|
47
|
+
PGM:
|
|
48
|
+
NAME: Portable GrayMap Format (PGM)
|
|
49
|
+
PUID: fmt/406
|
|
50
|
+
MIME: image/x-portable-graymap
|
|
51
|
+
EXTENSIONS: pgm
|
|
52
|
+
|
|
53
|
+
PPM:
|
|
54
|
+
NAME: Portable Pixel Map (PPM)
|
|
55
|
+
PUID: fmt/408
|
|
56
|
+
MIME: image/x-portable-pixmap
|
|
57
|
+
EXTENSIONS: ppm
|
|
58
|
+
|
|
59
|
+
AUDIO:
|
|
60
|
+
|
|
61
|
+
WAV:
|
|
62
|
+
NAME: Waveform Audio File Format (WAVE)
|
|
63
|
+
MIME: audio/x-wav
|
|
64
|
+
EXTENSIONS: wav
|
|
65
|
+
|
|
66
|
+
MP3:
|
|
67
|
+
NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
|
|
68
|
+
MIME: audio/mpeg
|
|
69
|
+
EXTENSIONS: mp3
|
|
70
|
+
|
|
71
|
+
FLAC:
|
|
72
|
+
NAME: Free Lossless Audio Codec (FLAC)
|
|
73
|
+
MIME: audio/flac
|
|
74
|
+
EXTENSIONS: flac
|
|
75
|
+
|
|
76
|
+
VIDEO:
|
|
77
|
+
|
|
78
|
+
MPEG:
|
|
79
|
+
NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
|
|
80
|
+
MIME: video/mpeg
|
|
81
|
+
EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
|
|
82
|
+
|
|
83
|
+
MPEG4:
|
|
84
|
+
NAME: Moving Picture Experts Group (MPEG-4)
|
|
85
|
+
MIME: video/mp4
|
|
86
|
+
EXTENSIONS: mp4,mpeg4
|
|
87
|
+
|
|
88
|
+
MJP2:
|
|
89
|
+
NAME: Motion JPEG 2000 (MJP2)
|
|
90
|
+
MIME: video/mj2
|
|
91
|
+
EXTENSIONS: mj2, mjp2
|
|
92
|
+
|
|
93
|
+
QTFF:
|
|
94
|
+
NAME: QuickTime File Format (QTFF)
|
|
95
|
+
MIME: video/quicktime
|
|
96
|
+
EXTENSIONS: mov, qt
|
|
97
|
+
|
|
98
|
+
AVI:
|
|
99
|
+
NAME: Audio Video Interleave (AVI)
|
|
100
|
+
MIME: video/x-msvideo
|
|
101
|
+
EXTENSIONS: avi
|
|
102
|
+
|
|
103
|
+
OGGV:
|
|
104
|
+
NAME: OGG Video (OGGV)
|
|
105
|
+
MIME: video/ogg
|
|
106
|
+
EXTENSIONS: ogv
|
|
107
|
+
|
|
108
|
+
WMV:
|
|
109
|
+
NAME: Windows Media Video (WMV)
|
|
110
|
+
MIME: video/x-ms-wmv
|
|
111
|
+
EXTENSIONS: wmv
|
|
112
|
+
|
|
113
|
+
DV:
|
|
114
|
+
NAME: Digital Video (DV)
|
|
115
|
+
MIME: video/dv
|
|
116
|
+
EXTENSIONS: dv
|
|
117
|
+
|
|
118
|
+
FLASH:
|
|
119
|
+
NAME: Flash video (FLV)
|
|
120
|
+
MIME: video/x-flv
|
|
121
|
+
EXTENSIONS: flv
|
|
122
|
+
|
|
123
|
+
DOCUMENT:
|
|
124
|
+
|
|
125
|
+
TXT:
|
|
126
|
+
NAME: Unformatted text
|
|
127
|
+
MIME: text/plain
|
|
128
|
+
EXTENSIONS: txt
|
|
129
|
+
|
|
130
|
+
RTF:
|
|
131
|
+
NAME: Rich Text Format (RTF)
|
|
132
|
+
PUID: fmt/45
|
|
133
|
+
MIME: text/rtf application/rtf
|
|
134
|
+
EXTENSIONS: rtf
|
|
135
|
+
|
|
136
|
+
HTML:
|
|
137
|
+
NAME: HyperText Markup Language (HTML)
|
|
138
|
+
MIME: text/html
|
|
139
|
+
EXTENSIONS: html, htm
|
|
140
|
+
|
|
141
|
+
MSDOC:
|
|
142
|
+
NAME: Microsoft Word Document (DOC)
|
|
143
|
+
PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
|
|
144
|
+
MIME: application/vnd.ms-word application/msword
|
|
145
|
+
EXTENSIONS: doc
|
|
146
|
+
|
|
147
|
+
MSDOCX:
|
|
148
|
+
NAME: Microsoft Word OpenXML Document (DOCX)
|
|
149
|
+
PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
|
|
150
|
+
MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
|
151
|
+
EXTENSIONS: docx
|
|
152
|
+
|
|
153
|
+
MSXLS:
|
|
154
|
+
NAME: Microsoft Excel Spreadsheet (XLS)
|
|
155
|
+
MIME: application/vnd.ms-excel,application/msexcel
|
|
156
|
+
EXTENSIONS: xls
|
|
157
|
+
|
|
158
|
+
MSXLSX:
|
|
159
|
+
NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
|
|
160
|
+
PUID: fido-fmt/189.xl
|
|
161
|
+
MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
|
162
|
+
EXTENSIONS: xlsx
|
|
163
|
+
|
|
164
|
+
MSPPT:
|
|
165
|
+
NAME: Microsoft Powerpoint Presentation (PPT)
|
|
166
|
+
MIME: application/vnd.ms-powerpoint,application/mspowerpoint
|
|
167
|
+
EXTENSIONS: ppt
|
|
168
|
+
|
|
169
|
+
MSPPTX:
|
|
170
|
+
NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
|
|
171
|
+
PUID: fido-fmt/189.ppt
|
|
172
|
+
MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
|
|
173
|
+
EXTENSIONS: pptx
|
|
174
|
+
|
|
175
|
+
PDF:
|
|
176
|
+
NAME: Adobe Portable Document Format (PDF)
|
|
177
|
+
PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
|
|
178
|
+
MIME: application/pdf
|
|
179
|
+
EXTENSIONS: pdf
|
|
180
|
+
|
|
181
|
+
PDFA:
|
|
182
|
+
NAME: Adobe Portable Document Format for Archives (PDFA)
|
|
183
|
+
PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
|
|
184
|
+
MIME: application/pdf
|
|
185
|
+
EXTENSIONS: pdf
|
|
186
|
+
|
|
187
|
+
WORDPERFECT:
|
|
188
|
+
NAME: WordPerfect Document (WPD)
|
|
189
|
+
PUID: x-fmt/44 x-fmt/394
|
|
190
|
+
MIME: application/vnd.wordperfect
|
|
191
|
+
EXTENSIONS: wpd
|
|
192
|
+
|
|
193
|
+
XML:
|
|
194
|
+
NAME: Extensible Markup Language (XML)
|
|
195
|
+
MIME: text/xml
|
|
196
|
+
PUID: fmt/101
|
|
197
|
+
EXTENSIONS: xml
|
|
198
|
+
|
|
199
|
+
SHAREPOINT_MAP:
|
|
200
|
+
NAME: Sharepoint mapping file
|
|
201
|
+
# This is again an invented mime type. It's actually an XML file ...
|
|
202
|
+
MIME: text/xml/sharepoint_map
|
|
203
|
+
PUID: fmt/101
|
|
204
|
+
EXTENSIONS: xml
|
|
205
|
+
|
|
206
|
+
ARCHIVE:
|
|
207
|
+
|
|
208
|
+
EAD:
|
|
209
|
+
NAME: Encoded Archival Description (EAD)
|
|
210
|
+
# This is again an invented mime type. It's actually an XML file ...
|
|
211
|
+
MIME: archive/ead
|
|
212
|
+
PUID: fmt/101
|
|
213
|
+
EXTENSIONS: ead,xml
|
|
@@ -0,0 +1,103 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
### require 'tools/string'
|
|
4
|
+
|
|
5
|
+
require 'libis/tools/logger'
|
|
6
|
+
require 'libis/format/type_database'
|
|
7
|
+
|
|
8
|
+
require_relative 'repository'
|
|
9
|
+
|
|
10
|
+
module Libis
|
|
11
|
+
module Format
|
|
12
|
+
module Converter
|
|
13
|
+
|
|
14
|
+
class Base
|
|
15
|
+
include Libis::Tools::Logger
|
|
16
|
+
|
|
17
|
+
def input_types
|
|
18
|
+
raise RuntimeError, 'Method #input_types needs to be overridden in converter'
|
|
19
|
+
end
|
|
20
|
+
|
|
21
|
+
protected
|
|
22
|
+
|
|
23
|
+
def output_types
|
|
24
|
+
raise RuntimeError, 'Method #output_types needs to be overridden in converter'
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
attr_accessor :source, :options, :flags
|
|
28
|
+
|
|
29
|
+
def init(_)
|
|
30
|
+
raise RuntimeError, 'Method #init should be implemented in converter'
|
|
31
|
+
end
|
|
32
|
+
|
|
33
|
+
def do_convert(_, _)
|
|
34
|
+
raise RuntimeError, 'Method #do_convert should be implemented in converter'
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
public
|
|
38
|
+
|
|
39
|
+
def initialize( source = nil, options = {}, flags = {} )
|
|
40
|
+
@source = source
|
|
41
|
+
@options = options ? options : {}
|
|
42
|
+
@flags = flags ? flags : {}
|
|
43
|
+
init(source.to_s rescue nil)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
def convert(target, format = nil)
|
|
47
|
+
do_convert(target, format)
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
def Base.inherited( klass )
|
|
51
|
+
|
|
52
|
+
Repository.register klass
|
|
53
|
+
|
|
54
|
+
class << self
|
|
55
|
+
|
|
56
|
+
def conversions
|
|
57
|
+
input_types.inject({}) do |input_type, hash|
|
|
58
|
+
hash[input_type] = output_types
|
|
59
|
+
hash
|
|
60
|
+
end
|
|
61
|
+
end
|
|
62
|
+
|
|
63
|
+
def input_type?(type_id)
|
|
64
|
+
input_types.include? type_id
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
def output_type?(type_id)
|
|
68
|
+
output_types.include? type_id
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
def input_mimetype?(mimetype)
|
|
72
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
|
73
|
+
input_type? type_id
|
|
74
|
+
end
|
|
75
|
+
|
|
76
|
+
def output_mimetype?(mimetype)
|
|
77
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
|
78
|
+
output_type? type_id
|
|
79
|
+
end
|
|
80
|
+
|
|
81
|
+
def conversion?(input_type, output_type)
|
|
82
|
+
conversions[input_type] and conversions[input_type].any? { |t| t == output_type }
|
|
83
|
+
end
|
|
84
|
+
|
|
85
|
+
def output_for(input_type)
|
|
86
|
+
conversions[input_type]
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
def extension?(extension)
|
|
90
|
+
!TypeDatabase.ext_types(extension).first.nil?
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
end
|
|
94
|
+
|
|
95
|
+
|
|
96
|
+
end
|
|
97
|
+
|
|
98
|
+
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
end
|
|
102
|
+
end
|
|
103
|
+
end
|
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'fileutils'
|
|
4
|
+
|
|
5
|
+
require 'libis/tools/logger'
|
|
6
|
+
require 'libis/format/type_database'
|
|
7
|
+
|
|
8
|
+
module Libis
|
|
9
|
+
module Format
|
|
10
|
+
module Converter
|
|
11
|
+
|
|
12
|
+
# Runs a sequence of converters one after the other, feeding the output of
# each step into the next.
#
# A chain is an Array of nodes; each node is a Hash with the keys
# +:converter+ (a converter class) and +:target+ (the type that step produces).
class Chain
  include ::Libis::Tools::Logger

  # @param converter_chain [Array<Hash>] ordered list of chain nodes
  def initialize(converter_chain)
    @converter_chain = converter_chain
  end

  # @return [Array<Hash>] the chain nodes this object was created with
  def to_array
    @converter_chain
  end

  # Converts +src_file+ into +target_file+ by running every converter in the
  # chain. Intermediate results are written next to +target_file+ with a
  # '.temp.<ext>' suffix and removed afterwards, also when a step raises.
  #
  # @param src_file    [String] file handed to the first converter
  # @param target_file [String] file the last converter writes
  # @param operations  [Hash, Array] name/value pairs; each name is sent as a
  #   method, with its value, to the last converter in the chain that responds
  #   to it. Unsupported operations are logged and skipped.
  # @return [Array<String>] paths of the temporary files that were used
  def convert(src_file, target_file, operations = [])
    my_operations = assign_operations(operations)
    chain = @converter_chain.clone
    temp_files = []

    begin
      # noinspection RubyParenthesesAroundConditionInspection
      while (chain_element = chain.shift)
        target_type = chain_element[:target]
        converter_class = chain_element[:converter]
        converter = converter_class.new(src_file)

        # Converters without any assigned operation have no entry; fall back
        # to an empty hash instead of calling #each on nil.
        my_operations.fetch(converter_class, {}).each do |name, value|
          converter.send name, value
        end

        target = target_file

        # Every step except the last one writes to a temporary file.
        unless chain.empty?
          extension = TypeDatabase.instance.type2ext(target_type)
          target += '.temp.' + extension
          # Keep appending the extension until the name is unused.
          target += '.' + extension while File.exist? target
          temp_files << target
        end

        FileUtils.mkdir_p File.dirname(target)

        converter.convert(target, target_type)

        src_file = target
      end
    ensure
      # A failed step may never have created its file, hence the existence check.
      temp_files.each { |f| File.delete(f) if File.exist?(f) }
    end

    temp_files
  end

  private

  # Sanity check: maps each requested operation to the last converter in the
  # chain whose instances respond to it; logs an error for unsupported ones.
  #
  # @return [Hash{Class => Hash{Symbol => Object}}] operations per converter class
  def assign_operations(operations)
    operations.each_with_object({}) do |(key, value), assigned|
      method = key.to_s.downcase.to_sym
      chain_element = @converter_chain.reverse_each.find { |c| c[:converter].new.respond_to? method }
      if chain_element
        (assigned[chain_element[:converter]] ||= {})[method] = value
      else
        error "No converter in the converter chain supports '#{method.to_s}'. Continuing conversion without this operation."
      end
    end
  end

end
|
|
77
|
+
|
|
78
|
+
end
|
|
79
|
+
end
|
|
80
|
+
end
|
|
@@ -0,0 +1,110 @@
|
|
|
1
|
+
# coding: utf-8
|
|
2
|
+
|
|
3
|
+
require 'set'
|
|
4
|
+
require 'singleton'
|
|
5
|
+
|
|
6
|
+
require 'libis/tools/logger'
|
|
7
|
+
|
|
8
|
+
require_relative 'chain'
|
|
9
|
+
|
|
10
|
+
module Libis
|
|
11
|
+
module Format
|
|
12
|
+
module Converter
|
|
13
|
+
|
|
14
|
+
# Singleton registry of converter classes that can also work out which
# sequence of converters turns one type into another.
#
# The public class-level API (register, get_converters, get_converter_chain,
# recursive_chain) delegates to the singleton instance so that the logging
# helpers mixed in by Libis::Tools::Logger are called on an instance.
class Repository
  include Singleton
  include ::Libis::Tools::Logger

  # Upper limit of the converter chain length we want to consider.
  MAX_CHAIN_LENGTH = 8

  # @return [Set<Class>] the registered converter classes
  attr_reader :converters
  # Glob pattern of the converter source files loaded by #get_converters.
  # Needs a reader as well as a writer because #get_converters reads it.
  attr_accessor :converters_glob

  def initialize
    @converters = Set.new
    # Converter files are looked up in the directory that holds this file.
    @converters_glob = File.join(File.dirname(__FILE__), '*_converter.rb')
  end

  # Adds a converter class to the registry.
  # @return [Set, nil] result of Set#add? (nil when already registered)
  def self.register(converter_class)
    instance.converters.add? converter_class
  end

  # @see #get_converters
  def self.get_converters
    instance.get_converters
  end

  # @see #get_converter_chain
  def self.get_converter_chain(src_type, tgt_type, operations = [])
    instance.get_converter_chain(src_type, tgt_type, operations)
  end

  # @see #recursive_chain
  def self.recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
    instance.recursive_chain(src_type, tgt_type, operations, chains_found, current_chain)
  end

  # Returns the registered converters. When none are registered yet, the files
  # matching #converters_glob are required first.
  # @return [Set<Class>]
  def get_converters
    if converters.empty?
      Dir.glob(converters_glob).each do |filename|
        # noinspection RubyResolve
        require File.expand_path(filename)
      end
    end
    converters
  end

  # Finds a converter chain from +src_type+ to +tgt_type+ that supports all
  # +operations+.
  #
  # @return [Chain, nil] a Chain for the first chain found, or nil (after
  #   logging an error) when there is none
  def get_converter_chain(src_type, tgt_type, operations = [])
    msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
    chain_list = recursive_chain src_type, tgt_type, operations

    if chain_list.empty?
      error "No conversion chain found for #{msg}"
      return nil
    end

    warn "Found more than one conversion chain for #{msg}. Picking the first one." if chain_list.length > 1

    chain_list.each do |chain|
      trace = chain.inject("Base chain: #{src_type.to_s}") do |text, node|
        text + "->#{node[:converter].name}:#{node[:target].to_s}"
      end
      debug trace
    end

    # The enclosing module is Converter (singular).
    ::Libis::Format::Converter::Chain.new(chain_list.first)
  end

  # Recursive search for the shortest converter chains.
  #
  # @param src_type      type the next step has to accept
  # @param tgt_type      type the chain has to end with
  # @param operations    [Hash, Array] operations the chain must support
  # @param chains_found  [Array<Array<Hash>>] accumulator, modified in place
  # @param current_chain [Array<Hash>] nodes chosen so far
  # @return [Array<Array<Hash>>] +chains_found+
  def recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
    return chains_found unless current_chain.length < MAX_CHAIN_LENGTH

    # First pass: converters that reach the target type in a single step.
    get_converters.each do |converter|
      next unless converter.conversion?(src_type, tgt_type)
      next if current_chain.any? { |c| c[:converter] == converter && c[:target] == tgt_type }

      sequence = current_chain.dup << { :converter => converter, :target => tgt_type }
      next unless supports_operations?(sequence, operations)

      # we only want to remember the shortest converter chains
      chains_found.clear if !chains_found.empty? && sequence.length < chains_found[0].length
      chains_found << sequence if chains_found.empty? || sequence.length == chains_found[0].length
    end

    # Stop when going one level deeper cannot beat what was already found.
    return chains_found unless chains_found.empty? || current_chain.length + 1 < chains_found[0].length

    # Second pass: try every intermediate type reachable from src_type.
    get_converters.each do |converter|
      next unless converter.input_type? src_type
      # NOTE(review): output_types is called on the converter class with an
      # argument here; presumably converters define such a class method -- confirm.
      converter.output_types(src_type).each do |tmp_type|
        # would like to enable the following for optimization, but some operation may require such a step
        # next if tmp_type == src_type
        # next if current_chain.any? { |c| c[:target] == tmp_type}
        recursive_chain(tmp_type, tgt_type, operations, chains_found,
                        current_chain.dup << { :converter => converter, :target => tmp_type })
      end
    end

    chains_found
  end

  private

  # @return [Boolean] whether every operation is supported by an instance of
  #   at least one converter in +sequence+
  def supports_operations?(sequence, operations)
    operations.all? do |op, _|
      method = op.to_s.downcase.to_sym
      sequence.any? { |n| n[:converter].new.respond_to? method }
    end
  end

end
|
|
107
|
+
|
|
108
|
+
end
|
|
109
|
+
end
|
|
110
|
+
end
|
|
@@ -0,0 +1,38 @@
|
|
|
1
|
+
require 'os'
|
|
2
|
+
require 'tempfile'
|
|
3
|
+
require 'csv'
|
|
4
|
+
require 'singleton'
|
|
5
|
+
|
|
6
|
+
require 'libis/tools/extend/string'
|
|
7
|
+
require 'libis/tools/logger'
|
|
8
|
+
require 'libis/tools/command'
|
|
9
|
+
|
|
10
|
+
module Libis
|
|
11
|
+
module Format
|
|
12
|
+
|
|
13
|
+
# Singleton wrapper around the DROID command line tool shipped in tools/droid.
class Droid
  include ::Libis::Tools::Logger
  include Singleton

  # Convenience entry point; see #run.
  def self.run(file)
    instance.run file
  end

  # Runs DROID on +file+ and returns its CSV report as hashes.
  #
  # DROID is invoked twice: once to build a profile for the file ('-a'), once
  # to export that profile as CSV ('-e'). Both files live in a private
  # temporary directory that is removed when the method returns, also when
  # DROID or the CSV parsing fails.
  #
  # @param file [String] path of the file to identify
  # @return [Array<Hash>] one Hash per CSV row, keyed by the downcased,
  #   symbolized column headers
  def run(file)
    droid_dir = File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid')
    droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')

    # Dir.mktmpdir replaces Dir::Tmpname.make_tmpname, an internal API that is
    # no longer available in Ruby 2.5 and later.
    Dir.mktmpdir('droid') do |work_dir|
      profile = File.join(work_dir, 'droid.profile')
      report = File.join(work_dir, 'droid.csv')

      result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
      warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0

      result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
      warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0

      rows = CSV.read(report, headers: true, header_converters: [:downcase, :symbol])
      rows.map { |row| row.to_hash }
    end
  end
end
|
|
36
|
+
|
|
37
|
+
end
|
|
38
|
+
end
|