libis-format 0.9.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.coveralls.yml +2 -0
- data/.gitignore +15 -0
- data/.travis.yml +36 -0
- data/Gemfile +5 -0
- data/LICENSE.txt +22 -0
- data/README.md +38 -0
- data/Rakefile +8 -0
- data/bin/droid +15 -0
- data/bin/fido +12 -0
- data/data/ISOcoated.icc +0 -0
- data/data/PDFA_def.ps +32 -0
- data/data/ead.xsd +2728 -0
- data/data/lias_formats.xml +106 -0
- data/data/types.yml +213 -0
- data/lib/libis/format/converter/base.rb +103 -0
- data/lib/libis/format/converter/chain.rb +80 -0
- data/lib/libis/format/converter/repository.rb +110 -0
- data/lib/libis/format/converter.rb +11 -0
- data/lib/libis/format/droid.rb +38 -0
- data/lib/libis/format/fido.rb +109 -0
- data/lib/libis/format/identifier.rb +185 -0
- data/lib/libis/format/type_database.rb +170 -0
- data/lib/libis/format/version.rb +5 -0
- data/lib/libis/format.rb +12 -0
- data/lib/libis-format.rb +1 -0
- data/libis-format.gemspec +30 -0
- data/spec/data/Cevennes2.bmp +0 -0
- data/spec/data/Cevennes2.jp2 +0 -0
- data/spec/data/Cevennes2.ppm +22492 -0
- data/spec/data/test-ead.xml +392 -0
- data/spec/data/test-jpg.tif +0 -0
- data/spec/data/test-lzw.tif +0 -0
- data/spec/data/test.bmp +0 -0
- data/spec/data/test.doc +0 -0
- data/spec/data/test.docx +0 -0
- data/spec/data/test.gif +0 -0
- data/spec/data/test.ods +0 -0
- data/spec/data/test.odt +0 -0
- data/spec/data/test.pdf +0 -0
- data/spec/data/test.png +0 -0
- data/spec/data/test.ps +8631 -0
- data/spec/data/test.psd +0 -0
- data/spec/data/test.rtf +1455 -0
- data/spec/data/test.tif +0 -0
- data/spec/data/test.txt +12 -0
- data/spec/data/test.xcf +0 -0
- data/spec/data/test.xls +0 -0
- data/spec/data/test.xlsx +0 -0
- data/spec/data/test.xml +4 -0
- data/spec/identifier_spec.rb +59 -0
- data/spec/spec_helper.rb +9 -0
- data/spec/test_types.yml +12 -0
- data/spec/type_database_spec.rb +140 -0
- data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
- data/tools/droid/container-signature-20150307.xml +2235 -0
- data/tools/droid/droid-command-line-6.1.5.jar +0 -0
- data/tools/droid/droid.bat +154 -0
- data/tools/droid/droid.sh +138 -0
- data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
- data/tools/droid/lib/activation-1.1.jar +0 -0
- data/tools/droid/lib/antlr-2.7.7.jar +0 -0
- data/tools/droid/lib/antlr-3.2.jar +0 -0
- data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
- data/tools/droid/lib/aopalliance-1.0.jar +0 -0
- data/tools/droid/lib/asm-2.2.3.jar +0 -0
- data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
- data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
- data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
- data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
- data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
- data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
- data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
- data/tools/droid/lib/classmate-1.0.0.jar +0 -0
- data/tools/droid/lib/commons-cli-1.2.jar +0 -0
- data/tools/droid/lib/commons-codec-1.4.jar +0 -0
- data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
- data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
- data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
- data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
- data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
- data/tools/droid/lib/commons-io-2.4.jar +0 -0
- data/tools/droid/lib/commons-lang-2.6.jar +0 -0
- data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
- data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
- data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
- data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
- data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
- data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
- data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
- data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
- data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
- data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
- data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
- data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
- data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
- data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
- data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
- data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
- data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
- data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
- data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
- data/tools/droid/lib/itext-2.0.8.jar +0 -0
- data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
- data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
- data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
- data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
- data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
- data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
- data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
- data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
- data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
- data/tools/droid/lib/jta-1.1.jar +0 -0
- data/tools/droid/lib/log4j-1.2.13.jar +0 -0
- data/tools/droid/lib/neethi-2.0.4.jar +0 -0
- data/tools/droid/lib/opencsv-2.3.jar +0 -0
- data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
- data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
- data/tools/droid/lib/poi-3.7.jar +0 -0
- data/tools/droid/lib/saaj-api-1.3.jar +0 -0
- data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
- data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
- data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
- data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
- data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
- data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
- data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
- data/tools/droid/lib/truezip-6.8.4.jar +0 -0
- data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
- data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
- data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
- data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
- data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
- data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
- data/tools/droid/lib/xz-1.0.jar +0 -0
- data/tools/fido/__init__.py +0 -0
- data/tools/fido/argparselocal.py +2355 -0
- data/tools/fido/argparselocal.pyc +0 -0
- data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
- data/tools/fido/conf/container-signature-20150307.xml +2238 -0
- data/tools/fido/conf/dc.xsd +119 -0
- data/tools/fido/conf/dcmitype.xsd +53 -0
- data/tools/fido/conf/dcterms.xsd +383 -0
- data/tools/fido/conf/fido-formats.xsd +173 -0
- data/tools/fido/conf/format_extension_template.xml +105 -0
- data/tools/fido/conf/format_extensions.xml +498 -0
- data/tools/fido/conf/formats-v81.xml +38355 -0
- data/tools/fido/conf/pronom-xml-v81.zip +0 -0
- data/tools/fido/conf/versions.xml +8 -0
- data/tools/fido/fido.bat +4 -0
- data/tools/fido/fido.py +854 -0
- data/tools/fido/fido.sh +5 -0
- data/tools/fido/prepare.py +616 -0
- data/tools/fido/pronomutils.py +115 -0
- data/tools/fido/toxml.py +52 -0
- data/tools/fido/update_signatures.py +171 -0
- metadata +342 -0
@@ -0,0 +1,106 @@
|
|
1
|
+
<?xml version="1.0"?>
|
2
|
+
<formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
|
3
|
+
<format>
|
4
|
+
<puid>lias-fmt/101</puid>
|
5
|
+
<mime>text/xml</mime>
|
6
|
+
<name>Extensible Markup Language</name>
|
7
|
+
<version>1.0</version>
|
8
|
+
<alias>XML (1.0)</alias>
|
9
|
+
<pronom_id>638</pronom_id>
|
10
|
+
<extension>xml</extension>
|
11
|
+
<has_priority_over>fmt/101</has_priority_over>
|
12
|
+
<signature>
|
13
|
+
<name>XML 1.0</name>
|
14
|
+
<pattern>
|
15
|
+
<position>BOF</position>
|
16
|
+
<regex>(?s)\A.{0,3}\x3c\?xml\s+version\s+=\s+(?:"|')1\.0(?:"|')</regex>
|
17
|
+
</pattern>
|
18
|
+
</signature>
|
19
|
+
</format>
|
20
|
+
<format>
|
21
|
+
<puid>lias-fmt/001</puid>
|
22
|
+
<name>Textura TS print file</name>
|
23
|
+
<mime>text/plain</mime>
|
24
|
+
<signature>
|
25
|
+
<name>Textura TS print file</name>
|
26
|
+
<pattern>
|
27
|
+
<position>BOF</position>
|
28
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
|
29
|
+
</pattern>
|
30
|
+
<pattern>
|
31
|
+
<position>VAR</position>
|
32
|
+
<regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
|
33
|
+
</pattern>
|
34
|
+
<pattern>
|
35
|
+
<position>VAR</position>
|
36
|
+
<regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
|
37
|
+
</pattern>
|
38
|
+
<pattern>
|
39
|
+
<position>VAR</position>
|
40
|
+
<regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
|
41
|
+
</pattern>
|
42
|
+
<pattern>
|
43
|
+
<position>VAR</position>
|
44
|
+
<regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
|
45
|
+
</pattern>
|
46
|
+
<pattern>
|
47
|
+
<position>VAR</position>
|
48
|
+
<regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
|
49
|
+
</pattern>
|
50
|
+
<pattern>
|
51
|
+
<position>VAR</position>
|
52
|
+
<regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
|
53
|
+
</pattern>
|
54
|
+
<!--
|
55
|
+
-->
|
56
|
+
<pattern>
|
57
|
+
<position>VAR</position>
|
58
|
+
<regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
|
59
|
+
</pattern>
|
60
|
+
<pattern>
|
61
|
+
<position>EOF</position>
|
62
|
+
<regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
|
63
|
+
</pattern>
|
64
|
+
</signature>
|
65
|
+
</format>
|
66
|
+
<format>
|
67
|
+
<puid>lias-fmt/002</puid>
|
68
|
+
<name>Textura VP/S print file</name>
|
69
|
+
<mime>text/plain</mime>
|
70
|
+
<has_priority_over>lias-fmt/001</has_priority_over>
|
71
|
+
<signature>
|
72
|
+
<name>Textura VP/S print file</name>
|
73
|
+
<pattern>
|
74
|
+
<position>BOF</position>
|
75
|
+
<regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
|
76
|
+
</pattern>
|
77
|
+
<pattern>
|
78
|
+
<position>EOF</position>
|
79
|
+
<regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
|
80
|
+
</pattern>
|
81
|
+
</signature>
|
82
|
+
</format>
|
83
|
+
<format>
|
84
|
+
<puid>lias-fmt/189.word</puid>
|
85
|
+
<name>Microsoft Office Open XML - Word</name>
|
86
|
+
<extension>docx</extension>
|
87
|
+
<has_priority_over>x-fmt/263</has_priority_over>
|
88
|
+
<has_priority_over>fmt/189</has_priority_over>
|
89
|
+
<has_priority_over>fido-fmt/189.word</has_priority_over>
|
90
|
+
<signature>
|
91
|
+
<name>Microsoft Office Open XML - Word</name>
|
92
|
+
<pattern>
|
93
|
+
<position>BOF</position>
|
94
|
+
<regex>(?s)\APK\x03\x04</regex>
|
95
|
+
</pattern>
|
96
|
+
<pattern>
|
97
|
+
<position>BOF</position>
|
98
|
+
<regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
|
99
|
+
</pattern>
|
100
|
+
<pattern>
|
101
|
+
<position>EOF</position>
|
102
|
+
<regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
|
103
|
+
</pattern>
|
104
|
+
</signature>
|
105
|
+
</format>
|
106
|
+
</formats>
|
data/data/types.yml
ADDED
@@ -0,0 +1,213 @@
|
|
1
|
+
---
|
2
|
+
# This lists all the types the converters know about along with the mime types and file extensions.
|
3
|
+
# The first file extension in the list is the default one that will be used when a file of that type is created.
|
4
|
+
# The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
|
5
|
+
# class should take care of that.
|
6
|
+
# Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
|
7
|
+
# reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
|
8
|
+
# to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
|
9
|
+
|
10
|
+
IMAGE:
|
11
|
+
TIFF:
|
12
|
+
NAME: Tagged Image File Format (TIFF)
|
13
|
+
MIME: image/tiff
|
14
|
+
EXTENSIONS: tif,tiff
|
15
|
+
|
16
|
+
JPEG2000:
|
17
|
+
NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
|
18
|
+
MIME: image/jp2
|
19
|
+
EXTENSIONS: jp2
|
20
|
+
|
21
|
+
JPEG:
|
22
|
+
NAME: Joint Photographic Experts Group (JPEG)
|
23
|
+
MIME: image/jpeg
|
24
|
+
EXTENSIONS: jpg,jpe,jpeg
|
25
|
+
|
26
|
+
PNG:
|
27
|
+
NAME: Portable Network Graphics (PNG)
|
28
|
+
MIME: image/png
|
29
|
+
EXTENSIONS: png
|
30
|
+
|
31
|
+
BMP:
|
32
|
+
NAME: Device Independent Bitmap (DIP/BMP)
|
33
|
+
MIME: image/bmp,image/x-ms-bmp
|
34
|
+
EXTENSIONS: bmp
|
35
|
+
|
36
|
+
GIF:
|
37
|
+
NAME: Graphics Interchange Format (GIF)
|
38
|
+
MIME: image/gif
|
39
|
+
EXTENSIONS: gif
|
40
|
+
|
41
|
+
PBM:
|
42
|
+
NAME: Portable Bitmap Format (PBM)
|
43
|
+
PUID: fmt/409
|
44
|
+
MIME: image/x-portable-bitmap
|
45
|
+
EXTENSIONS: pbm
|
46
|
+
|
47
|
+
PGM:
|
48
|
+
NAME: Portable GrayMap Format (PGM)
|
49
|
+
PUID: fmt/406
|
50
|
+
MIME: image/x-portable-graymap
|
51
|
+
EXTENSIONS: pgm
|
52
|
+
|
53
|
+
PPM:
|
54
|
+
NAME: Portable Pixel Map (PPM)
|
55
|
+
PUID: fmt/408
|
56
|
+
MIME: image/x-portable-pixmap
|
57
|
+
EXTENSIONS: ppm
|
58
|
+
|
59
|
+
AUDIO:
|
60
|
+
|
61
|
+
WAV:
|
62
|
+
NAME: Waveform Audio File Format (WAVE)
|
63
|
+
MIME: audio/x-wav
|
64
|
+
EXTENSIONS: wav
|
65
|
+
|
66
|
+
MP3:
|
67
|
+
NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
|
68
|
+
MIME: audio/mpeg
|
69
|
+
EXTENSIONS: mp3
|
70
|
+
|
71
|
+
FLAC:
|
72
|
+
NAME: Free Lossless Audio Codec (FLAC)
|
73
|
+
MIME: audio/flac
|
74
|
+
EXTENSIONS: flac
|
75
|
+
|
76
|
+
VIDEO:
|
77
|
+
|
78
|
+
MPEG:
|
79
|
+
NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
|
80
|
+
MIME: video/mpeg
|
81
|
+
EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
|
82
|
+
|
83
|
+
MPEG4:
|
84
|
+
NAME: Moving Picture Experts Group (MPEG-4)
|
85
|
+
MIME: video/mp4
|
86
|
+
EXTENSIONS: mp4,mpeg4
|
87
|
+
|
88
|
+
MJP2:
|
89
|
+
NAME: Motion JPEG 2000 (MJP2)
|
90
|
+
MIME: video/mj2
|
91
|
+
EXTENSIONS: mj2, mjp2
|
92
|
+
|
93
|
+
QTFF:
|
94
|
+
NAME: QuickTime File Format (QTFF)
|
95
|
+
MIME: video/quicktime
|
96
|
+
EXTENSIONS: mov, qt
|
97
|
+
|
98
|
+
AVI:
|
99
|
+
NAME: Audio Video Interleave (AVI)
|
100
|
+
MIME: video/x-msvideo
|
101
|
+
EXTENSIONS: avi
|
102
|
+
|
103
|
+
OGGV:
|
104
|
+
NAME: OGG Video (OGGV)
|
105
|
+
MIME: video/ogg
|
106
|
+
EXTENSIONS: ogv
|
107
|
+
|
108
|
+
WMV:
|
109
|
+
NAME: Windows Media Video (WMV)
|
110
|
+
MIME: video/x-ms-wmv
|
111
|
+
EXTENSIONS: wmv
|
112
|
+
|
113
|
+
DV:
|
114
|
+
NAME: Digital Video (DV)
|
115
|
+
MIME: video/dv
|
116
|
+
EXTENSIONS: dv
|
117
|
+
|
118
|
+
FLASH:
|
119
|
+
NAME: Flash video (FLV)
|
120
|
+
MIME: video/x-flv
|
121
|
+
EXTENSIONS: flv
|
122
|
+
|
123
|
+
DOCUMENT:
|
124
|
+
|
125
|
+
TXT:
|
126
|
+
NAME: Unformatted text
|
127
|
+
MIME: text/plain
|
128
|
+
EXTENSIONS: txt
|
129
|
+
|
130
|
+
RTF:
|
131
|
+
NAME: Rich Text Format (RTF)
|
132
|
+
PUID: fmt/45
|
133
|
+
MIME: text/rtf application/rtf
|
134
|
+
EXTENSIONS: rtf
|
135
|
+
|
136
|
+
HTML:
|
137
|
+
NAME: HyperText Markup Language (HTML)
|
138
|
+
MIME: text/html
|
139
|
+
EXTENSIONS: html, htm
|
140
|
+
|
141
|
+
MSDOC:
|
142
|
+
NAME: Microsoft Word Document (DOC)
|
143
|
+
PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
|
144
|
+
MIME: application/vnd.ms-word application/msword
|
145
|
+
EXTENSIONS: doc
|
146
|
+
|
147
|
+
MSDOCX:
|
148
|
+
NAME: Microsoft Word OpenXML Document (DOCX)
|
149
|
+
PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
|
150
|
+
MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
|
151
|
+
EXTENSIONS: docx
|
152
|
+
|
153
|
+
MSXLS:
|
154
|
+
NAME: Microsoft Excel Spreadsheet (XLS)
|
155
|
+
MIME: application/vnd.ms-excel,application/msexcel
|
156
|
+
EXTENSIONS: xls
|
157
|
+
|
158
|
+
MSXLSX:
|
159
|
+
NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
|
160
|
+
PUID: fido-fmt/189.xl
|
161
|
+
MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
|
162
|
+
EXTENSIONS: xlsx
|
163
|
+
|
164
|
+
MSPPT:
|
165
|
+
NAME: Microsoft Powerpoint Presentation (PPT)
|
166
|
+
MIME: application/vnd.ms-powerpoint,application/mspowerpoint
|
167
|
+
EXTENSIONS: ppt
|
168
|
+
|
169
|
+
MSPPTX:
|
170
|
+
NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
|
171
|
+
PUID: fido-fmt/189.ppt
|
172
|
+
MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
|
173
|
+
EXTENSIONS: pptx
|
174
|
+
|
175
|
+
PDF:
|
176
|
+
NAME: Adobe Portable Document Format (PDF)
|
177
|
+
PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
|
178
|
+
MIME: application/pdf
|
179
|
+
EXTENSIONS: pdf
|
180
|
+
|
181
|
+
PDFA:
|
182
|
+
NAME: Adobe Portable Document Format for Archives (PDFA)
|
183
|
+
PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
|
184
|
+
MIME: application/pdf
|
185
|
+
EXTENSIONS: pdf
|
186
|
+
|
187
|
+
WORDPERFECT:
|
188
|
+
NAME: WordPerfect Document (WPD)
|
189
|
+
PUID: x-fmt/44 x-fmt/394
|
190
|
+
MIME: application/vnd.wordperfect
|
191
|
+
EXTENSIONS: wpd
|
192
|
+
|
193
|
+
XML:
|
194
|
+
NAME: Extensible Markup Language (XML)
|
195
|
+
MIME: text/xml
|
196
|
+
PUID: fmt/101
|
197
|
+
EXTENSIONS: xml
|
198
|
+
|
199
|
+
SHAREPOINT_MAP:
|
200
|
+
NAME: Sharepoint mapping file
|
201
|
+
# This is again an invented MIME type. It is actually an XML file.
|
202
|
+
MIME: text/xml/sharepoint_map
|
203
|
+
PUID: fmt/101
|
204
|
+
EXTENSIONS: xml
|
205
|
+
|
206
|
+
ARCHIVE:
|
207
|
+
|
208
|
+
EAD:
|
209
|
+
NAME: Encoded Archival Description (EAD)
|
210
|
+
# This is again an invented MIME type. It is actually an XML file.
|
211
|
+
MIME: archive/ead
|
212
|
+
PUID: fmt/101
|
213
|
+
EXTENSIONS: ead,xml
|
@@ -0,0 +1,103 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
### require 'tools/string'
|
4
|
+
|
5
|
+
require 'libis/tools/logger'
|
6
|
+
require 'libis/format/type_database'
|
7
|
+
|
8
|
+
require_relative 'repository'
|
9
|
+
|
10
|
+
module Libis
|
11
|
+
module Format
|
12
|
+
module Converter
|
13
|
+
|
14
|
+
class Base
|
15
|
+
include Libis::Tools::Logger
|
16
|
+
|
17
|
+
def input_types
|
18
|
+
raise RuntimeError, 'Method #input_types needs to be overridden in converter'
|
19
|
+
end
|
20
|
+
|
21
|
+
protected
|
22
|
+
|
23
|
+
def output_types
|
24
|
+
raise RuntimeError, 'Method #output_types needs to be overridden in converter'
|
25
|
+
end
|
26
|
+
|
27
|
+
attr_accessor :source, :options, :flags
|
28
|
+
|
29
|
+
def init(_)
|
30
|
+
raise RuntimeError, 'Method #init should be implemented in converter'
|
31
|
+
end
|
32
|
+
|
33
|
+
def do_convert(_, _)
|
34
|
+
raise RuntimeError, 'Method #do_convert should be implemented in converter'
|
35
|
+
end
|
36
|
+
|
37
|
+
public
|
38
|
+
|
39
|
+
def initialize( source = nil, options = {}, flags = {} )
|
40
|
+
@source = source
|
41
|
+
@options = options ? options : {}
|
42
|
+
@flags = flags ? flags : {}
|
43
|
+
init(source.to_s rescue nil)
|
44
|
+
end
|
45
|
+
|
46
|
+
def convert(target, format = nil)
|
47
|
+
do_convert(target, format)
|
48
|
+
end
|
49
|
+
|
50
|
+
def Base.inherited( klass )
|
51
|
+
|
52
|
+
Repository.register klass
|
53
|
+
|
54
|
+
class << self
|
55
|
+
|
56
|
+
def conversions
|
57
|
+
input_types.inject({}) do |input_type, hash|
|
58
|
+
hash[input_type] = output_types
|
59
|
+
hash
|
60
|
+
end
|
61
|
+
end
|
62
|
+
|
63
|
+
def input_type?(type_id)
|
64
|
+
input_types.include? type_id
|
65
|
+
end
|
66
|
+
|
67
|
+
def output_type?(type_id)
|
68
|
+
output_types.include? type_id
|
69
|
+
end
|
70
|
+
|
71
|
+
def input_mimetype?(mimetype)
|
72
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
73
|
+
input_type? type_id
|
74
|
+
end
|
75
|
+
|
76
|
+
def output_mimetype?(mimetype)
|
77
|
+
type_id = TypeDatabase.instance.mime_types(mimetype).first
|
78
|
+
output_type? type_id
|
79
|
+
end
|
80
|
+
|
81
|
+
def conversion?(input_type, output_type)
|
82
|
+
conversions[input_type] and conversions[input_type].any? { |t| t == output_type }
|
83
|
+
end
|
84
|
+
|
85
|
+
def output_for(input_type)
|
86
|
+
conversions[input_type]
|
87
|
+
end
|
88
|
+
|
89
|
+
def extension?(extension)
|
90
|
+
!TypeDatabase.ext_types(extension).first.nil?
|
91
|
+
end
|
92
|
+
|
93
|
+
end
|
94
|
+
|
95
|
+
|
96
|
+
end
|
97
|
+
|
98
|
+
|
99
|
+
end
|
100
|
+
|
101
|
+
end
|
102
|
+
end
|
103
|
+
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
require 'libis/tools/logger'
|
6
|
+
require 'libis/format/type_database'
|
7
|
+
|
8
|
+
module Libis
  module Format
    module Converter

      # Runs a file through an ordered list of converters.
      #
      # Each element of the chain is a Hash with a :converter entry (a converter
      # class) and a :target entry (the type that converter has to produce).
      class Chain
        include ::Libis::Tools::Logger

        # @param converter_chain [Array<Hash>] chain elements as described above
        def initialize(converter_chain)
          @converter_chain = converter_chain
        end

        # @return [Array<Hash>] the chain elements this object was created with
        def to_array
          @converter_chain
        end

        # Converts +src_file+ into +target_file+ by running every converter of the
        # chain in order. Each step but the last writes to an intermediate file
        # next to the target; those files are removed afterwards, also when a
        # step raises.
        #
        # @param src_file [String] path of the file to convert
        # @param target_file [String] path of the final result
        # @param operations [Hash, Array] operation name => argument pairs; each one
        #   is sent to the last converter in the chain that responds to it
        # @return [Array<String>] paths of the intermediate files that were used
        def convert(src_file, target_file, operations = [])
          my_operations = assign_operations(operations)
          temp_files = []
          last_index = @converter_chain.length - 1

          begin
            @converter_chain.each_with_index do |chain_element, index|
              target_type = chain_element[:target]
              converter_class = chain_element[:converter]
              converter = converter_class.new(src_file)

              # A converter without assigned operations has no entry in the hash;
              # fall back to an empty set instead of calling #each on nil.
              my_operations.fetch(converter_class, {}).each do |method, value|
                converter.public_send method, value
              end

              target = target_file

              # every step except the last one writes to an intermediate file
              if index < last_index
                extension = TypeDatabase.instance.type2ext(target_type)
                target += '.temp.' + extension
                # keep appending the extension until the name is unused
                target += '.' + extension while File.exist? target
                temp_files << target
              end

              FileUtils.mkdir_p File.dirname(target)

              converter.convert(target, target_type)

              # the output of this step is the input of the next one
              src_file = target
            end
          ensure
            # a failed step may not have produced its file, so only delete what exists
            temp_files.each { |f| File.delete(f) if File.exist?(f) }
          end

          temp_files
        end

        private

        # Sanity check: assigns every requested operation to the last converter in
        # the chain whose instances respond to it. Unsupported operations are
        # logged and dropped.
        #
        # @return [Hash] converter class => { method name (Symbol) => argument }
        def assign_operations(operations)
          operations.each_with_object({}) do |(name, value), assigned|
            method = name.to_s.downcase.to_sym
            chain_element = @converter_chain.reverse_each.find { |c| c[:converter].new.respond_to? method }
            if chain_element
              (assigned[chain_element[:converter]] ||= {})[method] = value
            else
              error "No converter in the converter chain supports '#{method.to_s}'. Continuing conversion without this operation."
            end
          end
        end

      end

    end
  end
end
|
@@ -0,0 +1,110 @@
|
|
1
|
+
# coding: utf-8
|
2
|
+
|
3
|
+
require 'set'
|
4
|
+
require 'singleton'
|
5
|
+
|
6
|
+
require 'libis/tools/logger'
|
7
|
+
|
8
|
+
require_relative 'chain'
|
9
|
+
|
10
|
+
module Libis
  module Format
    module Converter

      # Singleton registry of converter classes that can also compute the
      # shortest converter chain(s) between two types.
      #
      # The class-level methods are thin delegators to the singleton instance, so
      # that the Logger methods mixed into the instance are available to the
      # actual implementation.
      class Repository
        include Singleton
        include ::Libis::Tools::Logger

        # upper limit of the converter chain length we want to consider
        MAX_CHAIN_LENGTH = 8

        # @return [Set] the registered converter classes
        attr_reader :converters

        # Glob pattern of the converter source files that get loaded on demand.
        # A reader is required because #get_converters reads the pattern back.
        attr_accessor :converters_glob

        def initialize
          @converters = Set.new
          # Converters live next to this file. File.dirname is required here:
          # File.basename would yield 'repository.rb/*_converter.rb', which can
          # never match anything.
          @converters_glob = File.join(File.dirname(__FILE__), '*_converter.rb')
        end

        class << self

          # @see Repository#register
          def register(converter_class)
            instance.register converter_class
          end

          # @see Repository#get_converters
          def get_converters
            instance.get_converters
          end

          # @see Repository#get_converter_chain
          def get_converter_chain(src_type, tgt_type, operations = [])
            instance.get_converter_chain src_type, tgt_type, operations
          end

          # Kept for backward compatibility; the implementation is a private
          # instance method.
          def recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
            instance.send :recursive_chain, src_type, tgt_type, operations, chains_found, current_chain
          end

        end

        # Adds a converter class to the registry.
        #
        # @return [Set, nil] the set, or nil if the class was already registered
        def register(converter_class)
          converters.add? converter_class
        end

        # Returns the registered converters. While the registry is still empty,
        # all files matching #converters_glob are required first.
        #
        # @return [Set] the registered converter classes
        def get_converters
          if converters.empty?
            Dir.glob(converters_glob).each do |filename|
              # noinspection RubyResolve
              require File.expand_path(filename)
            end
          end
          converters
        end

        # Finds a converter chain that turns +src_type+ into +tgt_type+ and
        # supports all requested operations.
        #
        # @param src_type [Object] type identifier of the source
        # @param tgt_type [Object] type identifier of the wanted result
        # @param operations [Hash, Array] operations the chain has to support
        # @return [Chain, nil] the first of the shortest chains, nil if none exists
        def get_converter_chain(src_type, tgt_type, operations = [])
          msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
          chain_list = recursive_chain src_type, tgt_type, operations

          if chain_list.empty?
            error "No conversion chain found for #{msg}"
            return nil
          end

          if chain_list.length > 1
            warn "Found more than one conversion chain for #{msg}. Picking the first one."
          end

          chain_list.each do |chain|
            trace = chain.inject("Base chain: #{src_type.to_s}") do |text, node|
              text + "->#{node[:converter].name}:#{node[:target].to_s}"
            end
            debug trace
          end

          # The enclosing module is Converter (singular).
          ::Libis::Format::Converter::Chain.new(chain_list.first)
        end

        private

        # Depth-first search for converter chains from +src_type+ to +tgt_type+.
        # Only the shortest chains found so far are kept in +chains_found+, which
        # is shared by (and mutated during) the whole recursion.
        #
        # @param chains_found [Array<Array<Hash>>] accumulator of complete chains
        # @param current_chain [Array<Hash>] the nodes leading up to +src_type+
        # @return [Array<Array<Hash>>] +chains_found+
        def recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
          return chains_found unless current_chain.length < MAX_CHAIN_LENGTH

          # first look for converters that reach the target type in a single step
          get_converters.each do |converter|
            next unless converter.conversion?(src_type, tgt_type)
            next if current_chain.any? { |c| c[:converter] == converter && c[:target] == tgt_type }

            sequence = current_chain.dup << { converter: converter, target: tgt_type }
            next unless supports_operations?(sequence, operations)

            # we only want to remember the shortest converter chains
            chains_found.clear if !chains_found.empty? && sequence.length < chains_found.first.length
            chains_found << sequence if chains_found.empty? || sequence.length == chains_found.first.length
          end

          # going one level deeper can only yield chains that are longer than current_chain + 1
          return chains_found unless chains_found.empty? || current_chain.length + 1 < chains_found.first.length

          get_converters.each do |converter|
            next unless converter.input_type? src_type
            # NOTE(review): output_types is called with an argument here; confirm the
            # converters' class-level output_types accepts one.
            converter.output_types(src_type).each do |tmp_type|
              # would like to enable the following for optimization, but some operation may require such a step
              # next if tmp_type == src_type
              # next if current_chain.any? { |c| c[:target] == tmp_type}
              recursive_chain(tmp_type, tgt_type, operations, chains_found,
                              current_chain.dup << { converter: converter, target: tmp_type })
            end
          end

          chains_found
        end

        # Checks that every operation is supported by at least one converter of
        # the given chain.
        def supports_operations?(sequence, operations)
          operations.all? do |op, _|
            method = op.to_s.downcase.to_sym
            sequence.any? { |n| n[:converter].new.respond_to? method }
          end
        end

      end

    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
require 'os'
|
2
|
+
require 'tempfile'
|
3
|
+
require 'csv'
|
4
|
+
require 'singleton'
|
5
|
+
|
6
|
+
require 'libis/tools/extend/string'
|
7
|
+
require 'libis/tools/logger'
|
8
|
+
require 'libis/tools/command'
|
9
|
+
|
10
|
+
module Libis
  module Format

    # Wrapper around the DROID command line tool that ships in tools/droid.
    class Droid
      include ::Libis::Tools::Logger
      include Singleton

      # Convenience shortcut for Droid.instance.run.
      def self.run(file)
        instance.run file
      end

      # Runs DROID on a file: first builds a DROID profile for the file, then
      # exports that profile as a CSV report and parses the report.
      #
      # A non-zero exit status of either DROID call is logged as a warning; the
      # method then continues. The profile and the report are written to a
      # private temporary directory that is removed when the method returns,
      # also when one of the steps raises.
      #
      # @param file [String] path of the file to identify
      # @return [Array<Hash>] one Hash per row of the CSV report, keyed by the
      #   down-cased, symbolized column headers
      def run(file)
        droid_dir = File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid')
        droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')

        # Dir.mktmpdir is the documented way to get unique scratch space; it
        # avoids the undocumented Dir::Tmpname.make_tmpname helper and takes care
        # of the clean-up itself.
        Dir.mktmpdir('droid') do |work_dir|
          profile = File.join(work_dir, 'droid.profile')
          report = File.join(work_dir, 'droid.csv')

          result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
          warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0

          result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
          warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0

          rows = CSV.read(report, headers: true, header_converters: [:downcase, :symbol])
          rows.map(&:to_hash)
        end
      end
    end

  end
end
|