libis-format 0.9.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,106 @@
1
+ <?xml version="1.0"?>
2
+ <formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
3
+ <format>
4
+ <puid>lias-fmt/101</puid>
5
+ <mime>text/xml</mime>
6
+ <name>Extensible Markup Language</name>
7
+ <version>1.0</version>
8
+ <alias>XML (1.0)</alias>
9
+ <pronom_id>638</pronom_id>
10
+ <extension>xml</extension>
11
+ <has_priority_over>fmt/101</has_priority_over>
12
+ <signature>
13
+ <name>XML 1.0</name>
14
+ <pattern>
15
+ <position>BOF</position>
16
+ <regex>(?s)\A.{0,3}\x3c\?xml\s+version\s+=\s+(?:"|')1\.0(?:"|')</regex>
17
+ </pattern>
18
+ </signature>
19
+ </format>
20
+ <format>
21
+ <puid>lias-fmt/001</puid>
22
+ <name>Textura TS print file</name>
23
+ <mime>text/plain</mime>
24
+ <signature>
25
+ <name>Textura TS print file</name>
26
+ <pattern>
27
+ <position>BOF</position>
28
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
29
+ </pattern>
30
+ <pattern>
31
+ <position>VAR</position>
32
+ <regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
33
+ </pattern>
34
+ <pattern>
35
+ <position>VAR</position>
36
+ <regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
37
+ </pattern>
38
+ <pattern>
39
+ <position>VAR</position>
40
+ <regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
41
+ </pattern>
42
+ <pattern>
43
+ <position>VAR</position>
44
+ <regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
45
+ </pattern>
46
+ <pattern>
47
+ <position>VAR</position>
48
+ <regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
49
+ </pattern>
50
+ <pattern>
51
+ <position>VAR</position>
52
+ <regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
53
+ </pattern>
54
+ <!--
55
+ -->
56
+ <pattern>
57
+ <position>VAR</position>
58
+ <regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
59
+ </pattern>
60
+ <pattern>
61
+ <position>EOF</position>
62
+ <regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
63
+ </pattern>
64
+ </signature>
65
+ </format>
66
+ <format>
67
+ <puid>lias-fmt/002</puid>
68
+ <name>Textura VP/S print file</name>
69
+ <mime>text/plain</mime>
70
+ <has_priority_over>lias-fmt/001</has_priority_over>
71
+ <signature>
72
+ <name>Textura VP/S print file</name>
73
+ <pattern>
74
+ <position>BOF</position>
75
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
76
+ </pattern>
77
+ <pattern>
78
+ <position>EOF</position>
79
+ <regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
80
+ </pattern>
81
+ </signature>
82
+ </format>
83
+ <format>
84
+ <puid>lias-fmt/189.word</puid>
85
+ <name>Microsoft Office Open XML - Word</name>
86
+ <extension>docx</extension>
87
+ <has_priority_over>x-fmt/263</has_priority_over>
88
+ <has_priority_over>fmt/189</has_priority_over>
89
+ <has_priority_over>fido-fmt/189.word</has_priority_over>
90
+ <signature>
91
+ <name>Microsoft Office Open XML - Word</name>
92
+ <pattern>
93
+ <position>BOF</position>
94
+ <regex>(?s)\APK\x03\x04</regex>
95
+ </pattern>
96
+ <pattern>
97
+ <position>BOF</position>
98
+ <regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
99
+ </pattern>
100
+ <pattern>
101
+ <position>EOF</position>
102
+ <regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
103
+ </pattern>
104
+ </signature>
105
+ </format>
106
+ </formats>
data/data/types.yml ADDED
@@ -0,0 +1,213 @@
1
+ ---
2
+ # This lists all the types the converters know about along with the mime types and file extensions.
3
+ # The first file extension in the list is the default one that will be used when a file of that type is created.
4
+ # The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
5
+ # class should take care of that.
6
+ # Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
7
+ # reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
8
+ # to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
9
+
10
+ IMAGE:
11
+ TIFF:
12
+ NAME: Tagged Image File Format (TIFF)
13
+ MIME: image/tiff
14
+ EXTENSIONS: tif,tiff
15
+
16
+ JPEG2000:
17
+ NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
18
+ MIME: image/jp2
19
+ EXTENSIONS: jp2
20
+
21
+ JPEG:
22
+ NAME: Joint Photographic Experts Group (JPEG)
23
+ MIME: image/jpeg
24
+ EXTENSIONS: jpg,jpe,jpeg
25
+
26
+ PNG:
27
+ NAME: Portable Network Graphics (PNG)
28
+ MIME: image/png
29
+ EXTENSIONS: png
30
+
31
+ BMP:
32
+ NAME: Device Independent Bitmap (DIB/BMP)
33
+ MIME: image/bmp,image/x-ms-bmp
34
+ EXTENSIONS: bmp
35
+
36
+ GIF:
37
+ NAME: Graphics Interchange Format (GIF)
38
+ MIME: image/gif
39
+ EXTENSIONS: gif
40
+
41
+ PBM:
42
+ NAME: Portable Bitmap Format (PBM)
43
+ PUID: fmt/409
44
+ MIME: image/x-portable-bitmap
45
+ EXTENSIONS: pbm
46
+
47
+ PGM:
48
+ NAME: Portable GrayMap Format (PGM)
49
+ PUID: fmt/406
50
+ MIME: image/x-portable-graymap
51
+ EXTENSIONS: pgm
52
+
53
+ PPM:
54
+ NAME: Portable Pixel Map (PPM)
55
+ PUID: fmt/408
56
+ MIME: image/x-portable-pixmap
57
+ EXTENSIONS: ppm
58
+
59
+ AUDIO:
60
+
61
+ WAV:
62
+ NAME: Waveform Audio File Format (WAVE)
63
+ MIME: audio/x-wav
64
+ EXTENSIONS: wav
65
+
66
+ MP3:
67
+ NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
68
+ MIME: audio/mpeg
69
+ EXTENSIONS: mp3
70
+
71
+ FLAC:
72
+ NAME: Free Lossless Audio Codec (FLAC)
73
+ MIME: audio/flac
74
+ EXTENSIONS: flac
75
+
76
+ VIDEO:
77
+
78
+ MPEG:
79
+ NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
80
+ MIME: video/mpeg
81
+ EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
82
+
83
+ MPEG4:
84
+ NAME: Moving Picture Experts Group (MPEG-4)
85
+ MIME: video/mp4
86
+ EXTENSIONS: mp4,mpeg4
87
+
88
+ MJP2:
89
+ NAME: Motion JPEG 2000 (MJP2)
90
+ MIME: video/mj2
91
+ EXTENSIONS: mj2, mjp2
92
+
93
+ QTFF:
94
+ NAME: QuickTime File Format (QTFF)
95
+ MIME: video/quicktime
96
+ EXTENSIONS: mov, qt
97
+
98
+ AVI:
99
+ NAME: Audio Video Interleave (AVI)
100
+ MIME: video/x-msvideo
101
+ EXTENSIONS: avi
102
+
103
+ OGGV:
104
+ NAME: OGG Video (OGGV)
105
+ MIME: video/ogg
106
+ EXTENSIONS: ogv
107
+
108
+ WMV:
109
+ NAME: Windows Media Video (WMV)
110
+ MIME: video/x-ms-wmv
111
+ EXTENSIONS: wmv
112
+
113
+ DV:
114
+ NAME: Digital Video (DV)
115
+ MIME: video/dv
116
+ EXTENSIONS: dv
117
+
118
+ FLASH:
119
+ NAME: Flash video (FLV)
120
+ MIME: video/x-flv
121
+ EXTENSIONS: flv
122
+
123
+ DOCUMENT:
124
+
125
+ TXT:
126
+ NAME: Unformatted text
127
+ MIME: text/plain
128
+ EXTENSIONS: txt
129
+
130
+ RTF:
131
+ NAME: Rich Text Format (RTF)
132
+ PUID: fmt/45
133
+ MIME: text/rtf application/rtf
134
+ EXTENSIONS: rtf
135
+
136
+ HTML:
137
+ NAME: HyperText Markup Language (HTML)
138
+ MIME: text/html
139
+ EXTENSIONS: html, htm
140
+
141
+ MSDOC:
142
+ NAME: Microsoft Word Document (DOC)
143
+ PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
144
+ MIME: application/vnd.ms-word application/msword
145
+ EXTENSIONS: doc
146
+
147
+ MSDOCX:
148
+ NAME: Microsoft Word OpenXML Document (DOCX)
149
+ PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
150
+ MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
151
+ EXTENSIONS: docx
152
+
153
+ MSXLS:
154
+ NAME: Microsoft Excel Spreadsheet (XLS)
155
+ MIME: application/vnd.ms-excel,application/msexcel
156
+ EXTENSIONS: xls
157
+
158
+ MSXLSX:
159
+ NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
160
+ PUID: fido-fmt/189.xl
161
+ MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
162
+ EXTENSIONS: xlsx
163
+
164
+ MSPPT:
165
+ NAME: Microsoft Powerpoint Presentation (PPT)
166
+ MIME: application/vnd.ms-powerpoint,application/mspowerpoint
167
+ EXTENSIONS: ppt
168
+
169
+ MSPPTX:
170
+ NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
171
+ PUID: fido-fmt/189.ppt
172
+ MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
173
+ EXTENSIONS: pptx
174
+
175
+ PDF:
176
+ NAME: Adobe Portable Document Format (PDF)
177
+ PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
178
+ MIME: application/pdf
179
+ EXTENSIONS: pdf
180
+
181
+ PDFA:
182
+ NAME: Adobe Portable Document Format for Archives (PDFA)
183
+ PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
184
+ MIME: application/pdf
185
+ EXTENSIONS: pdf
186
+
187
+ WORDPERFECT:
188
+ NAME: WordPerfect Document (WPD)
189
+ PUID: x-fmt/44 x-fmt/394
190
+ MIME: application/vnd.wordperfect
191
+ EXTENSIONS: wpd
192
+
193
+ XML:
194
+ NAME: Extensible Markup Language (XML)
195
+ MIME: text/xml
196
+ PUID: fmt/101
197
+ EXTENSIONS: xml
198
+
199
+ SHAREPOINT_MAP:
200
+ NAME: Sharepoint mapping file
201
+ # This is again an invented mime type. It's actually XML ...
202
+ MIME: text/xml/sharepoint_map
203
+ PUID: fmt/101
204
+ EXTENSIONS: xml
205
+
206
+ ARCHIVE:
207
+
208
+ EAD:
209
+ NAME: Encoded Archival Description (EAD)
210
+ # This is again an invented mime type. It's actually XML ...
211
+ MIME: archive/ead
212
+ PUID: fmt/101
213
+ EXTENSIONS: ead,xml
@@ -0,0 +1,103 @@
1
+ # coding: utf-8
2
+
3
+ ### require 'tools/string'
4
+
5
+ require 'libis/tools/logger'
6
+ require 'libis/format/type_database'
7
+
8
+ require_relative 'repository'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
# Abstract base class of all format converters.
#
# Subclasses supply the actual work in #init and #do_convert. Creating a subclass automatically
# registers it with the converter Repository (see .inherited).
#
# The class-level query helpers (.conversions, .input_type?, ...) call +input_types+ and +output_types+
# on the class itself, while this class only defines raising instance-level versions of them.
# NOTE(review): concrete converters presumably have to define class-level versions as well -- confirm.
class Base
  include Libis::Tools::Logger

  class << self

    # Ruby hook that runs whenever a subclass is created; registers the new converter class.
    #
    # @param klass [Class] the newly defined subclass
    def inherited(klass)
      super
      Repository.register klass
    end

    # @return [Hash] every entry of +input_types+ mapped to the value of +output_types+
    def conversions
      # each_with_object yields (element, memo); the previous inject call had the two swapped.
      input_types.each_with_object({}) do |input_type, table|
        table[input_type] = output_types
      end
    end

    # @return [Boolean] true when +type_id+ is listed in +input_types+
    def input_type?(type_id)
      input_types.include? type_id
    end

    # @return [Boolean] true when +type_id+ is listed in +output_types+
    def output_type?(type_id)
      output_types.include? type_id
    end

    # Same as .input_type?, but starting from a mime type: only the first type id the TypeDatabase
    # returns for the mime type is checked.
    def input_mimetype?(mimetype)
      input_type? TypeDatabase.instance.mime_types(mimetype).first
    end

    # Same as .output_type?, but starting from a mime type: only the first type id the TypeDatabase
    # returns for the mime type is checked.
    def output_mimetype?(mimetype)
      output_type? TypeDatabase.instance.mime_types(mimetype).first
    end

    # @return [Boolean, nil] true when +output_type+ is among the outputs registered for +input_type+;
    #   nil when +input_type+ is not an input type of this converter
    def conversion?(input_type, output_type)
      outputs = conversions[input_type]
      outputs && outputs.any? { |type| type == output_type }
    end

    # @return [Object, nil] the output types registered for +input_type+, nil if there are none
    def output_for(input_type)
      conversions[input_type]
    end

    # @return [Boolean] true when the TypeDatabase knows at least one type for the file extension
    def extension?(extension)
      # NOTE(review): the sibling lookups go through TypeDatabase.instance; confirm that ext_types
      # really is available at class level.
      !TypeDatabase.ext_types(extension).first.nil?
    end

  end

  # Stores the arguments and hands the string form of +source+ to #init.
  #
  # @param source  [Object, nil] what the converter will work on
  # @param options [Hash, nil] converter options; a nil/false value is replaced by an empty Hash
  # @param flags   [Hash, nil] converter flags; a nil/false value is replaced by an empty Hash
  def initialize( source = nil, options = {}, flags = {} )
    @source = source
    @options = options || {}
    @flags = flags || {}
    source_name = begin
      source.to_s
    rescue StandardError
      # a source that cannot be turned into a string is passed on as nil
      nil
    end
    init(source_name)
  end

  # Public entry point of a conversion; delegates to the subclass' #do_convert.
  def convert(target, format = nil)
    do_convert(target, format)
  end

  # To be overridden by the converter.
  #
  # @raise [RuntimeError] always, in this base implementation
  def input_types
    raise RuntimeError, 'Method #input_types needs to be overridden in converter'
  end

  protected

  attr_accessor :source, :options, :flags

  # To be overridden by the converter.
  #
  # @raise [RuntimeError] always, in this base implementation
  def output_types
    raise RuntimeError, 'Method #output_types needs to be overridden in converter'
  end

  # Called by #initialize with the string form of the source; to be implemented by the converter.
  #
  # @raise [RuntimeError] always, in this base implementation
  def init(_)
    raise RuntimeError, 'Method #init should be implemented in converter'
  end

  # Called by #convert with the target and format; to be implemented by the converter.
  #
  # @raise [RuntimeError] always, in this base implementation
  def do_convert(_, _)
    raise RuntimeError, 'Method #do_convert should be implemented in converter'
  end

end
100
+
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+
3
+ require 'fileutils'
4
+
5
+ require 'libis/tools/logger'
6
+ require 'libis/format/type_database'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
# Runs a file through an ordered list of converters.
#
# The chain is an Array of Hashes, each holding a converter class under :converter and the type the
# converter has to produce under :target.
class Chain
  include ::Libis::Tools::Logger

  # @param converter_chain [Array<Hash>] the conversion steps, in execution order
  def initialize(converter_chain)
    @converter_chain = converter_chain
  end

  # @return [Array<Hash>] the conversion steps this chain was created with
  def to_array
    @converter_chain
  end

  # Converts +src_file+ into +target_file+ by running every step of the chain in order.
  #
  # Each step but the last writes to a temporary file next to +target_file+; those files are removed
  # again when the method finishes, also when a step raises.
  #
  # @param src_file    [String] file to start from
  # @param target_file [String] file the last step writes to
  # @param operations  [Hash, Array] operation names (with optional values); each one is sent to the
  #   last converter in the chain that responds to it
  # @return [Array<String>] the temporary files that were used
  def convert(src_file, target_file, operations = [])
    operations_for = assign_operations(operations)
    temp_files = []
    last_step = @converter_chain.length - 1

    begin
      @converter_chain.each_with_index do |step, index|
        target_type = step[:target]
        converter_class = step[:converter]
        converter = converter_class.new(src_file)

        # a converter without assigned operations has no entry in the table
        operations_for.fetch(converter_class, {}).each do |name, value|
          converter.send name, value
        end

        target = target_file
        if index < last_step
          # intermediate result: derive a name that does not exist yet
          extension = TypeDatabase.instance.type2ext(target_type)
          target += '.temp.' + extension
          target += '.' + extension while File.exist? target
          temp_files << target
        end

        FileUtils.mkdir_p File.dirname(target)
        converter.convert(target, target_type)

        # the output of this step is the input of the next one
        src_file = target
      end
    ensure
      # a failed step may not have produced its file, so only delete what is really there
      temp_files.each { |file| File.delete(file) if File.exist?(file) }
    end

    temp_files
  end

  private

  # Builds a table { converter class => { method name => value } } that tells which converter has to
  # perform which operation. Operations no converter responds to are logged and skipped.
  def assign_operations(operations)
    operations.each_with_object({}) do |(name, value), assigned|
      operation = name.to_s.downcase.to_sym
      step = @converter_chain.reverse_each.find { |c| c[:converter].new.respond_to? operation }
      if step
        (assigned[step[:converter]] ||= {})[operation] = value
      else
        error "No converter in the converter chain supports '#{operation}'. Continuing conversion without this operation."
      end
    end
  end

end
77
+
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,110 @@
1
+ # coding: utf-8
2
+
3
+ require 'set'
4
+ require 'singleton'
5
+
6
+ require 'libis/tools/logger'
7
+
8
+ require_relative 'chain'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
# Singleton registry of the known converter classes and finder of conversion chains between types.
class Repository
  include Singleton
  include ::Libis::Tools::Logger

  # Set of registered converter classes.
  attr_reader :converters

  # Glob pattern of the converter source files that get loaded when no converter is registered yet.
  # It has to be readable as well, because .get_converters reads it back.
  attr_accessor :converters_glob

  def initialize
    @converters = Set.new
    # converter files are looked for in the directory this file lives in
    @converters_glob = File.join(File.dirname(__FILE__), '*_converter.rb')
  end

  # Adds a converter class to the registry.
  #
  # @return [Set, nil] the result of Set#add?: nil when the class was registered before
  def self.register(converter_class)
    instance.converters.add? converter_class
  end

  # Returns the registered converters. While the registry is still empty, every file matching
  # +converters_glob+ is required first.
  #
  # @return [Set] the registered converter classes
  def self.get_converters
    if instance.converters.empty?
      Dir.glob(instance.converters_glob).each do |filename|
        require File.expand_path(filename)
      end
    end
    instance.converters
  end

  # Finds a chain of converters that turns +src_type+ into +tgt_type+ and supports all +operations+.
  #
  # When several equally short chains exist a warning is logged and the first one is used.
  #
  # @return [Chain, nil] the conversion chain, or nil (after logging an error) when there is none
  def self.get_converter_chain(src_type, tgt_type, operations = [])
    msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
    chain_list = recursive_chain(src_type, tgt_type, operations)

    if chain_list.empty?
      log_message :error, "No conversion chain found for #{msg}"
      return nil
    end

    if chain_list.length > 1
      log_message :warn, "Found more than one conversion chain for #{msg}. Picking the first one."
    end

    chain_list.each do |chain|
      description = "Base chain: #{src_type.to_s}"
      chain.each do |node|
        description += "->#{node[:converter].name}:#{node[:target].to_s}"
      end
      log_message :debug, description
    end

    ::Libis::Format::Converter::Chain.new(chain_list.first)
  end

  # Collects the shortest converter chains from +src_type+ to +tgt_type+ into +chains_found+.
  #
  # NOTE: the bare `private` that used to precede this method had no effect on a method defined on
  # the class itself, so the method has always been callable from outside; that is left unchanged.
  #
  # @param chains_found  [Array<Array<Hash>>] accumulator shared by all recursion levels
  # @param current_chain [Array<Hash>] the steps taken so far to arrive at +src_type+
  # @return [Array<Array<Hash>>] +chains_found+
  def self.recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
    return chains_found unless current_chain.length < 8 # upper limit of converter chain we want to consider

    # first pass: converters that reach the target type directly from here
    get_converters.each do |converter|
      next unless converter.conversion?(src_type, tgt_type)
      next if current_chain.any? { |c| c[:converter] == converter && c[:target] == tgt_type }

      sequence = current_chain.dup << { :converter => converter, :target => tgt_type }

      # the chain as a whole has to support all the operations
      supported = operations.all? do |op, _|
        operation = op.to_s.downcase.to_sym
        sequence.any? { |n| n[:converter].new.respond_to? operation }
      end
      next unless supported

      # we only want to remember the shortest converter chains
      chains_found.clear if !chains_found.empty? && sequence.length < chains_found[0].length
      chains_found << sequence if chains_found.empty? || sequence.length == chains_found[0].length
    end

    # going one level deeper cannot give a shorter chain than the ones we already have
    return chains_found unless chains_found.empty? || current_chain.length + 1 < chains_found[0].length

    # second pass: try every intermediate type that can be produced from the source type
    get_converters.each do |converter|
      next unless converter.input_type? src_type
      converter.output_types(src_type).each do |tmp_type|
        # would like to enable the following for optimization, but some operation may require such a step
        # next if tmp_type == src_type
        # next if current_chain.any? { |c| c[:target] == tmp_type}
        recursive_chain(tmp_type, tgt_type, operations, chains_found,
                        current_chain.dup << { :converter => converter, :target => tmp_type })
      end
    end

    chains_found
  end

  # The Logger module is mixed in with `include`, i.e. at instance level, so class-level code logs
  # through the singleton instance.
  def self.log_message(level, message)
    instance.send(level, message)
  end
  private_class_method :log_message

end
107
+
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,11 @@
1
+ # coding: utf-8
2
+
3
module Libis
  module Format
    # Namespace of the converter framework; its classes are loaded lazily on first reference.
    module Converter
      [
        [:Base, 'libis/format/converter/base'],
        [:Chain, 'libis/format/converter/chain'],
        [:Repository, 'libis/format/converter/repository']
      ].each do |constant, feature|
        autoload constant, feature
      end
    end
  end
end
@@ -0,0 +1,38 @@
1
+ require 'os'
2
+ require 'tempfile'
3
+ require 'csv'
4
+ require 'singleton'
5
+
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/command'
9
+
10
+ module Libis
11
+ module Format
12
+
13
# Wrapper around the DROID command line tool that ships in the gem's tools/droid directory.
class Droid
  include ::Libis::Tools::Logger
  include Singleton

  # Convenience shortcut for Droid.instance.run.
  def self.run(file)
    instance.run file
  end

  # Lets DROID build a profile for +file+, exports the profile as a CSV report and returns the report.
  #
  # Profile and report live in a private temporary directory that is removed when the method
  # finishes, also when DROID or the CSV parsing fails.
  #
  # @param file [String] path handed to DROID's '-a' option
  # @return [Array<Hash>] one Hash per report row, keyed by the downcased, symbolized column headers
  def run(file)
    droid_dir = File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', 'droid')
    droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')

    # Dir.mktmpdir comes with 'tmpdir', which the already required 'tempfile' loads.
    Dir.mktmpdir('droid') do |work_dir|
      profile = File.join(work_dir, 'droid.profile')
      report = File.join(work_dir, 'droid.csv')

      # step 1: add the file to a new profile
      result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
      warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0

      # step 2: export the profile to a CSV report
      result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
      warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0

      CSV.read(report, headers: true, header_converters: [:downcase, :symbol]).map(&:to_hash)
    end
  end
end
36
+
37
+ end
38
+ end