libis-format 0.9.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (186) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +15 -0
  4. data/.travis.yml +36 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +38 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/data/ISOcoated.icc +0 -0
  12. data/data/PDFA_def.ps +32 -0
  13. data/data/ead.xsd +2728 -0
  14. data/data/lias_formats.xml +106 -0
  15. data/data/types.yml +213 -0
  16. data/lib/libis/format/converter/base.rb +103 -0
  17. data/lib/libis/format/converter/chain.rb +80 -0
  18. data/lib/libis/format/converter/repository.rb +110 -0
  19. data/lib/libis/format/converter.rb +11 -0
  20. data/lib/libis/format/droid.rb +38 -0
  21. data/lib/libis/format/fido.rb +109 -0
  22. data/lib/libis/format/identifier.rb +185 -0
  23. data/lib/libis/format/type_database.rb +170 -0
  24. data/lib/libis/format/version.rb +5 -0
  25. data/lib/libis/format.rb +12 -0
  26. data/lib/libis-format.rb +1 -0
  27. data/libis-format.gemspec +30 -0
  28. data/spec/data/Cevennes2.bmp +0 -0
  29. data/spec/data/Cevennes2.jp2 +0 -0
  30. data/spec/data/Cevennes2.ppm +22492 -0
  31. data/spec/data/test-ead.xml +392 -0
  32. data/spec/data/test-jpg.tif +0 -0
  33. data/spec/data/test-lzw.tif +0 -0
  34. data/spec/data/test.bmp +0 -0
  35. data/spec/data/test.doc +0 -0
  36. data/spec/data/test.docx +0 -0
  37. data/spec/data/test.gif +0 -0
  38. data/spec/data/test.ods +0 -0
  39. data/spec/data/test.odt +0 -0
  40. data/spec/data/test.pdf +0 -0
  41. data/spec/data/test.png +0 -0
  42. data/spec/data/test.ps +8631 -0
  43. data/spec/data/test.psd +0 -0
  44. data/spec/data/test.rtf +1455 -0
  45. data/spec/data/test.tif +0 -0
  46. data/spec/data/test.txt +12 -0
  47. data/spec/data/test.xcf +0 -0
  48. data/spec/data/test.xls +0 -0
  49. data/spec/data/test.xlsx +0 -0
  50. data/spec/data/test.xml +4 -0
  51. data/spec/identifier_spec.rb +59 -0
  52. data/spec/spec_helper.rb +9 -0
  53. data/spec/test_types.yml +12 -0
  54. data/spec/type_database_spec.rb +140 -0
  55. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  56. data/tools/droid/container-signature-20150307.xml +2235 -0
  57. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  58. data/tools/droid/droid.bat +154 -0
  59. data/tools/droid/droid.sh +138 -0
  60. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  61. data/tools/droid/lib/activation-1.1.jar +0 -0
  62. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  63. data/tools/droid/lib/antlr-3.2.jar +0 -0
  64. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  65. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  66. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  67. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  68. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  69. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  70. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  71. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  72. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  73. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  74. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  75. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  76. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  77. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  78. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  79. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  80. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  81. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  82. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  83. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  84. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  85. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  86. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  87. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  88. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  89. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  90. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  91. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  92. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  93. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  94. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  95. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  96. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  97. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  98. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  99. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  100. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  101. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  102. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  103. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  104. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  105. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  106. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  107. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  108. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  109. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  110. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  111. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  112. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  113. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  114. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  115. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  116. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  117. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  118. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  119. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  120. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  121. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  122. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  123. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  124. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  125. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  126. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  127. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  128. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  129. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  130. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  131. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  132. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  133. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  134. data/tools/droid/lib/jta-1.1.jar +0 -0
  135. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  136. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  137. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  138. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  139. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  140. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  141. data/tools/droid/lib/poi-3.7.jar +0 -0
  142. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  143. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  144. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  145. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  146. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  147. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  148. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  149. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  150. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  151. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  152. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  153. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  154. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  155. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  156. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  157. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  158. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  159. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  160. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  161. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  162. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  163. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  164. data/tools/droid/lib/xz-1.0.jar +0 -0
  165. data/tools/fido/__init__.py +0 -0
  166. data/tools/fido/argparselocal.py +2355 -0
  167. data/tools/fido/argparselocal.pyc +0 -0
  168. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  169. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  170. data/tools/fido/conf/dc.xsd +119 -0
  171. data/tools/fido/conf/dcmitype.xsd +53 -0
  172. data/tools/fido/conf/dcterms.xsd +383 -0
  173. data/tools/fido/conf/fido-formats.xsd +173 -0
  174. data/tools/fido/conf/format_extension_template.xml +105 -0
  175. data/tools/fido/conf/format_extensions.xml +498 -0
  176. data/tools/fido/conf/formats-v81.xml +38355 -0
  177. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  178. data/tools/fido/conf/versions.xml +8 -0
  179. data/tools/fido/fido.bat +4 -0
  180. data/tools/fido/fido.py +854 -0
  181. data/tools/fido/fido.sh +5 -0
  182. data/tools/fido/prepare.py +616 -0
  183. data/tools/fido/pronomutils.py +115 -0
  184. data/tools/fido/toxml.py +52 -0
  185. data/tools/fido/update_signatures.py +171 -0
  186. metadata +342 -0
@@ -0,0 +1,106 @@
1
+ <?xml version="1.0"?>
2
+ <formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
3
+ <format>
4
+ <puid>lias-fmt/101</puid>
5
+ <mime>text/xml</mime>
6
+ <name>Extensible Markup Language</name>
7
+ <version>1.0</version>
8
+ <alias>XML (1.0)</alias>
9
+ <pronom_id>638</pronom_id>
10
+ <extension>xml</extension>
11
+ <has_priority_over>fmt/101</has_priority_over>
12
+ <signature>
13
+ <name>XML 1.0</name>
14
+ <pattern>
15
+ <position>BOF</position>
16
+ <regex>(?s)\A.{0,3}\x3c\?xml\s+version\s*=\s*(?:"|')1\.0(?:"|')</regex>
17
+ </pattern>
18
+ </signature>
19
+ </format>
20
+ <format>
21
+ <puid>lias-fmt/001</puid>
22
+ <name>Textura TS print file</name>
23
+ <mime>text/plain</mime>
24
+ <signature>
25
+ <name>Textura TS print file</name>
26
+ <pattern>
27
+ <position>BOF</position>
28
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
29
+ </pattern>
30
+ <pattern>
31
+ <position>VAR</position>
32
+ <regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
33
+ </pattern>
34
+ <pattern>
35
+ <position>VAR</position>
36
+ <regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
37
+ </pattern>
38
+ <pattern>
39
+ <position>VAR</position>
40
+ <regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
41
+ </pattern>
42
+ <pattern>
43
+ <position>VAR</position>
44
+ <regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
45
+ </pattern>
46
+ <pattern>
47
+ <position>VAR</position>
48
+ <regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
49
+ </pattern>
50
+ <pattern>
51
+ <position>VAR</position>
52
+ <regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
53
+ </pattern>
54
+ <!--
55
+ -->
56
+ <pattern>
57
+ <position>VAR</position>
58
+ <regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
59
+ </pattern>
60
+ <pattern>
61
+ <position>EOF</position>
62
+ <regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
63
+ </pattern>
64
+ </signature>
65
+ </format>
66
+ <format>
67
+ <puid>lias-fmt/002</puid>
68
+ <name>Textura VP/S print file</name>
69
+ <mime>text/plain</mime>
70
+ <has_priority_over>lias-fmt/001</has_priority_over>
71
+ <signature>
72
+ <name>Textura VP/S print file</name>
73
+ <pattern>
74
+ <position>BOF</position>
75
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
76
+ </pattern>
77
+ <pattern>
78
+ <position>EOF</position>
79
+ <regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
80
+ </pattern>
81
+ </signature>
82
+ </format>
83
+ <format>
84
+ <puid>lias-fmt/189.word</puid>
85
+ <name>Microsoft Office Open XML - Word</name>
86
+ <extension>docx</extension>
87
+ <has_priority_over>x-fmt/263</has_priority_over>
88
+ <has_priority_over>fmt/189</has_priority_over>
89
+ <has_priority_over>fido-fmt/189.word</has_priority_over>
90
+ <signature>
91
+ <name>Microsoft Office Open XML - Word</name>
92
+ <pattern>
93
+ <position>BOF</position>
94
+ <regex>(?s)\APK\x03\x04</regex>
95
+ </pattern>
96
+ <pattern>
97
+ <position>BOF</position>
98
+ <regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
99
+ </pattern>
100
+ <pattern>
101
+ <position>EOF</position>
102
+ <regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
103
+ </pattern>
104
+ </signature>
105
+ </format>
106
+ </formats>
data/data/types.yml ADDED
@@ -0,0 +1,213 @@
1
+ ---
2
+ # This lists all the types the converters know about along with the mime types and file extensions.
3
+ # The first file extension in the list is the default one that will be used when a file of that type is created.
4
+ # The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
5
+ # class should take care of that.
6
+ # Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
7
+ # reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
8
+ # to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
9
+
10
+ IMAGE:
11
+ TIFF:
12
+ NAME: Tagged Image File Format (TIFF)
13
+ MIME: image/tiff
14
+ EXTENSIONS: tif,tiff
15
+
16
+ JPEG2000:
17
+ NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
18
+ MIME: image/jp2
19
+ EXTENSIONS: jp2
20
+
21
+ JPEG:
22
+ NAME: Joint Photographic Experts Group (JPEG)
23
+ MIME: image/jpeg
24
+ EXTENSIONS: jpg,jpe,jpeg
25
+
26
+ PNG:
27
+ NAME: Portable Network Graphics (PNG)
28
+ MIME: image/png
29
+ EXTENSIONS: png
30
+
31
+ BMP:
32
+ NAME: Device Independent Bitmap (DIB/BMP)
33
+ MIME: image/bmp,image/x-ms-bmp
34
+ EXTENSIONS: bmp
35
+
36
+ GIF:
37
+ NAME: Graphics Interchange Format (GIF)
38
+ MIME: image/gif
39
+ EXTENSIONS: gif
40
+
41
+ PBM:
42
+ NAME: Portable Bitmap Format (PBM)
43
+ PUID: fmt/409
44
+ MIME: image/x-portable-bitmap
45
+ EXTENSIONS: pbm
46
+
47
+ PGM:
48
+ NAME: Portable GrayMap Format (PGM)
49
+ PUID: fmt/406
50
+ MIME: image/x-portable-graymap
51
+ EXTENSIONS: pgm
52
+
53
+ PPM:
54
+ NAME: Portable Pixel Map (PPM)
55
+ PUID: fmt/408
56
+ MIME: image/x-portable-pixmap
57
+ EXTENSIONS: ppm
58
+
59
+ AUDIO:
60
+
61
+ WAV:
62
+ NAME: Waveform Audio File Format (WAVE)
63
+ MIME: audio/x-wav
64
+ EXTENSIONS: wav
65
+
66
+ MP3:
67
+ NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
68
+ MIME: audio/mpeg
69
+ EXTENSIONS: mp3
70
+
71
+ FLAC:
72
+ NAME: Free Lossless Audio Codec (FLAC)
73
+ MIME: audio/flac
74
+ EXTENSIONS: flac
75
+
76
+ VIDEO:
77
+
78
+ MPEG:
79
+ NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
80
+ MIME: video/mpeg
81
+ EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
82
+
83
+ MPEG4:
84
+ NAME: Moving Picture Experts Group (MPEG-4)
85
+ MIME: video/mp4
86
+ EXTENSIONS: mp4,mpeg4
87
+
88
+ MJP2:
89
+ NAME: Motion JPEG 2000 (MJP2)
90
+ MIME: video/mj2
91
+ EXTENSIONS: mj2, mjp2
92
+
93
+ QTFF:
94
+ NAME: QuickTime File Format (QTFF)
95
+ MIME: video/quicktime
96
+ EXTENSIONS: mov, qt
97
+
98
+ AVI:
99
+ NAME: Audio Video Interleave (AVI)
100
+ MIME: video/x-msvideo
101
+ EXTENSIONS: avi
102
+
103
+ OGGV:
104
+ NAME: OGG Video (OGGV)
105
+ MIME: video/ogg
106
+ EXTENSIONS: ogv
107
+
108
+ WMV:
109
+ NAME: Windows Media Video (WMV)
110
+ MIME: video/x-ms-wmv
111
+ EXTENSIONS: wmv
112
+
113
+ DV:
114
+ NAME: Digital Video (DV)
115
+ MIME: video/dv
116
+ EXTENSIONS: dv
117
+
118
+ FLASH:
119
+ NAME: Flash video (FLV)
120
+ MIME: video/x-flv
121
+ EXTENSIONS: flv
122
+
123
+ DOCUMENT:
124
+
125
+ TXT:
126
+ NAME: Unformatted text
127
+ MIME: text/plain
128
+ EXTENSIONS: txt
129
+
130
+ RTF:
131
+ NAME: Rich Text Format (RTF)
132
+ PUID: fmt/45
133
+ MIME: text/rtf application/rtf
134
+ EXTENSIONS: rtf
135
+
136
+ HTML:
137
+ NAME: HyperText Markup Language (HTML)
138
+ MIME: text/html
139
+ EXTENSIONS: html, htm
140
+
141
+ MSDOC:
142
+ NAME: Microsoft Word Document (DOC)
143
+ PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
144
+ MIME: application/vnd.ms-word application/msword
145
+ EXTENSIONS: doc
146
+
147
+ MSDOCX:
148
+ NAME: Microsoft Word OpenXML Document (DOCX)
149
+ PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
150
+ MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
151
+ EXTENSIONS: docx
152
+
153
+ MSXLS:
154
+ NAME: Microsoft Excel Spreadsheet (XLS)
155
+ MIME: application/vnd.ms-excel,application/msexcel
156
+ EXTENSIONS: xls
157
+
158
+ MSXLSX:
159
+ NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
160
+ PUID: fido-fmt/189.xl
161
+ MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
162
+ EXTENSIONS: xlsx
163
+
164
+ MSPPT:
165
+ NAME: Microsoft Powerpoint Presentation (PPT)
166
+ MIME: application/vnd.ms-powerpoint,application/mspowerpoint
167
+ EXTENSIONS: ppt
168
+
169
+ MSPPTX:
170
+ NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
171
+ PUID: fido-fmt/189.ppt
172
+ MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
173
+ EXTENSIONS: pptx
174
+
175
+ PDF:
176
+ NAME: Adobe Portable Document Format (PDF)
177
+ PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
178
+ MIME: application/pdf
179
+ EXTENSIONS: pdf
180
+
181
+ PDFA:
182
+ NAME: Adobe Portable Document Format for Archives (PDFA)
183
+ PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
184
+ MIME: application/pdf
185
+ EXTENSIONS: pdf
186
+
187
+ WORDPERFECT:
188
+ NAME: WordPerfect Document (WPD)
189
+ PUID: x-fmt/44 x-fmt/394
190
+ MIME: application/vnd.wordperfect
191
+ EXTENSIONS: wpd
192
+
193
+ XML:
194
+ NAME: Extensible Markup Language (XML)
195
+ MIME: text/xml
196
+ PUID: fmt/101
197
+ EXTENSIONS: xml
198
+
199
+ SHAREPOINT_MAP:
200
+ NAME: Sharepoint mapping file
201
+ # This is again an invented mime type. It's actually an XML ...
202
+ MIME: text/xml/sharepoint_map
203
+ PUID: fmt/101
204
+ EXTENSIONS: xml
205
+
206
+ ARCHIVE:
207
+
208
+ EAD:
209
+ NAME: Encoded Archival Description (EAD)
210
+ # This is again an invented mime type. It's actually an XML ...
211
+ MIME: archive/ead
212
+ PUID: fmt/101
213
+ EXTENSIONS: ead,xml
@@ -0,0 +1,103 @@
1
+ # coding: utf-8
2
+
3
+ ### require 'tools/string'
4
+
5
+ require 'libis/tools/logger'
6
+ require 'libis/format/type_database'
7
+
8
+ require_relative 'repository'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
+ class Base
15
+ include Libis::Tools::Logger
16
+
17
+ def input_types
18
+ raise RuntimeError, 'Method #input_types needs to be overridden in converter'
19
+ end
20
+
21
+ protected
22
+
23
+ def output_types
24
+ raise RuntimeError, 'Method #output_types needs to be overridden in converter'
25
+ end
26
+
27
+ attr_accessor :source, :options, :flags
28
+
29
+ def init(_)
30
+ raise RuntimeError, 'Method #init should be implemented in converter'
31
+ end
32
+
33
+ def do_convert(_, _)
34
+ raise RuntimeError, 'Method #do_convert should be implemented in converter'
35
+ end
36
+
37
+ public
38
+
39
+ def initialize( source = nil, options = {}, flags = {} )
40
+ @source = source
41
+ @options = options ? options : {}
42
+ @flags = flags ? flags : {}
43
+ init(source.to_s rescue nil)
44
+ end
45
+
46
+ def convert(target, format = nil)
47
+ do_convert(target, format)
48
+ end
49
+
50
+ def Base.inherited( klass )
51
+
52
+ Repository.register klass
53
+
54
+ class << self
55
+
56
+ def conversions
57
+ input_types.inject({}) do |input_type, hash|
58
+ hash[input_type] = output_types
59
+ hash
60
+ end
61
+ end
62
+
63
+ def input_type?(type_id)
64
+ input_types.include? type_id
65
+ end
66
+
67
+ def output_type?(type_id)
68
+ output_types.include? type_id
69
+ end
70
+
71
+ def input_mimetype?(mimetype)
72
+ type_id = TypeDatabase.instance.mime_types(mimetype).first
73
+ input_type? type_id
74
+ end
75
+
76
+ def output_mimetype?(mimetype)
77
+ type_id = TypeDatabase.instance.mime_types(mimetype).first
78
+ output_type? type_id
79
+ end
80
+
81
+ def conversion?(input_type, output_type)
82
+ conversions[input_type] and conversions[input_type].any? { |t| t == output_type }
83
+ end
84
+
85
+ def output_for(input_type)
86
+ conversions[input_type]
87
+ end
88
+
89
+ def extension?(extension)
90
+ !TypeDatabase.ext_types(extension).first.nil?
91
+ end
92
+
93
+ end
94
+
95
+
96
+ end
97
+
98
+
99
+ end
100
+
101
+ end
102
+ end
103
+ end
@@ -0,0 +1,80 @@
1
+ # coding: utf-8
2
+
3
+ require 'fileutils'
4
+
5
+ require 'libis/tools/logger'
6
+ require 'libis/format/type_database'
7
+
8
+ module Libis
9
+ module Format
10
+ module Converter
11
+
12
+ class Chain
13
+ include ::Libis::Tools::Logger
14
+
15
+ def initialize(converter_chain)
16
+ @converter_chain = converter_chain
17
+ end
18
+
19
+ def to_array
20
+ @converter_chain
21
+ end
22
+
23
+ def convert(src_file, target_file, operations = []) # runs every converter of the chain in order, feeding each output into the next step
24
+
25
+ chain = @converter_chain.clone
26
+
27
+ my_operations = {}
28
+
29
+ # sanity check: check if the required operations are supported by at least one converter in the chain
30
+ operations.each do |k,v|
31
+ method = k.to_s.downcase.to_sym
32
+ chain_element = @converter_chain.reverse.detect { |c| c[:converter].new.respond_to? method }
33
+ if chain_element
34
+ my_operations[chain_element[:converter]] ||= {}
35
+ my_operations[chain_element[:converter]][method] = v
36
+ else
37
+ error "No converter in the converter chain supports '#{method.to_s}'. Continuing conversion without this operation."
38
+ end
39
+ end
40
+
41
+ temp_files = []
42
+
43
+ # noinspection RubyParenthesesAroundConditionInspection
44
+ while (chain_element = chain.shift)
45
+
46
+ target_type = chain_element[:target]
47
+ converter_class = chain_element[:converter]
48
+ converter = converter_class.new(src_file)
49
+
50
+ (my_operations[converter_class] || {}).each do |k,v| # a converter without assigned operations has no entry (nil)
51
+ converter.send k, v
52
+ end
53
+
54
+ target = target_file
55
+
56
+ unless chain.empty?
57
+ target += '.temp.' + TypeDatabase.instance.type2ext(target_type)
58
+ target += '.' + TypeDatabase.instance.type2ext(target_type) while File.exist? target
59
+ temp_files << target
60
+ end
61
+
62
+ FileUtils.mkdir_p File.dirname(target)
63
+
64
+ converter.convert(target, target_type)
65
+
66
+ src_file = target
67
+
68
+ end
69
+
70
+ temp_files.each do |f|
71
+ File.delete(f)
72
+ end
73
+
74
+ end
+
76
+ end
77
+
78
+ end
79
+ end
80
+ end
@@ -0,0 +1,110 @@
1
+ # coding: utf-8
2
+
3
+ require 'set'
4
+ require 'singleton'
5
+
6
+ require 'libis/tools/logger'
7
+
8
+ require_relative 'chain'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
+ class Repository
15
+ include Singleton
16
+ include ::Libis::Tools::Logger
17
+
18
+ attr_reader :converters
19
+ attr_writer :converters_glob
20
+
21
+ def initialize
22
+ @converters = Set.new
23
+ @converters_glob = File.join(File.basename(__FILE__), '*_converter.rb')
24
+ end
25
+
26
+ def Repository.register(converter_class)
27
+ instance.converters.add? converter_class
28
+ end
29
+
30
+ def Repository.get_converters
31
+ if instance.converters.empty?
32
+ Dir.glob(instance.converters_glob).each do |filename|
33
+ # noinspection RubyResolve
34
+ require File.expand_path(filename)
35
+ end
36
+ end
37
+ instance.converters
38
+ end
39
+
40
+ def Repository.get_converter_chain(src_type, tgt_type, operations = []) # wraps the first chain found by recursive_chain in a Chain; returns nil when none is found
41
+ msg = "conversion from #{src_type.to_s} to #{tgt_type.to_s}"
42
+ chain_list = recursive_chain src_type, tgt_type, operations
43
+ if chain_list.length > 1
44
+ warn "Found more than one conversion chain for #{msg}. Picking the first one."
45
+ end
46
+ if chain_list.empty?
47
+ error "No conversion chain found for #{msg}"
48
+ return nil
49
+ end
50
+ chain_list.each do |chain|
51
+ msg = "Base chain: #{src_type.to_s}"
52
+ chain.each do |node|
53
+ msg += "->#{node[:converter].name}:#{node[:target].to_s}"
54
+ end
55
+ debug msg
56
+ end
57
+ ::Libis::Format::Converter::Chain.new(chain_list[0]) # the enclosing module is Converter (singular)
58
+ end
59
+
60
+ private
61
+
62
+ def Repository.recursive_chain(src_type, tgt_type, operations, chains_found = [], current_chain = [])
63
+ return chains_found unless current_chain.length < 8 # upper limit of converter chain we want to consider
64
+
65
+ get_converters.each do |converter|
66
+ if converter.conversion? src_type, tgt_type and !current_chain.any? { |c|
67
+ c[:converter] == converter and c[:target] == tgt_type }
68
+ node = Hash.new
69
+ node[:converter] = converter
70
+ node[:target] = tgt_type
71
+ sequence = current_chain.dup
72
+ sequence << node
73
+ # check if the chain supports all the operations
74
+ success = true
75
+ operations.each do |op, _|
76
+ success = false unless sequence.any? do |n|
77
+ n[:converter].new.respond_to? op.to_s.downcase.to_sym
78
+ end
79
+ end
80
+ if success
81
+ # we only want to remember the shortest converter chains
82
+ if !chains_found.empty? and sequence.length < chains_found[0].length
83
+ chains_found.clear
84
+ end
85
+ chains_found << sequence if chains_found.empty? or sequence.length == chains_found[0].length
86
+ end
87
+ end
88
+ end
89
+
90
+ return chains_found unless chains_found.empty? or current_chain.length + 1 < chains_found[0].length
91
+
92
+ get_converters.each do |converter|
93
+ next unless converter.input_type? src_type
94
+ converter.output_types(src_type).each do |tmp_type|
95
+ # would like to enable the following for optimalization, but some operation may require such a step
96
+ # next if tmp_type == src_type
97
+ # next if current_chain.any? { |c| c[:target] == tmp_type}
98
+ recursive_chain(tmp_type, tgt_type, operations, chains_found,
99
+ current_chain.dup << {:converter => converter, :target => tmp_type})
100
+ end
101
+ end
102
+
103
+ chains_found
104
+ end
105
+
106
+ end
107
+
108
+ end
109
+ end
110
+ end
@@ -0,0 +1,11 @@
1
+ # coding: utf-8
2
+
3
+ module Libis
4
+ module Format
5
+ module Converter
6
+ autoload :Base, 'libis/format/converter/base'
7
+ autoload :Chain, 'libis/format/converter/chain'
8
+ autoload :Repository, 'libis/format/converter/repository'
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,38 @@
1
+ require 'os'
2
+ require 'tempfile'
3
+ require 'csv'
4
+ require 'singleton'
5
+
6
+ require 'libis/tools/extend/string'
7
+ require 'libis/tools/logger'
8
+ require 'libis/tools/command'
9
+
10
+ module Libis
11
+ module Format
12
+
13
+ class Droid
14
+ include ::Libis::Tools::Logger
15
+ include Singleton
16
+
17
+ def self.run(file)
18
+ instance.run file
19
+ end
20
+
21
+ def run(file)
22
+ droid_dir = File.join(File.dirname(__FILE__), '..','..','..','tools','droid')
23
+ droid_cmd = File.join(droid_dir, OS.windows? ? 'droid.bat' : 'droid.sh')
24
+ profile = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .profile', nil)
25
+ report = File.join Dir.tmpdir, Dir::Tmpname.make_tmpname(%w'droid .csv', nil)
26
+ result = Libis::Tools::Command.run droid_cmd, '-a', file.escape_for_string, '-p', profile, '-q'
27
+ warn "DROID profile errors: #{result[:err].join("\n")}" unless result[:status] == 0
28
+ result = Libis::Tools::Command.run droid_cmd, '-e', report, '-p', profile, '-q'
29
+ warn "DROID report errors: #{result[:err].join("\n")}" unless result[:status] == 0
30
+ File.delete profile
31
+ result = CSV.read(report , headers: true, header_converters: [:downcase, :symbol])
32
+ File.delete report
33
+ result.map{|r|r.to_hash}
34
+ end
35
+ end
36
+
37
+ end
38
+ end