libis-format 0.9.5-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (207) hide show
  1. checksums.yaml +7 -0
  2. data/.coveralls.yml +2 -0
  3. data/.gitignore +18 -0
  4. data/.travis.yml +41 -0
  5. data/Gemfile +5 -0
  6. data/LICENSE.txt +22 -0
  7. data/README.md +39 -0
  8. data/Rakefile +8 -0
  9. data/bin/droid +15 -0
  10. data/bin/fido +12 -0
  11. data/bin/pdf_copy +13 -0
  12. data/data/ISOcoated_v2_eci.icc +0 -0
  13. data/data/PDFA_def.ps +40 -0
  14. data/data/ead.xsd +2728 -0
  15. data/data/eciRGB_v2.icc +0 -0
  16. data/data/lias_formats.xml +106 -0
  17. data/data/types.yml +217 -0
  18. data/lib/libis/format/config.rb +35 -0
  19. data/lib/libis/format/converter/base.rb +101 -0
  20. data/lib/libis/format/converter/chain.rb +167 -0
  21. data/lib/libis/format/converter/image_converter.rb +214 -0
  22. data/lib/libis/format/converter/office_converter.rb +50 -0
  23. data/lib/libis/format/converter/pdf_converter.rb +139 -0
  24. data/lib/libis/format/converter/repository.rb +98 -0
  25. data/lib/libis/format/converter.rb +11 -0
  26. data/lib/libis/format/droid.rb +45 -0
  27. data/lib/libis/format/fido.rb +102 -0
  28. data/lib/libis/format/identifier.rb +189 -0
  29. data/lib/libis/format/office_to_pdf.rb +52 -0
  30. data/lib/libis/format/pdf_copy.rb +40 -0
  31. data/lib/libis/format/pdf_merge.rb +41 -0
  32. data/lib/libis/format/pdf_split.rb +39 -0
  33. data/lib/libis/format/pdf_to_pdfa.rb +76 -0
  34. data/lib/libis/format/pdfa_validator.rb +61 -0
  35. data/lib/libis/format/type_database.rb +170 -0
  36. data/lib/libis/format/version.rb +5 -0
  37. data/lib/libis/format.rb +23 -0
  38. data/lib/libis-format.rb +1 -0
  39. data/libis-format.gemspec +34 -0
  40. data/spec/converter_spec.rb +212 -0
  41. data/spec/data/Cevennes2.bmp +0 -0
  42. data/spec/data/Cevennes2.jp2 +0 -0
  43. data/spec/data/Cevennes2.ppm +22492 -0
  44. data/spec/data/test-ead.xml +392 -0
  45. data/spec/data/test-jpg.tif +0 -0
  46. data/spec/data/test-lzw.tif +0 -0
  47. data/spec/data/test-options.jpg +0 -0
  48. data/spec/data/test.bmp +0 -0
  49. data/spec/data/test.doc +0 -0
  50. data/spec/data/test.docx +0 -0
  51. data/spec/data/test.gif +0 -0
  52. data/spec/data/test.jpg +0 -0
  53. data/spec/data/test.ods +0 -0
  54. data/spec/data/test.odt +0 -0
  55. data/spec/data/test.pdf +0 -0
  56. data/spec/data/test.pdf.tif +0 -0
  57. data/spec/data/test.png +0 -0
  58. data/spec/data/test.ps +8631 -0
  59. data/spec/data/test.psd +0 -0
  60. data/spec/data/test.rtf +1455 -0
  61. data/spec/data/test.tif +0 -0
  62. data/spec/data/test.txt +12 -0
  63. data/spec/data/test.xcf +0 -0
  64. data/spec/data/test.xls +0 -0
  65. data/spec/data/test.xlsx +0 -0
  66. data/spec/data/test.xml +4 -0
  67. data/spec/data/test_pdfa.pdf +0 -0
  68. data/spec/identifier_spec.rb +60 -0
  69. data/spec/spec_helper.rb +9 -0
  70. data/spec/test_types.yml +12 -0
  71. data/spec/type_database_spec.rb +140 -0
  72. data/tools/PdfTool.jar +0 -0
  73. data/tools/bcpkix-jdk15on-1.49.jar +0 -0
  74. data/tools/bcprov-jdk15on-1.49.jar +0 -0
  75. data/tools/droid/DROID_SignatureFile_V82.xml +32681 -0
  76. data/tools/droid/container-signature-20150307.xml +2235 -0
  77. data/tools/droid/droid-command-line-6.1.5.jar +0 -0
  78. data/tools/droid/droid.bat +154 -0
  79. data/tools/droid/droid.sh +138 -0
  80. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  81. data/tools/droid/lib/activation-1.1.jar +0 -0
  82. data/tools/droid/lib/antlr-2.7.7.jar +0 -0
  83. data/tools/droid/lib/antlr-3.2.jar +0 -0
  84. data/tools/droid/lib/antlr-runtime-3.2.jar +0 -0
  85. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  86. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  87. data/tools/droid/lib/aspectjrt-1.7.2.jar +0 -0
  88. data/tools/droid/lib/aspectjweaver-1.7.2.jar +0 -0
  89. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  90. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  91. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  92. data/tools/droid/lib/byteseek-1.1.1.jar +0 -0
  93. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  94. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  95. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  96. data/tools/droid/lib/commons-codec-1.4.jar +0 -0
  97. data/tools/droid/lib/commons-collections-3.2.1.jar +0 -0
  98. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  99. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  100. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  101. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  102. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  103. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  104. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  105. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  106. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  107. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  108. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  109. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  110. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  111. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  112. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  113. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  114. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  115. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  116. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  117. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  118. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  119. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  120. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  121. data/tools/droid/lib/dom4j-1.6.1.jar +0 -0
  122. data/tools/droid/lib/droid-container-6.1.5.jar +0 -0
  123. data/tools/droid/lib/droid-core-6.1.5.jar +0 -0
  124. data/tools/droid/lib/droid-core-interfaces-6.1.5.jar +0 -0
  125. data/tools/droid/lib/droid-export-6.1.5.jar +0 -0
  126. data/tools/droid/lib/droid-export-interfaces-6.1.5.jar +0 -0
  127. data/tools/droid/lib/droid-help-6.1.5.jar +0 -0
  128. data/tools/droid/lib/droid-report-6.1.5.jar +0 -0
  129. data/tools/droid/lib/droid-report-interfaces-6.1.5.jar +0 -0
  130. data/tools/droid/lib/droid-results-6.1.5.jar +0 -0
  131. data/tools/droid/lib/ejb3-persistence-1.0.2.GA.jar +0 -0
  132. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  133. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  134. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  135. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  136. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  137. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  138. data/tools/droid/lib/hibernate-commons-annotations-4.0.4.Final.jar +0 -0
  139. data/tools/droid/lib/hibernate-core-4.3.5.Final.jar +0 -0
  140. data/tools/droid/lib/hibernate-entitymanager-4.3.5.Final.jar +0 -0
  141. data/tools/droid/lib/hibernate-jpa-2.1-api-1.0.0.Final.jar +0 -0
  142. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  143. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  144. data/tools/droid/lib/jandex-1.1.0.Final.jar +0 -0
  145. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  146. data/tools/droid/lib/javassist-3.18.1-GA.jar +0 -0
  147. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  148. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  149. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  150. data/tools/droid/lib/jboss-logging-annotations-1.2.0.Beta1.jar +0 -0
  151. data/tools/droid/lib/jboss-transaction-api_1.2_spec-1.0.0.Final.jar +0 -0
  152. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  153. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  154. data/tools/droid/lib/jta-1.1.jar +0 -0
  155. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  156. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  157. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  158. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  159. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  160. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  161. data/tools/droid/lib/poi-3.7.jar +0 -0
  162. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  163. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  164. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  165. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  166. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  167. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  168. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  169. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  170. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  171. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  172. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  173. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  174. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  175. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  176. data/tools/droid/lib/stringtemplate-3.2.jar +0 -0
  177. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  178. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  179. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  180. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  181. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  182. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  183. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  184. data/tools/droid/lib/xz-1.0.jar +0 -0
  185. data/tools/fido/__init__.py +0 -0
  186. data/tools/fido/argparselocal.py +2355 -0
  187. data/tools/fido/conf/DROID_SignatureFile-v81.xml +2 -0
  188. data/tools/fido/conf/container-signature-20150307.xml +2238 -0
  189. data/tools/fido/conf/dc.xsd +119 -0
  190. data/tools/fido/conf/dcmitype.xsd +53 -0
  191. data/tools/fido/conf/dcterms.xsd +383 -0
  192. data/tools/fido/conf/fido-formats.xsd +173 -0
  193. data/tools/fido/conf/format_extension_template.xml +105 -0
  194. data/tools/fido/conf/format_extensions.xml +498 -0
  195. data/tools/fido/conf/formats-v81.xml +38355 -0
  196. data/tools/fido/conf/pronom-xml-v81.zip +0 -0
  197. data/tools/fido/conf/versions.xml +8 -0
  198. data/tools/fido/fido.bat +4 -0
  199. data/tools/fido/fido.py +854 -0
  200. data/tools/fido/fido.sh +5 -0
  201. data/tools/fido/prepare.py +616 -0
  202. data/tools/fido/pronomutils.py +115 -0
  203. data/tools/fido/toxml.py +52 -0
  204. data/tools/fido/update_signatures.py +171 -0
  205. data/tools/pdfbox/pdfbox-app-1.8.10.jar +0 -0
  206. data/tools/pdfbox/preflight-app-1.8.10.jar +0 -0
  207. metadata +396 -0
Binary file
@@ -0,0 +1,106 @@
1
+ <?xml version="1.0"?>
2
+ <formats xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" version="0.3" xmlns="fido-formats.xsd">
3
+ <format>
4
+ <puid>lias-fmt/101</puid>
5
+ <mime>text/xml</mime>
6
+ <name>Extensible Markup Language</name>
7
+ <version>1.0</version>
8
+ <alias>XML (1.0)</alias>
9
+ <pronom_id>638</pronom_id>
10
+ <extension>xml</extension>
11
+ <has_priority_over>fmt/101</has_priority_over>
12
+ <signature>
13
+ <name>XML 1.0</name>
14
+ <pattern>
15
+ <position>BOF</position>
16
+ <regex>(?s)\A.{0,3}\x3c\?xml\s+version\s+=\s+(?:"|')1\.0(?:"|')</regex>
17
+ </pattern>
18
+ </signature>
19
+ </format>
20
+ <format>
21
+ <puid>lias-fmt/001</puid>
22
+ <name>Textura TS print file</name>
23
+ <mime>text/plain</mime>
24
+ <signature>
25
+ <name>Textura TS print file</name>
26
+ <pattern>
27
+ <position>BOF</position>
28
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*/TS\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {6}(\r\n){2}={75} (\r\n){2}L I J S T V A N D E U I T K E R I N G E N S Y N D\. P R E M I E S {21}EUR {4}(\r\n){2}={75} (\r\n){2}CENTR[\S ]*\d\d/\d\d/\d\d (\r\n){2}={75} \r\n</regex>
29
+ </pattern>
30
+ <pattern>
31
+ <position>VAR</position>
32
+ <regex>(?s)(\r\n){2}NUMMER DRAGER TYPE DRAGER SCHRIJFCODE DICHTHEID BLOCKINGSFACTOR \r\n</regex>
33
+ </pattern>
34
+ <pattern>
35
+ <position>VAR</position>
36
+ <regex>(?s)\r\n {41}AANTAL {17}BEDRAG \r\n</regex>
37
+ </pattern>
38
+ <pattern>
39
+ <position>VAR</position>
40
+ <regex>(?s)\r\n {5}POSTASSIGNATIES : </regex>
41
+ </pattern>
42
+ <pattern>
43
+ <position>VAR</position>
44
+ <regex>(?s)\r\n {5}CIRCULAIRE CHEQUES : </regex>
45
+ </pattern>
46
+ <pattern>
47
+ <position>VAR</position>
48
+ <regex>(?s)\r\n {5}CIRC\.CHEQUES \(TERUG ORGAN\.\) : </regex>
49
+ </pattern>
50
+ <pattern>
51
+ <position>VAR</position>
52
+ <regex>(?s)\r\n {5}OVERSCHRIJVINGEN : </regex>
53
+ </pattern>
54
+ <!--
55
+ -->
56
+ <pattern>
57
+ <position>VAR</position>
58
+ <regex>(?s)\r\n {5}ALGEMEEN TOTAAL : </regex>
59
+ </pattern>
60
+ <pattern>
61
+ <position>EOF</position>
62
+ <regex>(?s)\r\nHANDTEKENING\(EN\) AFGEVER: {9}( .){10}(\r\n){3}HANDTEKENING VOOR ONTVANGST: {6}( .){10}\r\n( \f|\f\x1a)\x00+\Z</regex>
63
+ </pattern>
64
+ </signature>
65
+ </format>
66
+ <format>
67
+ <puid>lias-fmt/002</puid>
68
+ <name>Textura VP/S print file</name>
69
+ <mime>text/plain</mime>
70
+ <has_priority_over>lias-fmt/001</has_priority_over>
71
+ <signature>
72
+ <name>Textura VP/S print file</name>
73
+ <pattern>
74
+ <position>BOF</position>
75
+ <regex>(?s)\A(\f#%\*P66610\r\nBACKUP-NAME : \(PR\)[A-Z/\d]*V[PS]\d* ON USER\r\nCREATION-DATE : \d{6}\r\n\*%#\f )?\f {49}(LIJST|LISTE) AVP .{55}(REEKS|SERIE) :[ \d]*\r\n {48}-{57} {3}\r\n\r\n</regex>
76
+ </pattern>
77
+ <pattern>
78
+ <position>EOF</position>
79
+ <regex>(?s)\r\n( \f|\f\x1a)\x00+\Z</regex>
80
+ </pattern>
81
+ </signature>
82
+ </format>
83
+ <format>
84
+ <puid>lias-fmt/189.word</puid>
85
+ <name>Microsoft Office Open XML - Word</name>
86
+ <extension>docx</extension>
87
+ <has_priority_over>x-fmt/263</has_priority_over>
88
+ <has_priority_over>fmt/189</has_priority_over>
89
+ <has_priority_over>fido-fmt/189.word</has_priority_over>
90
+ <signature>
91
+ <name>Microsoft Office Open XML - Word</name>
92
+ <pattern>
93
+ <position>BOF</position>
94
+ <regex>(?s)\APK\x03\x04</regex>
95
+ </pattern>
96
+ <pattern>
97
+ <position>BOF</position>
98
+ <regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
99
+ </pattern>
100
+ <pattern>
101
+ <position>EOF</position>
102
+ <regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,4000}\Z</regex>
103
+ </pattern>
104
+ </signature>
105
+ </format>
106
+ </formats>
data/data/types.yml ADDED
@@ -0,0 +1,217 @@
1
+ ---
2
+ # This lists all the types the converters know about along with the mime types and file extensions.
3
+ # The first file extension in the list is the default one that will be used when a file of that type is created.
4
+ # The mime types need to be unique. Some mime types need to be invented like for instance for PDF/A. The MimeType
5
+ # class should take care of that.
6
+ # Preferably the file extensions are unique too. If not, the first matching entry in the list will be used when a
7
+ # reverse lookup from extension to type identifier is performed. However, file extensions will typically not be used
8
+ # to determine type identifier or mime types. So you should be fairly safe when the file extensions are not unique.
9
+
10
+ IMAGE:
11
+ TIFF:
12
+ NAME: Tagged Image File Format (TIFF)
13
+ MIME: image/tiff
14
+ EXTENSIONS: tif,tiff
15
+
16
+ JP2:
17
+ NAME: Joint Photographic Experts Group 2000 (JPEG 2000)
18
+ MIME: image/jp2
19
+ EXTENSIONS: jp2
20
+
21
+ JPG:
22
+ NAME: Joint Photographic Experts Group (JPEG)
23
+ MIME: image/jpeg
24
+ EXTENSIONS: jpg,jpe,jpeg
25
+
26
+ PNG:
27
+ NAME: Portable Network Graphics (PNG)
28
+ MIME: image/png
29
+ EXTENSIONS: png
30
+
31
+ BMP:
32
+ NAME: Device Independent Bitmap (DIP/BMP)
33
+ MIME: image/bmp,image/x-ms-bmp
34
+ EXTENSIONS: bmp
35
+
36
+ GIF:
37
+ NAME: Graphics Interchange Format (GIF)
38
+ MIME: image/gif
39
+ EXTENSIONS: gif
40
+
41
+ PBM:
42
+ NAME: Portable Bitmap Format (PBM)
43
+ PUID: fmt/409
44
+ MIME: image/x-portable-bitmap
45
+ EXTENSIONS: pbm
46
+
47
+ PGM:
48
+ NAME: Portable GrayMap Format (PGM)
49
+ PUID: fmt/406
50
+ MIME: image/x-portable-graymap
51
+ EXTENSIONS: pgm
52
+
53
+ PPM:
54
+ NAME: Portable Pixel Map (PPM)
55
+ PUID: fmt/408
56
+ MIME: image/x-portable-pixmap
57
+ EXTENSIONS: ppm
58
+
59
+ AUDIO:
60
+
61
+ WAV:
62
+ NAME: Waveform Audio File Format (WAVE)
63
+ MIME: audio/x-wav
64
+ EXTENSIONS: wav
65
+
66
+ MP3:
67
+ NAME: MPEG-1 or MPEG-2 Audio Layer III (MP3)
68
+ MIME: audio/mpeg
69
+ EXTENSIONS: mp3
70
+
71
+ FLAC:
72
+ NAME: Free Lossless Audio Codec (FLAC)
73
+ MIME: audio/flac
74
+ EXTENSIONS: flac
75
+
76
+ VIDEO:
77
+
78
+ MPEG:
79
+ NAME: Moving Picture Experts Group (MPEG-1/MPEG-2)
80
+ MIME: video/mpeg
81
+ EXTENSIONS: mpg mpeg mp1 mp2 mp3 m1v m1a m2a mpa mpv
82
+
83
+ MPEG4:
84
+ NAME: Moving Picture Experts Group (MPEG-4)
85
+ MIME: video/mp4
86
+ EXTENSIONS: mp4,mpeg4
87
+
88
+ MJP2:
89
+ NAME: Motion JPEG 2000 (MJP2)
90
+ MIME: video/mj2
91
+ EXTENSIONS: mj2, mjp2
92
+
93
+ QTFF:
94
+ NAME: QuickTime File Format (QTFF)
95
+ MIME: video/quicktime
96
+ EXTENSIONS: mov, qt
97
+
98
+ AVI:
99
+ NAME: Audio Video Interleave (AVI)
100
+ MIME: video/x-msvideo
101
+ EXTENSIONS: avi
102
+
103
+ OGGV:
104
+ NAME: OGG Video (OGGV)
105
+ MIME: video/ogg
106
+ EXTENSIONS: ogv
107
+
108
+ WMV:
109
+ NAME: Windows Media Video (WMV)
110
+ MIME: video/x-ms-wmv
111
+ EXTENSIONS: wmv
112
+
113
+ DV:
114
+ NAME: Digital Video (DV)
115
+ MIME: video/dv
116
+ EXTENSIONS: dv
117
+
118
+ FLASH:
119
+ NAME: Flash video (FLV)
120
+ MIME: video/x-flv
121
+ EXTENSIONS: flv
122
+
123
+ TEXT:
124
+
125
+ TXT:
126
+ NAME: Unformatted text
127
+ MIME: text/plain
128
+ EXTENSIONS: txt
129
+
130
+ RTF:
131
+ NAME: Rich Text Format (RTF)
132
+ PUID: fmt/45
133
+ MIME: text/rtf application/rtf
134
+ EXTENSIONS: rtf
135
+
136
+ HTML:
137
+ NAME: HyperText Markup Language (HTML)
138
+ MIME: text/html
139
+ EXTENSIONS: html, htm
140
+
141
+ MSDOC:
142
+ NAME: Microsoft Word Document (DOC)
143
+ PUID: fmt/609 fmt/39 x-fmt/2 x-fmt/129 x-fmt/273 x-fmt/274 x-fmt/275 x-fmt/276 fmt/37 fmt/38 fmt/40
144
+ MIME: application/vnd.ms-word application/msword
145
+ EXTENSIONS: doc
146
+
147
+ MSDOCX:
148
+ NAME: Microsoft Word OpenXML Document (DOCX)
149
+ PUID: fmt/412 fido-fmt/189.word lias-fmt/189.word
150
+ MIME: application/vnd.openxmlformats-officedocument.wordprocessingml.document
151
+ EXTENSIONS: docx
152
+
153
+ PDF:
154
+ NAME: Adobe Portable Document Format (PDF)
155
+ PUID: fmt/14 fmt/15 fmt/16 fmt/17 fmt/18 fmt/19 fmt/20 fmt/276
156
+ MIME: application/pdf
157
+ EXTENSIONS: pdf
158
+
159
+ PDFA:
160
+ NAME: Adobe Portable Document Format for Archives (PDFA)
161
+ PUID: fmt/95 fmt/354 fmt/476 fmt/477 fmt/478 fmt/479 fmt/480 fmt/481
162
+ MIME: application/pdf
163
+ EXTENSIONS: pdf
164
+
165
+ WORDPERFECT:
166
+ NAME: WordPerfect Document (WPD)
167
+ PUID: x-fmt/44 x-fmt/394
168
+ MIME: application/vnd.wordperfect
169
+ EXTENSIONS: wpd
170
+
171
+ XML:
172
+ NAME: Extensible Markup Language (XML)
173
+ MIME: text/xml
174
+ PUID: fmt/101
175
+ EXTENSIONS: xml
176
+
177
+ SHAREPOINT_MAP:
178
+ NAME: Sharepoint mapping file
179
+ # This is again an invented mime type. It is actually an XML file.
180
+ MIME: text/xml/sharepoint_map
181
+ PUID: fmt/101
182
+ EXTENSIONS: xml
183
+
184
+ TABULAR:
185
+
186
+ MSXLS:
187
+ NAME: Microsoft Excel Spreadsheet (XLS)
188
+ MIME: application/vnd.ms-excel,application/msexcel
189
+ EXTENSIONS: xls
190
+
191
+ MSXLSX:
192
+ NAME: Microsoft Excel OpenXML Spreadsheet (XLSX)
193
+ PUID: fido-fmt/189.xl
194
+ MIME: application/vnd.openxmlformats-officedocument.spreadsheetml.sheet
195
+ EXTENSIONS: xlsx
196
+
197
+ PRESENTATION:
198
+
199
+ MSPPT:
200
+ NAME: Microsoft Powerpoint Presentation (PPT)
201
+ MIME: application/vnd.ms-powerpoint,application/mspowerpoint
202
+ EXTENSIONS: ppt
203
+
204
+ MSPPTX:
205
+ NAME: Microsoft Powerpoint OpenXML Presentation (PPTX)
206
+ PUID: fido-fmt/189.ppt
207
+ MIME: application/vnd.openxmlformats-officedocument.presentationml.presentation
208
+ EXTENSIONS: pptx
209
+
210
+ ARCHIVE:
211
+
212
+ EAD:
213
+ NAME: Encoded Archival Description (EAD)
214
+ # This is again an invented mime type. It is actually an XML file.
215
+ MIME: archive/ead
216
+ PUID: fmt/101
217
+ EXTENSIONS: ead,xml
@@ -0,0 +1,35 @@
1
+ # encoding: utf-8
2
+ require 'os'
3
+
4
module Libis
  module Format

    # noinspection RubyConstantNamingConvention
    Config = ::Libis::Tools::Config

    # Builds the absolute path of a launcher script located in the
    # 'tools/<tool>' directory three levels above this file's directory.
    bundled_tool = lambda do |tool, script|
      File.absolute_path(
          File.join(File.dirname(__FILE__), '..', '..', '..', 'tools', tool, script)
      )
    end

    # Upper bound stored for the length of a converter chain.
    Config[:converter_chain_max_level] = 8

    # External programs; by default they are looked up by bare command name.
    Config[:java_path] = 'java'
    Config[:soffice_path] = 'soffice'
    Config[:ghostscript_path] = 'gs'
    # Config[:pdfa_path] =
    #     File.absolute_path(
    #         File.join(
    #             File.dirname(__FILE__), '..', '..', '..', 'tools', 'pdf', 'pdfa', 'pdfa'
    #         )
    #     )

    # Launcher scripts: the .bat variant is selected when OS.windows? is true.
    Config[:droid_path] = bundled_tool.call('droid', OS.windows? ? 'droid.bat' : 'droid.sh')
    Config[:fido_path] = bundled_tool.call('fido', OS.windows? ? 'fido.bat' : 'fido.sh')

  end
end
@@ -0,0 +1,101 @@
1
+ # coding: utf-8
2
+
3
+ ### require 'tools/string'
4
+ require 'tmpdir'
5
+ require 'libis/tools/logger'
6
+ require 'libis/format/type_database'
7
+
8
+ require_relative 'repository'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
+ class Base
15
+ include Libis::Tools::Logger
16
+
17
+ attr_reader :options, :flags
18
+
19
# Creates a converter that starts with no options and no flags.
# Both hashes are exposed through the :options and :flags readers.
def initialize
  @options, @flags = {}, {}
end
23
+
24
# Common first step of a conversion: verifies the source file and merges
# the per-call settings into this converter.
#
# @param source [String] path of the file to convert; must exist
# @param target [String] path of the file to create (not used here)
# @param format [Object] requested target format (not used here)
# @param opts [Hash] optional :options and :flags hashes that are merged
#   into the converter's own @options / @flags
# @return [Hash, nil] nil when the source file is missing (an error is
#   logged first); otherwise the merged flags hash when opts[:flags] was
#   given, or nil when it was not
def convert(source, target, format, opts = {})
  if File.exist?(source)
    extra_options = opts[:options]
    @options.merge!(extra_options) if extra_options
    extra_flags = opts[:flags]
    @flags.merge!(extra_flags) if extra_flags
  else
    error "Cannot find file '#{source}'."
    nil
  end
end
32
+
33
# Placeholder for the list of type identifiers a converter accepts.
# Concrete converters have to override it; this version always raises.
#
# @raise [RuntimeError] always
def self.input_types(_ = nil)
  raise 'Method #input_types needs to be overridden in converter'
end
36
+
37
# Placeholder for the list of type identifiers a converter can produce.
# Concrete converters have to override it; this version always raises.
#
# @raise [RuntimeError] always
def self.output_types(_ = nil)
  raise 'Method #output_types needs to be overridden in converter'
end
40
+
41
+
42
# Runs a conversion against a scratch file in the system temp directory
# and moves the result to +target+ once it succeeded.
#
# The scratch name keeps the extension of +target+ and embeds its base
# name (whitespace replaced by '_') so that tools which look at the file
# name still see a sensible one.
#
# @param target [String] path where the final file should end up
# @yieldparam tempfile [String] scratch path the block should write to
# @yieldreturn [String, nil] path of the produced file, or nil/false on failure
# @return [String, nil] +target+ on success, nil when the block reported failure
def using_temp(target)
  extension = File.extname(target)
  stem = File.basename(target, '.*').gsub(/\s/, '_')
  # Dir::Tmpname.make_tmpname is no longer available in current Ruby
  # versions; build the same "<prefix><date>-<pid>-<random>-<n><suffix>"
  # layout by hand.
  unique = "#{Time.now.strftime('%Y%m%d')}-#{Process.pid}-#{rand(0x100000000).to_s(36)}"
  tempfile = File.join(Dir.tmpdir, "convert#{unique}-#{stem}#{extension}")
  result = yield tempfile
  return nil unless result
  FileUtils.move result, target
  target
ensure
  # Do not leave the scratch file behind when the block failed or raised.
  # After a successful move the file is already gone and this is a no-op.
  FileUtils.rm_f(tempfile) if tempfile
end
49
+
50
# Inheritance hook: every converter class that is defined is registered
# with the converter Repository.
#
# @param klass [Class] the converter class that was just defined
def self.inherited(klass)
  super
  Repository.register klass
end

# Class-level query helpers, available on this class and on every
# converter derived from it. They are defined once here instead of being
# re-created inside the inheritance hook, so they exist before the first
# subclass is loaded and a converter's own class-level override is no
# longer replaced when another class derives from it.
class << self

  # @return [Hash] maps each input type to the list returned by output_types
  def conversions
    input_types.each_with_object({}) do |input_type, table|
      table[input_type] = output_types
    end
  end

  # @return [Boolean] whether type_id is listed in input_types
  def input_type?(type_id)
    input_types.include? type_id
  end

  # @return [Boolean] whether type_id is listed in output_types
  def output_type?(type_id)
    output_types.include? type_id
  end

  # Checks the first type identifier the TypeDatabase returns for the
  # mime type against input_types.
  def input_mimetype?(mimetype)
    type_id = TypeDatabase.instance.mime_types(mimetype).first
    input_type? type_id
  end

  # Checks the first type identifier the TypeDatabase returns for the
  # mime type against output_types.
  def output_mimetype?(mimetype)
    type_id = TypeDatabase.instance.mime_types(mimetype).first
    output_type? type_id
  end

  # @return [Boolean, nil] true/false when input_type is known, nil when
  #   the converter has no entry for input_type at all
  def conversion?(input_type, output_type)
    outputs = conversions[input_type]
    outputs && outputs.include?(output_type)
  end

  # @return [Array, nil] output types for input_type, nil when unknown
  def output_for(input_type)
    conversions[input_type]
  end

  # @return [Boolean] whether the TypeDatabase knows a type for the extension
  def extension?(extension)
    !TypeDatabase.ext_types(extension).first.nil?
  end

end
96
+
97
+ end
98
+
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,167 @@
1
+ # coding: utf-8
2
+
3
+ require 'fileutils'
4
+ require 'deep_dive'
5
+
6
+ require 'libis/tools/logger'
7
+ require 'libis/tools/extend/hash'
8
+ require 'libis/format/type_database'
9
+
10
+ module Libis
11
+ module Format
12
+ module Converter
13
+
14
+ class Chain
15
+ include ::Libis::Tools::Logger
16
+ include DeepDive
17
+
18
# Starts an empty converter chain between two formats.
#
# @param source_format [#to_sym] format the chain has to start from
# @param target_format [#to_sym] format the chain has to end with
# @param operations [Hash, nil] converter operations to apply later;
#   nil is treated as "no operations"
def initialize(source_format, target_format, operations = {})
  @source_format, @target_format = source_format.to_sym, target_format.to_sym
  @operations = operations || {}
  @converter_chain = []
end
24
+
25
+ # @param [Libis::Format::Converter::Base.class] converter
26
+ # @return [Array[Hash]]
27
+ def append(converter)
28
+ return [] unless converter
29
+ valid_chain_nodes(converter).map do |node|
30
+ self.ddup.add_chain_node(node)
31
+ end.compact
32
+ end
33
+
34
# A chain is closed when it has at least one node, its first node reads
# the chain's source format and its last node produces the target format.
#
# @return [Boolean]
def closed?
  return false if @converter_chain.empty?
  starts_at_source = @converter_chain.first[:input].to_sym == @source_format
  starts_at_source && @converter_chain.last[:output].to_sym == @target_format
end
39
+
40
# A chain is valid when it is closed and all requested operations could
# be assigned to its converters. Note that apply_operations is only run
# for a closed chain.
#
# @return [Boolean]
def valid?
  return false unless closed?
  apply_operations
end
43
+
44
# Gives access to the chain's nodes in conversion order.
#
# @return [Array<Hash>] the internal node list itself (not a copy)
def to_array
  @converter_chain
end
47
+
48
# @return [Integer] number of nodes (conversion steps) in the chain
def size
  @converter_chain.length
end
51
+
52
+ alias_method :length, :size
53
+
54
# Human readable description of the chain, e.g.
#   "PDF->-PdfConverter->-TIFF->-ImageConverter(quality:80)->-JPG"
#
# Each step shows the converter's class name without its namespace, the
# operations assigned to it (if any) as "method:argument" pairs, and the
# format it produces.
#
# @return [String]
def to_s
  steps = @converter_chain.map do |node|
    converter_name = node[:converter].name.gsub(/^.*::/, '')
    # :operations is only present once operations have been assigned
    applied = (node[:operations] || []).map do |operation|
      "#{operation[:method]}:#{operation[:argument]}"
    end
    arguments = applied.empty? ? '' : "(#{applied.join(',')})"
    "#{converter_name}#{arguments}->-#{node[:output]}"
  end
  "#{@source_format}->-#{steps.join('->-')}"
end
62
+
63
# Runs the source file through every converter of the chain.
#
# Each step gets a fresh converter instance on which the operations
# assigned to that step are invoked before converting. Every step except
# the last writes to a temporary file next to the target
# ("<target>.temp.<ext>", with extra ".<ext>" suffixes appended until the
# name is unused); the last step writes to target_file itself. The
# temporary files are removed afterwards, also when a converter raises.
#
# @param src_file [String] path of the file to convert
# @param target_file [String] path of the final result
# @return [String, nil] target_file on success; nil when the chain is not
#   valid or a converter returned a falsy result
def convert(src_file, target_file)

  unless valid?
    error 'Converter chain is not valid'
    return nil
  end

  temp_files = []

  begin
    last_index = size - 1

    @converter_chain.each_with_index do |node, i|

      target_type = node[:output]
      converter = node[:converter].new

      (node[:operations] || []).each do |operation|
        converter.send operation[:method], operation[:argument]
      end

      target = target_file

      if i < last_index
        extension = TypeDatabase.type_extentions(target_type).first
        target += ".temp.#{extension}"
        target += ".#{extension}" while File.exist? target
        temp_files << target
      end

      FileUtils.mkdir_p File.dirname(target)

      # the output of this step is the input of the next one
      src_file = converter.convert(src_file, target, target_type)

      return nil unless src_file

    end

    target_file
  ensure
    temp_files.each do |f|
      FileUtils.rm(f, force: true)
    end
  end

end
106
+
107
# Lists the nodes the given converter could add at the end of this chain.
#
# The input format of a new node is the output of the current last node,
# or the chain's source format while the chain is still empty. One node
# is proposed per output type the converter reports for that format;
# nodes already present in the chain are left out.
#
# @param converter [Class] converter class responding to input_types and
#   output_types
# @return [Array<Hash>] candidate nodes with :converter, :input and :output
def valid_chain_nodes(converter)
  # explicit nil check (as in add_chain_node) instead of a rescue
  # modifier, which would also hide unrelated errors
  previous = @converter_chain.last
  source_format = previous ? previous[:output] : @source_format
  return [] unless converter.input_types.include? source_format
  converter.output_types(source_format).each_with_object([]) do |format, nodes|
    node = {converter: converter, input: source_format, output: format}
    nodes << node unless node_exists?(node)
  end
end
119
+
120
# Appends a node to the chain when it fits.
#
# The node's :input defaults to the format the chain currently ends with
# (the last node's output, or the source format for an empty chain) and
# is written back into the given hash. The node is accepted only when
# its input equals that format, it names an output the converter offers
# for that format, the converter accepts that format as input, and an
# equal node is not already part of the chain.
#
# @param node [Hash] :converter, :output and optionally :input
# @return [Chain, nil] self when the node was added, nil otherwise
def add_chain_node(node = {})
  previous = @converter_chain.last
  expected_input = previous ? previous[:output] : @source_format
  node[:input] ||= expected_input
  acceptable =
      node[:input] == expected_input &&
      node[:output] &&
      node[:converter].output_types(expected_input).include?(node[:output]) &&
      node[:converter].input_types.include?(expected_input) &&
      !node_exists?(node)
  return nil unless acceptable
  @converter_chain << node
  self
end
132
+
133
# Distributes the requested operations over the converters in the chain.
#
# Works on a copy (ddup) of the reversed chain, so each operation is
# handed to the converter closest to the end of the chain whose class
# has an instance method with the operation's name. Only when every
# operation found a converter is the chain replaced by the annotated
# copy and the pending operations cleared; otherwise the chain and the
# operations are left as they were.
#
# @return [Boolean] true when all operations were assigned
def apply_operations
  candidate_chain = @converter_chain.reverse.ddup
  every_applied = true
  pending = @operations && @operations.ddup || {}
  while (entry = pending.shift)
    method = entry.first.to_s.to_sym
    # once one operation could not be placed the rest is only drained
    next unless every_applied
    handler = candidate_chain.find do |node|
      node[:converter].instance_methods.include?(method)
    end
    if handler
      handler[:operations] ||= []
      handler[:operations] << {method: method, argument: entry.last}
    else
      every_applied = false
    end
  end
  return false unless every_applied && pending.empty?
  @converter_chain = candidate_chain.reverse
  @operations.clear
  true
end
153
+
154
+
155
+ private
156
+
157
# Looks for a node in the chain with the same converter, input and output
# as the given one (any other keys, such as :operations, are ignored).
#
# @param node [Hash] node to look for
# @return [Hash, nil] the matching chain node, or nil when there is none
def node_exists?(node)
  identity = %i[converter input output]
  @converter_chain.find do |existing|
    identity.all? { |key| existing[key] == node[key] }
  end
end
162
+
163
+ end
164
+
165
+ end
166
+ end
167
+ end