libis-format 0.9.32 → 0.9.33

Sign up to get free protection for your applications and to get access to all the features.
Files changed (139) hide show
  1. checksums.yaml +4 -4
  2. data/data/types.yml +30 -16
  3. data/lib/libis/format/config.rb +7 -18
  4. data/lib/libis/format/converter/image_converter.rb +6 -0
  5. data/lib/libis/format/droid.rb +82 -25
  6. data/lib/libis/format/extension_identification.rb +55 -0
  7. data/lib/libis/format/fido.rb +57 -72
  8. data/lib/libis/format/file_tool.rb +76 -0
  9. data/lib/libis/format/identification_tool.rb +174 -0
  10. data/lib/libis/format/identifier.rb +129 -117
  11. data/lib/libis/format/type_database.rb +36 -5
  12. data/lib/libis/format/version.rb +1 -1
  13. data/lib/libis/format.rb +3 -0
  14. data/libis-format.gemspec +2 -1
  15. data/spec/converter_spec.rb +6 -4
  16. data/spec/identifier_spec.rb +125 -34
  17. metadata +21 -126
  18. data/tools/droid/DROID_SignatureFile_V90.xml +0 -40182
  19. data/tools/droid/container-signature-20170330.xml +0 -3584
  20. data/tools/droid/droid-command-line-6.3.jar +0 -0
  21. data/tools/droid/droid.bat +0 -152
  22. data/tools/droid/droid.sh +0 -152
  23. data/tools/droid/lib/XmlSchema-1.4.7.jar +0 -0
  24. data/tools/droid/lib/activation-1.1.jar +0 -0
  25. data/tools/droid/lib/aopalliance-1.0.jar +0 -0
  26. data/tools/droid/lib/asm-2.2.3.jar +0 -0
  27. data/tools/droid/lib/aspectjrt-1.8.7.jar +0 -0
  28. data/tools/droid/lib/aspectjweaver-1.8.7.jar +0 -0
  29. data/tools/droid/lib/bcmail-jdk14-138.jar +0 -0
  30. data/tools/droid/lib/bcprov-jdk14-138.jar +0 -0
  31. data/tools/droid/lib/beansbinding-1.2.1.jar +0 -0
  32. data/tools/droid/lib/byteseek-2.0.3.jar +0 -0
  33. data/tools/droid/lib/cglib-nodep-2.2.2.jar +0 -0
  34. data/tools/droid/lib/classmate-1.0.0.jar +0 -0
  35. data/tools/droid/lib/commons-cli-1.2.jar +0 -0
  36. data/tools/droid/lib/commons-codec-1.10.jar +0 -0
  37. data/tools/droid/lib/commons-collections-3.2.2.jar +0 -0
  38. data/tools/droid/lib/commons-compress-1.4.1.jar +0 -0
  39. data/tools/droid/lib/commons-configuration-1.8.jar +0 -0
  40. data/tools/droid/lib/commons-dbcp-1.4.jar +0 -0
  41. data/tools/droid/lib/commons-httpclient-3.1.jar +0 -0
  42. data/tools/droid/lib/commons-io-2.4.jar +0 -0
  43. data/tools/droid/lib/commons-lang-2.6.jar +0 -0
  44. data/tools/droid/lib/commons-logging-1.1.1.jar +0 -0
  45. data/tools/droid/lib/commons-pool-1.5.4.jar +0 -0
  46. data/tools/droid/lib/cxf-api-2.2.12.jar +0 -0
  47. data/tools/droid/lib/cxf-common-schemas-2.2.12.jar +0 -0
  48. data/tools/droid/lib/cxf-common-utilities-2.2.12.jar +0 -0
  49. data/tools/droid/lib/cxf-rt-bindings-http-2.2.12.jar +0 -0
  50. data/tools/droid/lib/cxf-rt-bindings-soap-2.2.12.jar +0 -0
  51. data/tools/droid/lib/cxf-rt-bindings-xml-2.2.12.jar +0 -0
  52. data/tools/droid/lib/cxf-rt-core-2.2.12.jar +0 -0
  53. data/tools/droid/lib/cxf-rt-databinding-jaxb-2.2.12.jar +0 -0
  54. data/tools/droid/lib/cxf-rt-frontend-jaxws-2.2.12.jar +0 -0
  55. data/tools/droid/lib/cxf-rt-frontend-simple-2.2.12.jar +0 -0
  56. data/tools/droid/lib/cxf-rt-transports-http-2.2.12.jar +0 -0
  57. data/tools/droid/lib/cxf-rt-ws-addr-2.2.12.jar +0 -0
  58. data/tools/droid/lib/cxf-tools-common-2.2.12.jar +0 -0
  59. data/tools/droid/lib/de.huxhorn.lilith.3rdparty.flyingsaucer.core-renderer-8RC1.jar +0 -0
  60. data/tools/droid/lib/derby-10.10.2.0.jar +0 -0
  61. data/tools/droid/lib/droid-container-6.3.jar +0 -0
  62. data/tools/droid/lib/droid-core-6.3.jar +0 -0
  63. data/tools/droid/lib/droid-core-interfaces-6.3.jar +0 -0
  64. data/tools/droid/lib/droid-export-6.3.jar +0 -0
  65. data/tools/droid/lib/droid-export-interfaces-6.3.jar +0 -0
  66. data/tools/droid/lib/droid-help-6.3.jar +0 -0
  67. data/tools/droid/lib/droid-report-6.3.jar +0 -0
  68. data/tools/droid/lib/droid-report-interfaces-6.3.jar +0 -0
  69. data/tools/droid/lib/droid-results-6.3.jar +0 -0
  70. data/tools/droid/lib/geronimo-activation_1.1_spec-1.0.2.jar +0 -0
  71. data/tools/droid/lib/geronimo-annotation_1.0_spec-1.1.1.jar +0 -0
  72. data/tools/droid/lib/geronimo-javamail_1.4_spec-1.6.jar +0 -0
  73. data/tools/droid/lib/geronimo-jaxws_2.1_spec-1.0.jar +0 -0
  74. data/tools/droid/lib/geronimo-stax-api_1.0_spec-1.0.1.jar +0 -0
  75. data/tools/droid/lib/geronimo-ws-metadata_2.0_spec-1.1.2.jar +0 -0
  76. data/tools/droid/lib/hibernate-validator-5.1.0.Final.jar +0 -0
  77. data/tools/droid/lib/itext-2.0.8.jar +0 -0
  78. data/tools/droid/lib/javahelp-2.0.05.jar +0 -0
  79. data/tools/droid/lib/jaxb-api-2.1.jar +0 -0
  80. data/tools/droid/lib/jaxb-impl-2.1.13.jar +0 -0
  81. data/tools/droid/lib/jboss-logging-3.1.3.GA.jar +0 -0
  82. data/tools/droid/lib/joda-time-1.6.2.jar +0 -0
  83. data/tools/droid/lib/jra-1.0-alpha-4.jar +0 -0
  84. data/tools/droid/lib/jta-1.1.jar +0 -0
  85. data/tools/droid/lib/jwat-arc-1.0.3.jar +0 -0
  86. data/tools/droid/lib/jwat-archive-common-1.0.3.jar +0 -0
  87. data/tools/droid/lib/jwat-common-1.0.3.jar +0 -0
  88. data/tools/droid/lib/jwat-gzip-1.0.3.jar +0 -0
  89. data/tools/droid/lib/jwat-warc-1.0.2.jar +0 -0
  90. data/tools/droid/lib/log4j-1.2.13.jar +0 -0
  91. data/tools/droid/lib/neethi-2.0.4.jar +0 -0
  92. data/tools/droid/lib/opencsv-2.3.jar +0 -0
  93. data/tools/droid/lib/org-netbeans-swing-outline-7.2.jar +0 -0
  94. data/tools/droid/lib/org-openide-util-7.2.jar +0 -0
  95. data/tools/droid/lib/org-openide-util-lookup-7.2.jar +0 -0
  96. data/tools/droid/lib/poi-3.13.jar +0 -0
  97. data/tools/droid/lib/saaj-api-1.3.jar +0 -0
  98. data/tools/droid/lib/saaj-impl-1.3.2.jar +0 -0
  99. data/tools/droid/lib/slf4j-api-1.4.2.jar +0 -0
  100. data/tools/droid/lib/slf4j-log4j12-1.4.2.jar +0 -0
  101. data/tools/droid/lib/spring-aop-4.0.3.RELEASE.jar +0 -0
  102. data/tools/droid/lib/spring-beans-4.0.3.RELEASE.jar +0 -0
  103. data/tools/droid/lib/spring-context-4.0.3.RELEASE.jar +0 -0
  104. data/tools/droid/lib/spring-core-4.0.3.RELEASE.jar +0 -0
  105. data/tools/droid/lib/spring-expression-4.0.3.RELEASE.jar +0 -0
  106. data/tools/droid/lib/spring-jdbc-4.0.3.RELEASE.jar +0 -0
  107. data/tools/droid/lib/spring-orm-4.0.3.RELEASE.jar +0 -0
  108. data/tools/droid/lib/spring-tx-4.0.3.RELEASE.jar +0 -0
  109. data/tools/droid/lib/spring-web-2.5.6.jar +0 -0
  110. data/tools/droid/lib/stax-api-1.0-2.jar +0 -0
  111. data/tools/droid/lib/trove4j-3.0.3.jar +0 -0
  112. data/tools/droid/lib/truezip-6.8.4.jar +0 -0
  113. data/tools/droid/lib/validation-api-1.1.0.Final.jar +0 -0
  114. data/tools/droid/lib/wsdl4j-1.6.2.jar +0 -0
  115. data/tools/droid/lib/wstx-asl-3.2.9.jar +0 -0
  116. data/tools/droid/lib/xercesImpl-2.9.1.jar +0 -0
  117. data/tools/droid/lib/xml-apis-1.3.04.jar +0 -0
  118. data/tools/droid/lib/xml-resolver-1.2.jar +0 -0
  119. data/tools/droid/lib/xz-1.0.jar +0 -0
  120. data/tools/fido/__init__.py +0 -50
  121. data/tools/fido/conf/DROID_SignatureFile-v90.xml +0 -2
  122. data/tools/fido/conf/container-signature-20170330.xml +0 -3584
  123. data/tools/fido/conf/dc.xsd +0 -119
  124. data/tools/fido/conf/dcmitype.xsd +0 -53
  125. data/tools/fido/conf/dcterms.xsd +0 -383
  126. data/tools/fido/conf/fido-formats.xsd +0 -173
  127. data/tools/fido/conf/format_extension_template.xml +0 -105
  128. data/tools/fido/conf/format_extensions.xml +0 -484
  129. data/tools/fido/conf/formats-v90.xml +0 -48877
  130. data/tools/fido/conf/pronom-xml-v90.zip +0 -0
  131. data/tools/fido/conf/versions.xml +0 -8
  132. data/tools/fido/fido.bat +0 -4
  133. data/tools/fido/fido.py +0 -884
  134. data/tools/fido/fido.sh +0 -5
  135. data/tools/fido/package.py +0 -96
  136. data/tools/fido/prepare.py +0 -645
  137. data/tools/fido/pronomutils.py +0 -200
  138. data/tools/fido/toxml.py +0 -60
  139. data/tools/fido/update_signatures.py +0 -183
@@ -1,173 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <!--
3
- Usage of DC has been based on these references:
4
- http://dublincore.org/documents/usageguide/elements.shtml
5
- http://dublincore.org/documents/usageguide/qualifiers.shtml
6
- http://www.dublincore.org/documents/dc-xml-guidelines/
7
- -->
8
- <xs:schema elementFormDefault="qualified"
9
- xmlns:xs="http://www.w3.org/2001/XMLSchema"
10
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
11
- xmlns:dc="http://purl.org/dc/elements/1.1/"
12
- xmlns:dcterms="http://purl.org/dc/terms/">
13
- <xs:import namespace="http://purl.org/dc/elements/1.1/" schemaLocation="dc.xsd"/>
14
- <xs:import namespace="http://purl.org/dc/terms/" schemaLocation="dcterms.xsd"/>
15
- <!-- http://www.dublincore.org/schemas/xmls/qdc/2003/04/02/dcterms.xsd -->
16
- <xs:element name="formats">
17
- <xs:complexType>
18
- <xs:sequence>
19
- <xs:element maxOccurs="unbounded" ref="format"/>
20
- </xs:sequence>
21
- <xs:attribute name="version" type="xs:decimal" use="required"/>
22
- </xs:complexType>
23
- </xs:element>
24
- <xs:element name="format">
25
- <xs:complexType>
26
- <xs:sequence>
27
- <xs:element maxOccurs="1" minOccurs="1" ref="puid"/>
28
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="mime"/>
29
- <xs:element minOccurs="0" ref="container"/>
30
- <xs:element maxOccurs="1" minOccurs="1" ref="name"/>
31
- <xs:element maxOccurs="1" minOccurs="0" name="version" type="xs:string"/>
32
- <xs:element minOccurs="0" name="alias" type="xs:string"/>
33
- <xs:element maxOccurs="1" minOccurs="0" ref="pronom_id"/>
34
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="extension"/>
35
- <xs:element maxOccurs="1" minOccurs="0" name="apple_uti" type="xs:string"/>
36
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="has_priority_over"/>
37
- <xs:element maxOccurs="unbounded" ref="signature"/>
38
- <xs:element minOccurs="0" ref="note"/>
39
- <xs:element maxOccurs="1" ref="details"/>
40
- </xs:sequence>
41
- </xs:complexType>
42
- </xs:element>
43
- <xs:element name="mime" type="xs:string"/>
44
- <xs:element name="puid" type="xs:string"/>
45
- <xs:element name="container" type="container_type"/>
46
- <xs:element name="pronom_id" type="xs:integer"/>
47
- <xs:element name="extension" type="xs:string"/>
48
- <xs:element name="has_priority_over" type="xs:string"/>
49
- <xs:element name="signature">
50
- <xs:complexType>
51
- <xs:sequence>
52
- <xs:element ref="name"/>
53
- <xs:element ref="note" minOccurs="0" maxOccurs="unbounded"/>
54
- <xs:element maxOccurs="unbounded" ref="pattern"/>
55
- </xs:sequence>
56
- </xs:complexType>
57
- </xs:element>
58
- <xs:element name="note" type="xs:string"/>
59
- <xs:element name="pattern">
60
- <xs:complexType>
61
- <xs:sequence>
62
- <xs:element ref="position"/>
63
- <xs:element ref="pronom_pattern" minOccurs="0"/>
64
- <xs:element ref="regex" minOccurs="1" maxOccurs="1" />
65
- </xs:sequence>
66
- </xs:complexType>
67
- </xs:element>
68
- <xs:element name="position" type="position_type"/>
69
- <xs:element name="pronom_pattern" type="xs:string"/>
70
- <xs:element name="regex" type="xs:string"/>
71
- <xs:element name="name" type="xs:string"/>
72
- <xs:simpleType name="position_type">
73
- <xs:restriction base="xs:string">
74
- <xs:enumeration value="BOF"/>
75
- <xs:enumeration value="EOF"/>
76
- <xs:enumeration value="VAR"/>
77
- </xs:restriction>
78
- </xs:simpleType>
79
- <xs:simpleType name="container_type">
80
- <xs:restriction base="xs:string">
81
- <xs:enumeration value="zip"/>
82
- <xs:enumeration value="tar"/>
83
- </xs:restriction>
84
- </xs:simpleType>
85
- <xs:element name="details">
86
- <xs:complexType>
87
- <xs:sequence>
88
- <xs:element maxOccurs="1" minOccurs="0" ref="dc:description"/>
89
- <xs:element maxOccurs="1" minOccurs="0" ref="dcterms:available"/>
90
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="dc:creator"/>
91
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="dcterms:publisher"/>
92
- <xs:element maxOccurs="unbounded" minOccurs="0" name="is_supertype_of" type="xs:string"/>
93
- <xs:element maxOccurs="unbounded" minOccurs="0" name="is_subtype_of" type="xs:string"/>
94
- <xs:element maxOccurs="1" minOccurs="0" name="content_type" type="xs:string"/>
95
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="reference"/>
96
- <xs:element maxOccurs="unbounded" minOccurs="0" ref="example_file"/>
97
- <xs:element maxOccurs="1" minOccurs="0" ref="record_metadata"/>
98
- </xs:sequence>
99
- </xs:complexType>
100
- </xs:element>
101
- <xs:element name="reference">
102
- <xs:complexType>
103
- <xs:sequence>
104
- <xs:element maxOccurs="1" minOccurs="0" name="type" type="xs:string"/>
105
- <xs:element maxOccurs="unbounded" ref="dc:any"/>
106
- </xs:sequence>
107
- </xs:complexType>
108
- </xs:element>
109
- <xs:element name="example_file">
110
- <xs:complexType>
111
- <xs:sequence>
112
- <xs:element maxOccurs="unbounded" ref="dc:any"/>
113
- <xs:element minOccurs="1" ref="checksum"/>
114
- </xs:sequence>
115
- </xs:complexType>
116
- </xs:element>
117
- <xs:element name="record_metadata">
118
- <xs:complexType>
119
- <xs:sequence>
120
- <xs:element maxOccurs="1" minOccurs="1" ref="status"/>
121
- <xs:element maxOccurs="unbounded" ref="dc:any"/>
122
- </xs:sequence>
123
- </xs:complexType>
124
- </xs:element>
125
- <xs:element name="checksum">
126
- <xs:complexType mixed="true">
127
- <xs:sequence>
128
- </xs:sequence>
129
- <xs:attribute name="type" use="required">
130
- <xs:simpleType>
131
- <xs:restriction base="xs:string">
132
- <xs:enumeration value="md5"/>
133
- <xs:enumeration value="sha256"/>
134
- </xs:restriction>
135
- </xs:simpleType>
136
- </xs:attribute>
137
- </xs:complexType>
138
- </xs:element>
139
- <xs:element name="status">
140
- <xs:simpleType>
141
- <xs:restriction base="xs:string">
142
- <xs:enumeration value="unknown"/>
143
- <xs:enumeration value="invalid"/>
144
- <xs:enumeration value="incomplete"/>
145
- <xs:enumeration value="stub"/>
146
- <xs:enumeration value="adequate"/>
147
- <xs:enumeration value="complete"/>
148
- <xs:enumeration value="superb"/>
149
- <xs:enumeration value="deprecated"/>
150
- </xs:restriction>
151
- </xs:simpleType>
152
- </xs:element>
153
- <xs:element name="content_type">
154
- <xs:simpleType>
155
- <xs:restriction base="xs:string">
156
- <xs:enumeration value="raster"/>
157
- <xs:enumeration value="vector"/>
158
- <xs:enumeration value="image"/>
159
- <xs:enumeration value="spreadsheet"/>
160
- <xs:enumeration value="text"/>
161
- <xs:enumeration value="word-processed"/>
162
- <xs:enumeration value="page-description"/>
163
- <xs:enumeration value="audio"/>
164
- <xs:enumeration value="presentation"/>
165
- <xs:enumeration value="mark-up"/>
166
- <xs:enumeration value="database"/>
167
- <xs:enumeration value="video"/>
168
- <xs:enumeration value="email"/>
169
- <xs:enumeration value="other"/>
170
- </xs:restriction>
171
- </xs:simpleType>
172
- </xs:element>
173
- </xs:schema>
@@ -1,105 +0,0 @@
1
- <?xml version="1.0" encoding="UTF-8"?>
2
- <formats version="0.3"
3
- xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
4
- xsi:noNamespaceSchemaLocation="fido-formats.xsd"
5
- xmlns:dc="http://purl.org/dc/elements/1.1/"
6
- xmlns:dcterms="http://purl.org/dc/terms/">
7
- <format>
8
- <!-- Unique identifier for this record. If an identifier is re-used,
9
- the later record takes precedence. This allows existing records to be overridden if required. -->
10
- <puid>fido-fmt/189.word</puid>
11
- <name>Microsoft Office Open XML - Word</name>
12
- <version>1.0 Transitional</version>
13
- <alias>Word OOXML</alias>
14
- <extension>docx</extension>
15
- <apple_uid/>
16
- <!-- These are lower-priority signatures, identified by unique id. If a match is found, this record will be matched first, before those specified here. -->
17
- <has_priority_over>x-fmt/263</has_priority_over>
18
- <has_priority_over>fmt/189</has_priority_over>
19
- <signature>
20
- <name>Microsoft Office Open XML - Word</name>
21
- <!-- Each signature is composed of one or more patterns, which must all match for an identification to be successful.
22
- BOF regex -->
23
- <pattern>
24
- <position>BOF</position>
25
- <regex>(?s)\APK\x03\x04</regex>
26
- </pattern>
27
- <pattern>
28
- <position>BOF</position>
29
- <regex>(?s)\A.{30}\[Content_Types\]\.xml \xa2</regex>
30
- </pattern>
31
- <pattern>
32
- <position>EOF</position>
33
- <regex>(?s)\x00\x00word/.{1,20}\.xmlPK\x01\x02\x2d.{0,2000}\Z</regex>
34
- </pattern>
35
- </signature>
36
-
37
- <!-- The information above provides the core fido fields, i.e. those relating to identification. -->
38
- <!-- More detailed information beyond that required for identification goes in the details section. -->
39
- <details>
40
- <!-- A description of the format -->
41
- <dc:description></dc:description>
42
- <!-- The release date -->
43
- <dcterms:available/>
44
- <!-- Who created the format. -->
45
- <dc:creator>Microsoft</dc:creator>
46
- <!-- Who published the format. -->
47
- <dcterms:publisher />
48
- <!-- Which formats (identified by puid) are super-types of this one.
49
- e.g. all instances of the current type are also instances of this broader super-type. -->
50
- <is_subtype_of/>
51
- <!-- Which formats are sub-types to this one. -->
52
- <is_supertype_of/>
53
- <!-- Content Type, free text, but should be one of:
54
- raster|vector|image|spreadsheet|text|word-processed|page-description|audio|presentation|mark-up|database|video|email|other
55
- -->
56
- <content_type>word-processed</content_type>
57
-
58
- <!-- References to useful documentation or other resources.
59
- You can use any Dublin Core or DCTerms elements here. -->
60
- <reference>
61
- <!-- The type of reference this is. Should be one of:
62
- information|specification|implementation|registry|related
63
- -->
64
- <type>specification</type>
65
- <dc:title/>
66
- <dc:creator/>
67
- <dc:publisher/>
68
- <!-- If the specification is available at a URL, this should be included as an identifier. -->
69
- <dc:identifier/>
70
- <dc:description/>
71
- <dcterms:created/>
72
- <dcterms:modified/>
73
- <dcterms:available/>
74
- <dc:type>Authoritative</dc:type>
75
- <dcterms:license/>
76
- </reference>
77
-
78
- <!-- Example files that should be successfully identified by the above signature.
79
- You can use any Dublin Core or DCTerms elements here. -->
80
- <example_file>
81
- <dc:identifier>./this.txt</dc:identifier>
82
- <dc:rights>To the extent possible under law, Andrew Jackson has waived all copyright and related or neighbouring rights to this example file.</dc:rights>
83
- <dcterms:license>http://creativecommons.org/publicdomain/zero/1.0/</dcterms:license>
84
- <checksum type="md5">XXXXXX</checksum>
85
- </example_file>
86
-
87
- <!-- Metadata about this format record.
88
- You can use any Dublin Core or DCTerms elements here. -->
89
- <record_metadata>
90
- <!-- invalid|incomplete|stub|adequate|complete|superb|deprecated -->
91
- <status>complete</status>
92
- <dc:creator>Your name goes here.</dc:creator>
93
- <dc:description/>
94
- <dcterms:license/>
95
- <dcterms:created>11 Mar 2005</dcterms:created>
96
- <dcterms:modified>02 Aug 2005</dcterms:modified>
97
- <!-- If drawn from a reference, re-use the identifier from the references section. -->
98
- <dc:source/>
99
- <!-- Deprecated records should declare the new puid(s) here: -->
100
- <dcterms:isReplacedBy/>
101
- </record_metadata>
102
- </details>
103
-
104
- </format>
105
- </formats>