jhove-service 1.0.3
Sign up to get free protection for your applications and to get access to all the features.
- data/LICENSE.rdoc +23 -0
- data/README.rdoc +28 -0
- data/bin/JhoveApp.jar +0 -0
- data/bin/extension-mimetype.conf +209 -0
- data/bin/jhove-README-1st.txt +225 -0
- data/bin/jhove-README-conf.txt +63 -0
- data/bin/jhove.conf +50 -0
- data/bin/jhoveToolkit.jar +0 -0
- data/bin/jhoveToolkit.sh +23 -0
- data/lib/jhove_service.rb +94 -0
- data/lib/jhove_technical_metadata.rb +284 -0
- data/lib/tasks/yard.rake +31 -0
- metadata +157 -0
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Copyright (c) 2013 by The Board of Trustees of the Leland Stanford Junior
|
2
|
+
University. All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use of this distribution in source and binary forms,
|
5
|
+
with or without modification, are permitted provided that:
|
6
|
+
* The above copyright notice and this permission notice appear in all copies and supporting documentation;
|
7
|
+
* The name, identifiers, and trademarks of The Board of Trustees
|
8
|
+
of the Leland Stanford Junior University are not used in advertising or publicity
|
9
|
+
without the express prior written permission of The Board of Trustees
|
10
|
+
of the Leland Stanford Junior University;
|
11
|
+
* Recipients acknowledge that this distribution is made available as a research courtesy,
|
12
|
+
"as is", potentially with defects, without any obligation on the part
|
13
|
+
of The Board of Trustees of the Leland Stanford Junior University to provide support,
|
14
|
+
services, or repair;
|
15
|
+
|
16
|
+
THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY DISCLAIMS ALL WARRANTIES,
|
17
|
+
EXPRESS OR IMPLIED, WITH REGARD TO THIS SOFTWARE, INCLUDING WITHOUT LIMITATION
|
18
|
+
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE,
|
19
|
+
AND IN NO EVENT SHALL THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY
|
20
|
+
BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
|
21
|
+
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, TORT
|
22
|
+
(INCLUDING NEGLIGENCE) OR STRICT LIABILITY, ARISING OUT OF OR IN CONNECTION
|
23
|
+
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
= jhove-service
|
2
|
+
|
3
|
+
Require the following:
|
4
|
+
require 'jhove_service'
|
5
|
+
|
6
|
+
|
7
|
+
This will give you:
|
8
|
+
JhoveService
|
9
|
+
|
10
|
+
== Build and release procedure
|
11
|
+
Modify the version number in jhove-service.gemspec, then push your commits to AFS. DO NOT TAG!
|
12
|
+
Run: 'rake dlss_release' to tag, build, and publish the gem
|
13
|
+
See the Rakefile and the LyberCore::DlssRelease task in lyberteam-devel/lib/dlss/rake/dlss_release.rb
|
14
|
+
for more details
|
15
|
+
|
16
|
+
== Releases
|
17
|
+
- <b>0.1.0</b> First release
|
18
|
+
- <b>0.1.1</b> Rename temp to target (location in which to create output)
|
19
|
+
- <b>0.2.0</b> Change method of transforming JHOVE output to technicalMetadata
|
20
|
+
- <b>0.2.1</b> Enable processing of a subset of files in a directory
|
21
|
+
- <b>0.2.2</b> Enable upgrading of old technical metadata to the new format
|
22
|
+
- <b>1.0.0</b> Update JhoveToolkit.jar to v1.0.0. Fix output for filetypes w/o jhove module (bytestreams)
|
23
|
+
- <b>1.0.1</b> Updated to use new sul-gems server and new lyberteam-gems-devel utility
|
24
|
+
- <b>1.0.2</b> Fixed negative indentation problem for empty elements (e.g. <elem/>)
|
25
|
+
- <b>1.0.3</b> Upgraded bin/JhoveApp.jar to the copy from the JHOVE 1.9 release
|
26
|
+
== Copyright
|
27
|
+
|
28
|
+
Copyright (c) 2013 Stanford University Library. See LICENSE for details.
|
data/bin/JhoveApp.jar
ADDED
Binary file
|
@@ -0,0 +1,209 @@
|
|
1
|
+
3dmf=x-world/x-3dmf
|
2
|
+
ai=application/vnd.adobe-illustrator
|
3
|
+
aif=audio/x-aiff
|
4
|
+
aifc=audio/x-aiff
|
5
|
+
aiff=audio/x-aiff
|
6
|
+
als=image/x-vivid
|
7
|
+
art=image/x-first-publisher-art
|
8
|
+
asc=image/x-hp-graphic-obj
|
9
|
+
asf=video/x-ms-asf
|
10
|
+
asp=text/html
|
11
|
+
asx=video/x-ms-asx
|
12
|
+
au=audio/x-au
|
13
|
+
avi=video/x-msvideo
|
14
|
+
bcpio=application/dca-rft
|
15
|
+
bin=application/macbinary
|
16
|
+
bm=image/x-xbitmap
|
17
|
+
bmp=image/bmp
|
18
|
+
c4=image/x-ccitt4
|
19
|
+
cal=image/x-cals
|
20
|
+
cals=image/x-cals
|
21
|
+
ccrf=image/x-calcomp-ccrf
|
22
|
+
cdda=audio/x-aiff
|
23
|
+
cdr=image/vnd.corel-draw
|
24
|
+
cel=image/x-lumena-cel
|
25
|
+
cfm=text/html
|
26
|
+
cgi=text/html
|
27
|
+
class=application/octet-stream
|
28
|
+
clp=application/x-pcpaint
|
29
|
+
cm=image/x-puzzle
|
30
|
+
cpio=application/x-DisplayWrite-txt
|
31
|
+
crf=image/x-calcomp-ccrf
|
32
|
+
css=text/css
|
33
|
+
ct=image/x-scitex-ct
|
34
|
+
dcr=application/x-director
|
35
|
+
dcx=image/x-pcx
|
36
|
+
dir=application/x-director
|
37
|
+
doc=application/vnd.ms-word
|
38
|
+
dvi=application/x-dvi
|
39
|
+
dxf=application/x-autoCAD-dxf
|
40
|
+
dxr=application/x-director
|
41
|
+
eml=text/plain
|
42
|
+
epi=image/x-eps-interchange
|
43
|
+
eps=image/eps
|
44
|
+
epsf=image/eps
|
45
|
+
epsi=image/x-eps-interchange
|
46
|
+
etx=text/x-setextetx
|
47
|
+
evy=application/x-envoy
|
48
|
+
exe=application/octet-stream
|
49
|
+
fh7=application/vnd.mm-freehand
|
50
|
+
fif=image/x-fractal
|
51
|
+
gif=image/gif
|
52
|
+
gis=application/x-erdas-lan
|
53
|
+
gm=image/x-autologic-gm
|
54
|
+
gm2=image/x-autologic-gm
|
55
|
+
gm4=image/x-autologic-gm
|
56
|
+
goe=image/x-goes
|
57
|
+
goes=image/x-goes
|
58
|
+
grb=image/x-hp-graphic-obj
|
59
|
+
gtar=application/x-gtar
|
60
|
+
hdf=application/vnd.samna
|
61
|
+
hpg=application/vnd.hp-HPGL
|
62
|
+
hpgl=application/vnd.hp-HPGL
|
63
|
+
hqx=application/mac-binhex40
|
64
|
+
hrf=image/x-hitachi-raster
|
65
|
+
htm=text/html
|
66
|
+
html=text/html
|
67
|
+
ico=image/x-sun-icon
|
68
|
+
icon=image/x-sun-icon
|
69
|
+
ics=application/ProWrite
|
70
|
+
idc=image/x-core-idc
|
71
|
+
ief=image/ief
|
72
|
+
iff=image/x-iff-ilbm
|
73
|
+
ilbm=image/x-iff-ilbm
|
74
|
+
im=image/x-sun-raster
|
75
|
+
im1=image/x-sun-raster
|
76
|
+
im24=image/x-sun-raster
|
77
|
+
im32=image/x-sun-raster
|
78
|
+
im8=image/x-sun-raster
|
79
|
+
img=image/x-vivid
|
80
|
+
ind=application/vnd.adobe-indesign
|
81
|
+
indd=application/vnd.adobe-indesign
|
82
|
+
jpe=image/jpeg
|
83
|
+
jpeg=image/jpeg
|
84
|
+
jpg=image/jpeg
|
85
|
+
jp2=image/jp2
|
86
|
+
js=application/x-javascript
|
87
|
+
kar=audio/midi
|
88
|
+
lan=application/x-erdas-lan
|
89
|
+
latex=application/x-latex
|
90
|
+
lbm=image/x-iff-ilbm
|
91
|
+
mac=image/x-iff-ilbm
|
92
|
+
mcw=application/vnd.ms-word
|
93
|
+
mdb=application/x-msaccess
|
94
|
+
mht=text/html
|
95
|
+
mid=audio/midi
|
96
|
+
midi=audio/midi
|
97
|
+
mif=application/vnd.framemaker-mif
|
98
|
+
mov=video/quicktime
|
99
|
+
movie=video/x-sgi-movie
|
100
|
+
mp2=audio/x-mpeg
|
101
|
+
MP2=audio/x-mpeg
|
102
|
+
mp3=audio/x-mpeg
|
103
|
+
mpa=audio/x-mpeg,video/x-mpeg
|
104
|
+
mpe=video/mpeg
|
105
|
+
mpeg=video/mpeg
|
106
|
+
mpg=video/mpeg
|
107
|
+
mpga=audio/x-mpeg
|
108
|
+
ms-powerpoint=application/vnd.ms-powerpoint
|
109
|
+
msword=application/vnd.ms-word
|
110
|
+
mtv=image/x-mtv-ray
|
111
|
+
navb=image/x-navb
|
112
|
+
octet-stream=application/octet-stream
|
113
|
+
oda=application/x-pfsWrite
|
114
|
+
pat=image/x-us-patent
|
115
|
+
pbm=image/x-portable-bitmap
|
116
|
+
pcd=image/x-photo-cd
|
117
|
+
pcl=application/vnd.hp-PCL
|
118
|
+
pcx=image/x-pcx
|
119
|
+
pdf=application/pdf
|
120
|
+
pds=application/vnd.NASA-PDS
|
121
|
+
pgm=image/x-portable-graymap
|
122
|
+
photo=text/plain
|
123
|
+
php=text/plain
|
124
|
+
pic=image/x-macintosh-pict
|
125
|
+
pict=image/x-macintosh-pict
|
126
|
+
pjpeg=image/jpeg
|
127
|
+
pl=text/plain
|
128
|
+
pm=image/x-ibm-picture-mkr
|
129
|
+
png=image/png
|
130
|
+
pnm=image/x-portable-anymap
|
131
|
+
ppm=image/x-portable-pixmap
|
132
|
+
pps=application/vnd.ms-powerpoint
|
133
|
+
ppt=application/vnd.ms-powerpoint
|
134
|
+
ppz=application/vnd.ms-powerpoint
|
135
|
+
prn=image/x-calcomp-ccrf
|
136
|
+
ps=application/postscript
|
137
|
+
psd=application/vnd.adobe-photoshop
|
138
|
+
puzzle=image/x-puzzle
|
139
|
+
pzl=image/x-puzzle
|
140
|
+
qdv=image/x-qdv
|
141
|
+
qpr=application/quattroPro
|
142
|
+
qt=video/quicktime
|
143
|
+
qxd=application/vnd.quark
|
144
|
+
ra=audio/vnd.realaudio
|
145
|
+
ram=audio/x-pn-realaudio
|
146
|
+
ras=image/x-sun-raster
|
147
|
+
rast=image/x-qdv
|
148
|
+
raw=application/vnd.adobe-photoshop
|
149
|
+
rg=image/x-raster-graphics
|
150
|
+
rgb=image/x-rgb
|
151
|
+
rix=image/x-rix
|
152
|
+
rlc=image/x-rlc
|
153
|
+
rle=image/x-utah-raster
|
154
|
+
rm=audio/x-realmedia,video/vnd.realvideo
|
155
|
+
rnc=application/vnd.relax-ng.rnc
|
156
|
+
rtf=text/richtext
|
157
|
+
rtl=application/vnd.hp-RTL
|
158
|
+
scd=image/x-scodl
|
159
|
+
scx=image/x-rix
|
160
|
+
sea=application/sit
|
161
|
+
sgi=image/x-sgi-image
|
162
|
+
sgm=text/sgml
|
163
|
+
sgml=text/sgml
|
164
|
+
shtml=text/html
|
165
|
+
sit=application/sit
|
166
|
+
smil=application/smil
|
167
|
+
snd=audio/basic
|
168
|
+
spl=application/futuresplash
|
169
|
+
spr=application/x-sprite
|
170
|
+
sprite=application/x-sprite
|
171
|
+
sst=image/x-avhrr
|
172
|
+
svp=video/x-pn-realvideo
|
173
|
+
swf=application/x-shockwave-flash
|
174
|
+
tar=application/x-tar
|
175
|
+
tbk=application/toolbook
|
176
|
+
tex=application/x-tex
|
177
|
+
texi=application/x-FirstChoice
|
178
|
+
texinfo=application/x-FirstChoice
|
179
|
+
tga=image/x-targa
|
180
|
+
tif=image/tiff
|
181
|
+
tiff=image/tiff
|
182
|
+
tsp=application/x-SmartDraw
|
183
|
+
tsv=text/tab-separated-values
|
184
|
+
txt=text/plain
|
185
|
+
ustar=application/x-wordstar
|
186
|
+
vi=image/x-jovian-vi
|
187
|
+
vif=image/x-verity-image
|
188
|
+
vit=image/x-vitec
|
189
|
+
vrml=x-world/x-vrml
|
190
|
+
wav=audio/x-wave
|
191
|
+
wk1=application/vnd.lotus1-2-3
|
192
|
+
wk3=application/vnd.lotus1-2-3
|
193
|
+
wk4=application/vnd.lotus1-2-3
|
194
|
+
wks=application/vnd.lotus1-2-3
|
195
|
+
wma=audio/x-ms-wma
|
196
|
+
wmv=video/x-ms-wmv
|
197
|
+
wpd=application/wordperfect
|
198
|
+
wpg=application/x-wordperfect-graphic
|
199
|
+
wrl=x-world/x-vrml
|
200
|
+
xbm=image/x-xbitmap
|
201
|
+
xls=application/vnd.ms-excel
|
202
|
+
xml=text/xml
|
203
|
+
xsd=text/xml
|
204
|
+
xsl=text/xml
|
205
|
+
xpm=image/x-xpixmap
|
206
|
+
x-png=image/png
|
207
|
+
x-shockwave-flash=application/x-shockwave-flash
|
208
|
+
xwd=image/x-xwindowdump
|
209
|
+
zip=application/zip
|
@@ -0,0 +1,225 @@
|
|
1
|
+
JHOVE - JSTOR/Harvard Object Validation Environment
|
2
|
+
Copyright 2003-2008 by JSTOR and the President and Fellows of Harvard College
|
3
|
+
JHOVE is made available under the GNU Lesser General Public License (LGPL;
|
4
|
+
see the file LICENSE for details)
|
5
|
+
|
6
|
+
Rev. 1.2, 2009-02-10
|
7
|
+
|
8
|
+
JHOVE (the JSTOR/Harvard Object Validation Environment, pronounced "jhove")
|
9
|
+
is an extensible software framework for performing format identification,
|
10
|
+
validation, and characterization of digital objects.
|
11
|
+
|
12
|
+
o Format identification is the process of determining the format to which a
|
13
|
+
digital object conforms: "I have a digital object; what format is it?"
|
14
|
+
o Format validation is the process of determining the level of compliance of a
|
15
|
+
digital object to the specification for its purported format: "I have an
|
16
|
+
object purportedly of format F; is it?"
|
17
|
+
o Format characterization is the process of determining the format-specific
|
18
|
+
significant properties of an object of a given format: "I have an object of
|
19
|
+
format F; what are its salient properties?"
|
20
|
+
|
21
|
+
These actions are frequently necessary during routine operation of digital
|
22
|
+
repositories and for digital preservation activities.
|
23
|
+
|
24
|
+
The output from JHOVE is controlled by output handlers. JHOVE uses an
|
25
|
+
extensible plug-in architecture; it can be configured at the time of its
|
26
|
+
invocation to include whatever specific format modules and output handlers
|
27
|
+
that are desired. The initial release of JHOVE includes modules for
|
28
|
+
arbitrary byte streams, ASCII and UTF-8 encoded text, AIFF and WAVE audio,
|
29
|
+
GIF, JPEG, JPEG 2000, TIFF, and PDF; and text and XML output handlers.
|
30
|
+
|
31
|
+
The JHOVE project is a collaboration of JSTOR and the Harvard University
|
32
|
+
Library. Development of JHOVE was funded in part by the Andrew W. Mellon
|
33
|
+
Foundation. JHOVE is made available under the GNU Lesser General Public
|
34
|
+
License (LGPL; see the file LICENSE for details).
|
35
|
+
|
36
|
+
REQUIREMENTS
|
37
|
+
|
38
|
+
1. Java J2SE 1.4
|
39
|
+
(JHOVE was originally implemented using the Sun J2SE SDK 1.4.1 and has
|
40
|
+
been tested to work with 1.4.2 <http://java.sun.com/j2se/1.4.2/>)
|
41
|
+
|
42
|
+
2. If you would like to compile the JHOVE source code, then
|
43
|
+
Apache Ant, a Java-based build tool <http://ant.apache.org/> is necessary.
|
44
|
+
Note that the JAVA_HOME environment variable must be set appropriately for
|
45
|
+
Ant to work properly.
|
46
|
+
(JHOVE was implemented and tested using Ant 1.5.1.)
|
47
|
+
|
48
|
+
DISTRIBUTION
|
49
|
+
|
50
|
+
The JHOVE distribution package includes:
|
51
|
+
|
52
|
+
jhove/ # JHOVE home directory
|
53
|
+
COPYING # GNU Lesser General Public License
|
54
|
+
LICENSE # JHOVE license information
|
55
|
+
README
|
56
|
+
RELEASENOTES # JHOVE release notes
|
57
|
+
bin/
|
58
|
+
jhove.jar # JHOVE API package
|
59
|
+
jhove-handler.jar # Standard output handler package
|
60
|
+
jhove-module.jar # Standard module package
|
61
|
+
JhoveApp.jar # JHOVE command line application
|
62
|
+
JhoveView.jar # JHOVE with Swing GUI front-end
|
63
|
+
build.xml # Ant configuration file
|
64
|
+
classes/
|
65
|
+
build.xml # Ant configuration file
|
66
|
+
edu/ ... # JHOVE API packages
|
67
|
+
ADump.* # AIFF dump utility class
|
68
|
+
GDump.* # GIF dump utility class
|
69
|
+
Jhove.* # JHOVE main class
|
70
|
+
JDump.* # JPEG dump utility class
|
71
|
+
J2Dump.* # JPEG 2000 dump utility class
|
72
|
+
PDump.* # PDF dump utility class
|
73
|
+
TDump.* # TIFF dump utility class
|
74
|
+
UserHome.* # user.home property utility class
|
75
|
+
WDump.* # WAVE dump utility class
|
76
|
+
conf/
|
77
|
+
jhove.conf # JHOVE configuration file
|
78
|
+
jhove.xsd # JHOVE output schema
|
79
|
+
jhoveConfig.xsd # JHOVE configuration file schema
|
80
|
+
doc/
|
81
|
+
*.html # API documentation
|
82
|
+
...
|
83
|
+
examples/ # Sample files
|
84
|
+
ascii/ ...
|
85
|
+
gif/ ...
|
86
|
+
jpeg/ ...
|
87
|
+
jpeg2000/ ...
|
88
|
+
pdf/ ...
|
89
|
+
tiff/ ...
|
90
|
+
utf-8/ ...
|
91
|
+
adump* # AIFF dump Bourne shell driver
|
92
|
+
adump.bat* # AIFF dump DOS shell driver script
|
93
|
+
gdump* # GIF dump Bourne shell driver
|
94
|
+
gdump.bat* # GIF dump DOS shell driver script
|
95
|
+
jdump* # JPEG dump Bourne shell driver
|
96
|
+
jdump.bat* # JPEG dump DOS shell driver script
|
97
|
+
j2dump* # JPEG 2000 dump Bourne shell driver
|
98
|
+
j2dump.bat* # JPEG 2000 dump DOS shell driver
|
99
|
+
jhove.tmpl* # Template for JHOVE Bourne shell driver script
|
100
|
+
jhove_bat.tmpl* # Template for JHOVE DOS shell driver script
|
101
|
+
pdump* # PDF dump Bourne shell driver
|
102
|
+
pdump.bat* # PDF dump DOS shell driver script
|
103
|
+
tdump* # TIFF dump Bourne shell driver
|
104
|
+
tdump.bat* # TIFF dump DOS shell driver script
|
105
|
+
userhome* # user.home Bourne shell driver
|
106
|
+
userhome.bat* # user.home DOS shell driver script
|
107
|
+
wdump* # WAVE dump Bourne shell driver
|
108
|
+
wdump.bat* # WAVE dump DOS shell driver script
|
109
|
+
|
110
|
+
INSTALLATION
|
111
|
+
|
112
|
+
Edit the configuration file, jhove/conf/jhove.conf, and set the absolute
|
113
|
+
pathname of the JHOVE home directory and the temporary directory (in which
|
114
|
+
temporary files are created):
|
115
|
+
|
116
|
+
<jhoveHome>jhove-home-directory</jhoveHome>
|
117
|
+
<tempDirectory>temporary-directory</tempDirectory>
|
118
|
+
|
119
|
+
The JHOVE home directory is the top-most directory in the distribution TAR
|
120
|
+
or ZIP file. On Unix systems, "/var/tmp" is an appropriate temporary
|
121
|
+
directory; on Windows, "C:\Temp". For example, if the distribution TAR
|
122
|
+
file is disaggregated on a Unix system in the directory "/users/stephen/
|
123
|
+
projects", then the configuration file should read:
|
124
|
+
|
125
|
+
<jhoveHome>/users/stephen/projects/jhove</jhoveHome>
|
126
|
+
<tempDirectory>/var/tmp</tempDirectory>
|
127
|
+
|
128
|
+
In the JHOVE home directory, copy the JHOVE Bourne shell driver script
|
129
|
+
template, "jhove.tmpl", to "jhove" (or the equivalent Windows shell
|
130
|
+
script, "jhove_bat.tmpl" to "jhove.bat"), and set the
|
131
|
+
JHOVE home directory, Java home directory, and Java interpreter:
|
132
|
+
|
133
|
+
JHOVE_HOME=jhove-home-directory
|
134
|
+
JAVA_HOME=java-home-directory
|
135
|
+
JAVA=java-interpreter
|
136
|
+
|
137
|
+
The JAVA_HOME property should provide the absolute pathname of the Java
|
138
|
+
runtime or SDK installation; JAVA should provide the absolute pathname of the
|
139
|
+
Java interpreter. For example:
|
140
|
+
|
141
|
+
JHOVE_HOME=/users/stephen/projects/jhove
|
142
|
+
JAVA_HOME=/usr/local/j2re1.4.1_02
|
143
|
+
JAVA=$JAVA_HOME/bin/java
|
144
|
+
|
145
|
+
In the DOS shell driver script, jhove.bat, the equivalent three
|
146
|
+
variables are:
|
147
|
+
|
148
|
+
SET JHOVE_HOME=jhove-home-directory
|
149
|
+
SET JAVA_HOME=java-home-directory
|
150
|
+
SET JAVA=%JAVA_HOME%\bin\java
|
151
|
+
|
152
|
+
For example:
|
153
|
+
|
154
|
+
SET JHOVE_HOME="C:\Program Files\jhove"
|
155
|
+
SET JAVA_HOME="C:\Program Files\java\j2re1.4.1_02"
|
156
|
+
SET JAVA=%JAVA_HOME%\bin\java
|
157
|
+
|
158
|
+
The quotation marks are necessary because of the embedded space characters.
|
159
|
+
On Windows platforms it may also be necessary to add the Java bin subdirectory
|
160
|
+
to the System PATH environment variable:
|
161
|
+
|
162
|
+
PATH=C:\Program Files\java\j2re1.4.1_02\bin;...
|
163
|
+
|
164
|
+
(For information on setting a Windows environment variable, consult your local
|
165
|
+
documentation or system administrator.)
|
166
|
+
|
167
|
+
USAGE
|
168
|
+
|
169
|
+
java Jhove [-c config] [-m module] [-h handler] [-e encoding] [-H handler]
|
170
|
+
[-o output] [-x saxclass] [-t tempdir] [-b bufsize]
|
171
|
+
[-l loglevel] [[-krs] dir-file-or-uri [...]]
|
172
|
+
|
173
|
+
where -c config Configuration file pathname
|
174
|
+
-m module Module name
|
175
|
+
-h handler Output handler name (defaults to TEXT)
|
176
|
+
-e encoding Character encoding used by output handler (defaults to UTF-8)
|
177
|
+
-H handler About handler name
|
178
|
+
-o output Output file pathname (defaults to standard output)
|
179
|
+
-x saxclass SAX parser class (defaults to J2SE 1.4 default)
|
180
|
+
-t tempdir Temporary directory in which to create temporary files
|
181
|
+
-b bufsize Buffer size for buffered I/O (defaults to J2SE 1.4 default)
|
182
|
+
-l loglevel Logging level
|
183
|
+
-k Calculate CRC32, MD5, and SHA-1 checksums
|
184
|
+
-r Display raw data flags, not textual equivalents
|
185
|
+
-s Format identification based on internal signatures only
|
186
|
+
dir-file-or-uri Directory or file pathname or URI of formatted content
|
187
|
+
stream
|
188
|
+
|
189
|
+
All named modules and output handlers must be found on the Java CLASSPATH at
|
190
|
+
the time of invocation. The JHOVE driver script, jhove/jhove, automatically
|
191
|
+
sets the CLASSPATH and invokes the Jhove main class:
|
192
|
+
|
193
|
+
jhove [-c config] [-m module] [-h handler] [-e encoding] [-H handler]
|
194
|
+
[-o output] [-x saxclass] [-t tempdir] [-b bufsize] [-l loglevel]
|
195
|
+
[[-krs] dir-file-or-uri [...]]
|
196
|
+
|
197
|
+
The following additional programs are available, primarily for testing
|
198
|
+
and debugging purposes. They display a minimally processed, human-readable
|
199
|
+
version of the contents of AIFF, GIF, JPEG, JPEG 2000, PDF, TIFF, and WAVE
|
200
|
+
files:
|
201
|
+
|
202
|
+
java ADump aiff-file
|
203
|
+
java GDump gif-file
|
204
|
+
java JDump jpeg-file
|
205
|
+
java J2Dump jpeg2000-file
|
206
|
+
java PDump pdf-file
|
207
|
+
java TDump tiff-file
|
208
|
+
java WDump wave-file
|
209
|
+
|
210
|
+
For convenience, the following driver scripts are also available:
|
211
|
+
|
212
|
+
adump aiff-file
|
213
|
+
gdump gif-file
|
214
|
+
jdump jpeg-file
|
215
|
+
j2dump jpeg2000-file
|
216
|
+
pdump pdf-file
|
217
|
+
tdump tiff-file
|
218
|
+
wdump wave-file
|
219
|
+
|
220
|
+
The JHOVE Swing-based GUI interface can be invoked from a command shell from
|
221
|
+
the jhove/bin sub-directory:
|
222
|
+
|
223
|
+
java -jar JhoveView.jar -c <configFile>
|
224
|
+
|
225
|
+
where <configFile> is the pathname of the JHOVE configuration file.
|
@@ -0,0 +1,63 @@
|
|
1
|
+
JHOVE - JSTOR/Harvard Object Validation Environment
|
2
|
+
Copyright 2003-2007 by JSTOR and the President and Fellows of Harvard College
|
3
|
+
JHOVE is made available under the GNU General Public License (see the file
|
4
|
+
LICENSE for details)
|
5
|
+
|
6
|
+
Rev. 2007-08-30
|
7
|
+
|
8
|
+
Edit the configuration file, jhove.conf, and set the JHOVE home
|
9
|
+
directory:
|
10
|
+
|
11
|
+
<jhoveHome>jhove-home-directory</jhoveHome>
|
12
|
+
|
13
|
+
and temporary directory:
|
14
|
+
|
15
|
+
<tempDirectory>temporary-directory</tempDirectory>
|
16
|
+
|
17
|
+
On most Unix systems, a reasonable temporary directory is "/var/tmp";
|
18
|
+
on Windows, "C:\temp".
|
19
|
+
|
20
|
+
The optional
|
21
|
+
|
22
|
+
<bufferSize>buffer-size</bufferSize>
|
23
|
+
|
24
|
+
element defines the buffer size used for buffer I/O operations.
|
25
|
+
|
26
|
+
The optional
|
27
|
+
|
28
|
+
<mixVersion>1.0</mixVersion>
|
29
|
+
|
30
|
+
element specifies that the XML output handler should conform to the
|
31
|
+
MIX 1.0 schema. The default behavior is for handler output to conform
|
32
|
+
to the MIX 0.2 schema.
|
33
|
+
|
34
|
+
The optional
|
35
|
+
|
36
|
+
<sigBytes>n</sigBytes>
|
37
|
+
|
38
|
+
element specifies that JHOVE modules will look for format signatures
|
39
|
+
in the first <n> bytes of the file. The default value is 1024.
|
40
|
+
|
41
|
+
All class names must be fully qualified with their package name:
|
42
|
+
|
43
|
+
<module>
|
44
|
+
<class>fully-package-qualified-class-name</class>
|
45
|
+
<init>optional-initialization-argument</init>
|
46
|
+
<param>optional-invocation-argument</param>
|
47
|
+
</module>
|
48
|
+
|
49
|
+
The optional <init> argument is passed to the module once at the time
|
50
|
+
its class is instantiated. See module-specific documentation for a
|
51
|
+
description of any initialization options.
|
52
|
+
|
53
|
+
The optional <param> argument is passed to the module every time it is
|
54
|
+
invoked. See module-specific documentation for a description of any
|
55
|
+
invocation options.
|
56
|
+
|
57
|
+
The order in which format modules are defined is important; when
|
58
|
+
performing a format identification operation, JHOVE will search for a
|
59
|
+
matching module in the order in which the modules are defined in the
|
60
|
+
configuration file. In general, the modules for more generic formats
|
61
|
+
should come later in the list. For example, the standard module ASCII
|
62
|
+
should be defined before the UTF-8 module, since all ASCII objects
|
63
|
+
are, by definition, UTF-8 objects, but not vice versa.
|
data/bin/jhove.conf
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<jhoveConfig version="1.1"
|
3
|
+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
4
|
+
xmlns="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig"
|
5
|
+
xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig
|
6
|
+
http://cosimo.stanford.edu/standards/jhove/v1/jhoveConfig.xsd">
|
7
|
+
<jhoveHome></jhoveHome>
|
8
|
+
<defaultEncoding>utf-8</defaultEncoding>
|
9
|
+
<tempDirectory>/tmp</tempDirectory>
|
10
|
+
<bufferSize>131072</bufferSize>
|
11
|
+
<mixVersion>1.0</mixVersion>
|
12
|
+
<sigBytes>1024</sigBytes>
|
13
|
+
<module>
|
14
|
+
<class>edu.harvard.hul.ois.jhove.module.AiffModule</class>
|
15
|
+
</module>
|
16
|
+
<module>
|
17
|
+
<class>edu.harvard.hul.ois.jhove.module.WaveModule</class>
|
18
|
+
</module>
|
19
|
+
<module>
|
20
|
+
<class>edu.harvard.hul.ois.jhove.module.PdfModule</class>
|
21
|
+
</module>
|
22
|
+
<module>
|
23
|
+
<class>edu.harvard.hul.ois.jhove.module.Jpeg2000Module</class>
|
24
|
+
</module>
|
25
|
+
<module>
|
26
|
+
<class>edu.harvard.hul.ois.jhove.module.JpegModule</class>
|
27
|
+
</module>
|
28
|
+
<module>
|
29
|
+
<class>edu.harvard.hul.ois.jhove.module.GifModule</class>
|
30
|
+
</module>
|
31
|
+
<module>
|
32
|
+
<class>edu.harvard.hul.ois.jhove.module.TiffModule</class>
|
33
|
+
</module>
|
34
|
+
<module>
|
35
|
+
<class>edu.harvard.hul.ois.jhove.module.XmlModule</class>
|
36
|
+
</module>
|
37
|
+
<module>
|
38
|
+
<class>edu.harvard.hul.ois.jhove.module.HtmlModule</class>
|
39
|
+
</module>
|
40
|
+
<module>
|
41
|
+
<class>edu.harvard.hul.ois.jhove.module.AsciiModule</class>
|
42
|
+
</module>
|
43
|
+
<module>
|
44
|
+
<class>edu.harvard.hul.ois.jhove.module.Utf8Module</class>
|
45
|
+
</module>
|
46
|
+
<outputHandler>
|
47
|
+
<class>edu.harvard.hul.ois.jhove.handler.XmlHandler</class>
|
48
|
+
</outputHandler>
|
49
|
+
|
50
|
+
</jhoveConfig>
|
Binary file
|
data/bin/jhoveToolkit.sh
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/sh

# Launch a class from the JHOVE toolkit jars. The first argument is used by
# the Java launcher as the class to run; the remaining arguments are handed
# to that class unchanged.

# The jars are loaded from the directory that contains this script.
JHOVE_HOME=`dirname "$0"`
export JHOVE_HOME
JAVA_HOME=/etc/alternatives/jre
JAVA=/usr/bin/java

CP="${JHOVE_HOME}/jhoveToolkit.jar:${JHOVE_HOME}/JhoveApp.jar"

# Set the CLASSPATH and invoke the Java loader.
# The double-quoted "$@" expands to all positional parameters with each one
# kept as a single word, so arguments that contain spaces reach Java intact
# (collecting them into one unquoted string would split them again).
"${JAVA}" -Xms128M -Xmx3000M -classpath "$CP" "$@"
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'nokogiri'
require 'pathname'
require 'shellwords'
require 'stringio'
require 'jhove_technical_metadata'
|
5
|
+
|
6
|
+
# Runs the JHOVE toolkit script against a directory of content files and
# converts the resulting JHOVE XML into technicalMetadata XML.
class JhoveService

  # @return [Pathname] The directory in which program files are located
  attr_accessor :bin_pathname

  # @return [Pathname] The directory in which output should be generated
  attr_accessor :target_pathname

  # @return [String] The druid of the object, which gets inserted in the root element of the output
  attr_accessor :digital_object_id

  # @param [String] target_dir The directory into which output should be generated
  def initialize(target_dir=nil)
    @target_pathname = Pathname.new(target_dir) unless target_dir.nil?
    # Program files are looked up in the bin directory beside this file's directory
    @bin_pathname = Pathname.new(File.expand_path(File.dirname(__FILE__) + '/../bin'))
  end

  # @return [Pathname] The output file from the JHOVE run
  def jhove_output
    @target_pathname.join('jhove_output.xml')
  end

  # @return [Pathname] The technicalMetadata.xml output file path
  def tech_md_output
    @target_pathname.join('technicalMetadata.xml')
  end

  # @param content_dir [Pathname,String] the directory path containing the files to be analyzed by JHOVE
  # @param fileset_file [Pathname,String] the pathname of the file listing which files should be processed. If nil, process all files.
  # @return [String] Run JHOVE to characterize all content files, returning the output file path
  # @raise [RuntimeError] if the JHOVE command does not exit successfully
  def run_jhove(content_dir, fileset_file=nil)
    `#{get_jhove_command(content_dir, fileset_file)}`
    unless $?.success?
      raise "Error when running JHOVE against #{content_dir.to_s}"
    end
    jhove_output.to_s
  end

  # @param content_dir [Pathname,String] the directory path containing the files to be analyzed by JHOVE
  # @param fileset_file [Pathname,String] the pathname of the file listing which files should be processed. If nil, process all files.
  # @return [String] The jhove-toolkit command to be executed in a system call
  def get_jhove_command(content_dir, fileset_file=nil)
    if fileset_file.nil?
      args = ['edu.stanford.sulair.jhove.JhoveCommandLine', content_dir.to_s]
    else
      args = ['edu.stanford.sulair.jhove.JhoveFileset', content_dir.to_s, fileset_file.to_s]
    end
    jhove_script = @bin_pathname.join('jhoveToolkit.sh').to_s
    # Every word is shell-escaped so that paths containing spaces or shell
    # metacharacters are passed as single, literal arguments
    words = [jhove_script, *args].map { |word| Shellwords.escape(word) }
    "#{words.join(' ')} > #{Shellwords.escape(jhove_output.to_s)}"
  end

  # @param [Pathname,String] jhove_pathname The full path of the file containing JHOVE output to be transformed to technical metadata
  # @return [String] Convert JHOVE output to technicalMetadata, returning the output file path
  def create_technical_metadata(jhove_pathname=jhove_output)
    jhove_pathname = Pathname.new(jhove_pathname)
    jhovetm = JhoveTechnicalMetadata.new
    jhovetm.digital_object_id = digital_object_id
    # NOTE(review): the stream opened by output_file= is not closed in this
    # method; presumably the handler closes it itself -- confirm.
    jhovetm.output_file = tech_md_output
    # Create a SAX parser
    parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
    # Feed the parser some XML; the block form closes the input file even if parsing raises
    jhove_pathname.open('rb') { |input| parser.parse(input) }
    tech_md_output.to_s
  end

  # @param [String] old_tm the old techMD xml to be transformed to new technical metadata format
  # @return [String] Convert old techMD data to new technicalMetadata format
  def upgrade_technical_metadata(old_tm)
    new_tm = StringIO.new
    upgrade_sax_handler = JhoveTechnicalMetadata.new
    upgrade_sax_handler.digital_object_id = digital_object_id
    # The handler writes into the in-memory buffer instead of a file
    upgrade_sax_handler.ios = new_tm
    # Create a SAX parser
    parser = Nokogiri::XML::SAX::Parser.new(upgrade_sax_handler)
    # Feed the parser some XML
    parser.parse(old_tm)
    new_tm.string
  end

  # @return [void] Cleanup the temporary workspace used to hold the metadata outputs
  def cleanup
    jhove_output.delete if jhove_output.exist?
    tech_md_output.delete if tech_md_output.exist?
  end

end
|
@@ -0,0 +1,284 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'time'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
# A SAX handler for filtering JHOVE output to create a technicalMetadata datastream
|
7
|
+
# The previous mechanism (using XSLT transformation) was causing out of memory errors,
|
8
|
+
# due to XSLT's behavior of loading both the input and output objects into memory.
|
9
|
+
class JhoveTechnicalMetadata < Nokogiri::XML::SAX::Document
|
10
|
+
|
11
|
+
# @return [IO] the output stream for the result
|
12
|
+
attr_accessor :ios
|
13
|
+
|
14
|
+
# @return [String] The druid of the object, which gets inserted in the root element of the output
|
15
|
+
attr_accessor :digital_object_id
|
16
|
+
|
17
|
+
# Start with no indentation and with output directed to standard output
def initialize
  @ios = STDOUT
  @indent = 0
end
|
21
|
+
|
22
|
+
# @param [Pathname] pathname the location of the technicalMetadata.xml file to be created
|
23
|
+
# @return [void] Opens the output stream pointing to the specified file
|
24
|
+
# Point the output stream at the given file, opened for writing ('w' mode).
# Accepts either a Pathname or a String path; the argument is wrapped in a
# Pathname before opening. The stream that was previously assigned is not
# closed by this method.
def output_file=(pathname)
  @ios = Pathname.new(pathname).open('w')
end
|
27
|
+
|
28
|
+
# @param [String] string The character string to be appended to the output
|
29
|
+
# @return [void] Append the specified string to the output stream
|
30
|
+
def output(string)
  # Prefix the text with one padding unit per current indent level
  padding = " " * @indent
  @ios.puts(padding + string)
end
|
33
|
+
|
34
|
+
# @param [String] tag the name of the XML element from the parsed input
|
35
|
+
# @param [Hash] attrs the XML attributes of the element
|
36
|
+
# @return [void] this method is called by the sax parser at the beginning of an element
|
37
|
+
def start_element(tag, attrs = [])
  if tag == 'jhove'
    # <jhove> is the root element of the input
    root_open(attrs)
  elsif tag == 'repInfo'
    # each <repInfo> element holds the data for one file
    file_wrapper_open(attrs)
  elsif tag == 'properties'
    # <properties> holds the variable data for the file
    properties_open
  elsif tag.start_with?('mix')
    # MIX technical metadata for image files is copied verbatim to the output
    mix_open(tag)
  elsif @in_jhove
    # one of the JHOVE elements that are copied automatically
    jhove_open(tag, attrs)
  elsif @in_properties
    # inside <properties> only the LineEndings property is of interest
    linebreak_open(tag)
  end
end
|
61
|
+
|
62
|
+
# @param [String] string the value of a text node found in the parsed XML
|
63
|
+
# @return [void] this method is called by the sax parser when a text node is encountered
|
64
|
+
def characters(string)
  # The SAX parser may deliver a single text node in several consecutive calls,
  # so append each piece instead of keeping only the last one. The element
  # open/close handlers set @text back to nil, which starts a fresh value.
  @text = @text ? @text + string : string
end
|
67
|
+
|
68
|
+
# @param [String] tag the name of the XML element from the parsed input
|
69
|
+
# @return [void] this method is called by the sax parser at the end of an element
|
70
|
+
def end_element(tag)
  # Mirror of start_element: the three structural elements first, then MIX
  # elements, then whatever the current state (@in_jhove / @in_properties) selects.
  if tag == 'jhove'
    root_close
  elsif tag == 'repInfo'
    file_wrapper_close
  elsif tag == 'properties'
    properties_close
  elsif tag.start_with?('mix')
    mix_close(tag)
  elsif @in_jhove
    jhove_close(tag)
  elsif @in_properties
    linebreak_close(tag)
  end
end
|
88
|
+
|
89
|
+
# @param [Hash] attrs the attributes of the <jhove> element in the XML input
|
90
|
+
# @return [void] create the <technicalMetadata> root element of the XML output and include namespace declarations
|
91
|
+
def root_open(attrs)
  # The objectId attribute is only written when an object id has been supplied
  id_attribute = @digital_object_id ? " objectId='#{@digital_object_id}'" : ""
  output "<technicalMetadata#{id_attribute} datetime='#{Time.now.utc.iso8601}'"
  # Namespace declarations are written two levels in ...
  @indent += 2
  [
    "xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'",
    "xmlns:mix='http://www.loc.gov/mix/v10'",
    "xmlns:textmd='info:lc/xmlns/textMD-v3' >"
  ].each { |declaration| output declaration }
  # ... and the document content that follows one level in
  @indent -= 1
end
|
103
|
+
|
104
|
+
# @return [void] add the closing element of the output document
|
105
|
+
def root_close
  @indent -= 1
  output "</technicalMetadata>"
  # The default stream is STDOUT (see initialize); closing it would break any
  # later output from the host process, so the standard streams are only flushed.
  if @ios.equal?(STDOUT) || @ios.equal?(STDERR)
    @ios.flush
  else
    @ios.close
  end
end
|
110
|
+
|
111
|
+
# @param [Hash] attrs the attributes of the <repInfo> element in the XML input
|
112
|
+
# @return [void] Append a <file> element to the output, setting the id attribute to the file path
|
113
|
+
def file_wrapper_open(attrs)
  # Use the value of the 'uri' attribute (the last one, if repeated) as the file id
  filepath = nil
  attrs.each { |attr| filepath = attr[1] if attr[0] == 'uri' }
  # The value goes into a single-quoted attribute, so the characters that
  # would break it must be escaped; a missing uri yields an empty id.
  escaped = filepath.to_s.gsub(/[&<']/, { '&' => '&amp;', '<' => '&lt;', "'" => '&apos;' })
  output "<file id='#{escaped}'>"
  @indent += 1
  # From here until <properties>, child elements are copied as jhove elements
  @in_jhove = true
end
|
120
|
+
|
121
|
+
# @return [void] Append a </file> tag to close the file data,
|
122
|
+
# but first insert a textMD stanza if the file has a text format
|
123
|
+
def file_wrapper_close
  # Text formats get a textMD stanza before the file element is closed
  output_textmd(@linebreak) if ['ASCII', 'HTML', 'TEXT', 'UTF-8'].include?(@format)
  @indent -= 1
  # The </properties> end tag only adjusts the indent; the closing tag is written here
  output " </jhove:properties>" if @in_properties

  output "</file>"
  @in_jhove = false
  @in_properties = false
  # Forget this file's format so a following file without a <format> element
  # is not treated as having the same format.
  @format = nil
end
|
135
|
+
|
136
|
+
# @param [String] tag the name of the XML element from the parsed input
|
137
|
+
# @param [Hash] attrs the attributes of the <jhove> element in the XML input
|
138
|
+
# @return [void] Copy this jhove element tag and its attributes verbatim
|
139
|
+
def jhove_open(tag, attrs)
  if @jhove_tag # saved previously
    # a new element started before the cached one ended, so the cached one
    # has children: write its opening tag now
    output "<jhove:#{@jhove_tag}#{@jhove_attrs}>"
    @indent += 1
  end
  # cache the element name and its attributes until we know whether it is a leaf
  @jhove_tag = tag
  @jhove_attrs = ""
  attrs.each do |attr|
    # values are written single-quoted, so escape what would break the attribute
    value = attr[1].to_s.gsub(/[&<']/, { '&' => '&amp;', '<' => '&lt;', "'" => '&apos;' })
    @jhove_attrs += " #{attr[0]}='#{value}'"
  end
  @text = nil
  # default line ending, kept unless a LineEndings property is found later
  @linebreak = 'LF'
end
|
154
|
+
|
155
|
+
# @param [String] tag the name of the XML element from the parsed input
|
156
|
+
# @return [void] Output a closing tag, preceded by cached data, if such exists
|
157
|
+
def jhove_close(tag)
  if tag == @jhove_tag
    # the cached element is a leaf: write it in one piece
    if @text
      # the parser hands over decoded text, so markup characters must be re-escaped
      escaped = @text.to_s.gsub(/[&<>]/, { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' })
      output "<jhove:#{@jhove_tag}#{@jhove_attrs}>#{escaped}</jhove:#{tag}>"
    else
      output "<jhove:#{@jhove_tag}#{@jhove_attrs}/>"
    end
  else
    # end of an element whose opening tag was already written by jhove_open
    @indent -= 1
    output "</jhove:#{tag}>"
  end
  # remember the (unescaped) format name; file_wrapper_close uses it to
  # decide whether a textMD stanza is needed
  @format = @text if tag == 'format'
  @text = nil
  @jhove_tag = nil
  @jhove_attrs = ""
end
|
171
|
+
|
172
|
+
# @return [void] Output a <properties> element if one was encountered in the input,
|
173
|
+
# then ignore most input data from within the properties element, except mix and LineEndings
|
174
|
+
def properties_open
  output "<jhove:properties>"
  # Switch from copying jhove elements to scanning the properties content
  @in_properties = true
  @in_jhove = false
  @indent += 1
end
|
180
|
+
|
181
|
+
# @return [void] Appending of a closing tag is handled elsewhere
|
182
|
+
def properties_close
  # Only step the indentation back out; no closing tag is written here
  @indent = @indent - 1
end
|
185
|
+
|
186
|
+
# @param [String] tag the name of the XML element from the parsed input
|
187
|
+
# @return [void] Copy any MIX data verbatim
|
188
|
+
def mix_open(tag)
  # A still-cached tag means the previous element has children, so its
  # opening tag is written now and the nesting level increases.
  previous = @mix_tag
  if previous
    output "<#{previous}>"
    @indent += 1
  end
  # Cache the new element until it is known whether it is a leaf
  @text = nil
  @mix_tag = tag
end
|
198
|
+
|
199
|
+
# @param [String] tag the name of the XML element from the parsed input
|
200
|
+
# @return [void] Output a closing tag, preceded by cached data, if such exists
|
201
|
+
def mix_close(tag)
  if tag == @mix_tag
    # the cached element is a leaf: write it in one piece
    if @text
      # the parser hands over decoded text, so markup characters must be re-escaped
      escaped = @text.to_s.gsub(/[&<>]/, { '&' => '&amp;', '<' => '&lt;', '>' => '&gt;' })
      output "<#{tag}>#{escaped}</#{tag}>"
    else
      output "<#{tag}/>"
    end
  else
    # end of an element whose opening tag was already written by mix_open
    @indent -= 1
    output "</#{tag}>"
  end
  @text = nil
  @mix_tag = nil
end
|
213
|
+
|
214
|
+
# @param [String] tag the name of the XML element from the parsed input
|
215
|
+
# @return [void] Keep clearing the text cache any time a new element is encountered
|
216
|
+
def linebreak_open(tag)
  # Discard any text collected before this element started
  @text &&= nil
end
|
219
|
+
|
220
|
+
# @param [String] tag the name of the XML element from the parsed input
|
221
|
+
# @return [void] Look for the LineEndings name/value pair, which is spread across multiple elements
|
222
|
+
def linebreak_close(tag)
  if tag == 'name'
    # a <name> of LineEndings announces that the next <value> is the one we want
    @in_line_endings = (@text == 'LineEndings')
  elsif tag == 'value'
    @linebreak = @text if @in_line_endings
    @in_line_endings = false
  end
end
|
232
|
+
|
233
|
+
# @param [Object] linebreak the CRLF or LF value found in the JHOVE output (default is LF)
|
234
|
+
# @return [void] Output a textMD section within the properties element
|
235
|
+
def output_textmd(linebreak)
  stanza = [
    "<textmd:textMD>",
    "<textmd:character_info>",
    "<textmd:byte_order>big</textmd:byte_order>",
    "<textmd:byte_size>8</textmd:byte_size>",
    "<textmd:character_size>1</textmd:character_size>",
    "<textmd:linebreak>#{linebreak}</textmd:linebreak>",
    "</textmd:character_info>",
    "</textmd:textMD>"
  ]
  # When the input had no properties element the wrapper is supplied here;
  # otherwise the properties tags are written by other code.
  stanza = ["<jhove:properties>"] + stanza + ["</jhove:properties>"] unless @in_properties
  # The stanza is written without the current indentation, which is restored afterwards
  saved_indent = @indent
  @indent = 0
  output stanza.join("\n") + "\n"
  @indent = saved_indent
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
|
271
|
+
# Below is the equivalent of a java main method.
|
272
|
+
# For this to work OK, the module/class being invoked
|
273
|
+
# must already have been loaded by the Ruby interpreter.
|
274
|
+
|
275
|
+
if __FILE__ == $0
  # Command-line entry point. Arguments, in order:
  #   ARGV[0] object id, ARGV[1] JHOVE XML input file, ARGV[2] output file
  # Create a handler
  jhovetm = JhoveTechnicalMetadata.new
  jhovetm.digital_object_id = ARGV[0]
  jhovetm.output_file = Pathname.new(ARGV[2])
  # Create a SAX parser
  parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
  # Feed the parser the input, closing the input file when parsing is done
  File.open(ARGV[1], 'rb') { |input| parser.parse(input) }
end
|
data/lib/tasks/yard.rake
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Documentation tasks: `rake doc` builds the API docs with YARD and
# `rake doc:clean` removes the generated output directory.
desc "Generate RDoc"
task :doc => ['doc:generate']

namespace :doc do
  # This file sits two directories below the project root
  project_root = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
  doc_destination = File.join(project_root, 'doc')

  begin
    require 'yard'
    require 'yard/rake/yardoc_task'

    YARD::Rake::YardocTask.new(:generate) do |yt|
      yt.files = Dir.glob(File.join(project_root, 'lib', '*.rb')) +
                 ['-'] +
                 [File.join(project_root, 'LICENSE.rdoc')]

      yt.options = ['--output-dir', doc_destination, '--hide-void-return']
    end
  rescue LoadError
    # YARD is not installed: define a task of the same name that explains why it cannot run
    desc "Generate YARD Documentation"
    task :generate do
      abort "Please install the YARD gem to generate rdoc."
    end
  end

  desc "Remove generated documentation"
  task :clean do
    # File.exists? was removed in Ruby 3.2; File.exist? is the supported spelling
    rm_r doc_destination if File.exist?(doc_destination)
  end
end
|
metadata
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jhove-service
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.3
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Richard Anderson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-10-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.3.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.3.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: equivalent-xml
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.2.2
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.2.2
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.7
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.7
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: yard
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
description: Generates JHOVE output and/or technicalMetadata
|
111
|
+
email:
|
112
|
+
- rnanders@stanford.edu
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- bin/extension-mimetype.conf
|
118
|
+
- bin/jhove-README-1st.txt
|
119
|
+
- bin/jhove-README-conf.txt
|
120
|
+
- bin/jhove.conf
|
121
|
+
- bin/JhoveApp.jar
|
122
|
+
- bin/jhoveToolkit.jar
|
123
|
+
- bin/jhoveToolkit.sh
|
124
|
+
- lib/jhove_service.rb
|
125
|
+
- lib/jhove_technical_metadata.rb
|
126
|
+
- lib/tasks/yard.rake
|
127
|
+
- LICENSE.rdoc
|
128
|
+
- README.rdoc
|
129
|
+
homepage:
|
130
|
+
licenses: []
|
131
|
+
post_install_message:
|
132
|
+
rdoc_options: []
|
133
|
+
require_paths:
|
134
|
+
- lib
|
135
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
none: false
|
137
|
+
requirements:
|
138
|
+
- - ! '>='
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
segments:
|
142
|
+
- 0
|
143
|
+
hash: 4252808025718583470
|
144
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.3.6
|
150
|
+
requirements: []
|
151
|
+
rubyforge_project:
|
152
|
+
rubygems_version: 1.8.23
|
153
|
+
signing_key:
|
154
|
+
specification_version: 3
|
155
|
+
summary: Generates JHOVE output and/or technicalMetadata
|
156
|
+
test_files: []
|
157
|
+
has_rdoc:
|