jhove-service 1.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/LICENSE.rdoc +23 -0
- data/README.rdoc +28 -0
- data/bin/JhoveApp.jar +0 -0
- data/bin/extension-mimetype.conf +209 -0
- data/bin/jhove-README-1st.txt +225 -0
- data/bin/jhove-README-conf.txt +63 -0
- data/bin/jhove.conf +50 -0
- data/bin/jhoveToolkit.jar +0 -0
- data/bin/jhoveToolkit.sh +23 -0
- data/lib/jhove_service.rb +94 -0
- data/lib/jhove_technical_metadata.rb +284 -0
- data/lib/tasks/yard.rake +31 -0
- metadata +157 -0
data/LICENSE.rdoc
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
Copyright (c) 2013 by The Board of Trustees of the Leland Stanford Junior
|
2
|
+
University. All rights reserved.
|
3
|
+
|
4
|
+
Redistribution and use of this distribution in source and binary forms,
|
5
|
+
with or without modification, are permitted provided that:
|
6
|
+
* The above copyright notice and this permission notice appear in all copies and supporting documentation;
|
7
|
+
* The name, identifiers, and trademarks of The Board of Trustees
|
8
|
+
of the Leland Stanford Junior University are not used in advertising or publicity
|
9
|
+
without the express prior written permission of The Board of Trustees
|
10
|
+
of the Leland Stanford Junior University;
|
11
|
+
* Recipients acknowledge that this distribution is made available as a research courtesy,
|
12
|
+
"as is", potentially with defects, without any obligation on the part
|
13
|
+
of The Board of Trustees of the Leland Stanford Junior University to provide support,
|
14
|
+
services, or repair;
|
15
|
+
|
16
|
+
THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY DISCLAIMS ALL WARRANTIES,
|
17
|
+
EXPRESS OR IMPLIED, WITH REGARD TO THIS SOFTWARE, INCLUDING WITHOUT LIMITATION
|
18
|
+
ALL IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE,
|
19
|
+
AND IN NO EVENT SHALL THE BOARD OF TRUSTEES OF THE LELAND STANFORD JUNIOR UNIVERSITY
|
20
|
+
BE LIABLE FOR ANY SPECIAL, INDIRECT OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES WHATSOEVER
|
21
|
+
RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION OF CONTRACT, TORT
|
22
|
+
(INCLUDING NEGLIGENCE) OR STRICT LIABILITY, ARISING OUT OF OR IN CONNECTION
|
23
|
+
WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
|
data/README.rdoc
ADDED
@@ -0,0 +1,28 @@
|
|
1
|
+
= jhove-service
|
2
|
+
|
3
|
+
Require the following:
|
4
|
+
require 'jhove_service'
|
5
|
+
|
6
|
+
|
7
|
+
This will give you:
|
8
|
+
JhoveService
|
9
|
+
|
10
|
+
== Build and release procedure
|
11
|
+
Modify the version number in jhove-service.gemspec, then push your commits to AFS. DO NOT TAG!
|
12
|
+
Run: 'rake dlss_release' to tag, build, and publish the gem
|
13
|
+
See the Rakefile and the LyberCore::DlssRelease task in lyberteam-devel/lib/dlss/rake/dlss_release.rb
|
14
|
+
for more details
|
15
|
+
|
16
|
+
== Releases
|
17
|
+
- <b>0.1.0</b> First release
|
18
|
+
- <b>0.1.1</b> Rename temp to target (location in which to create output)
|
19
|
+
- <b>0.2.0</b> Change method of transforming JHOVE output to technicalMetadata
|
20
|
+
- <b>0.2.1</b> Enable processing of a subset of files in a directory
|
21
|
+
- <b>0.2.2</b> Enable upgrading of old technical metadata to the new format
|
22
|
+
- <b>1.0.0</b> Update JhoveToolkit.jar to v1.0.0. Fix output for filetypes w/o jhove module (bytestreams)
|
23
|
+
- <b>1.0.1</b> Updated to use new sul-gems server and new lyberteam-gems-devel utility
|
24
|
+
- <b>1.0.2</b> Fixed negative indentation problem for empty elements( e.g. <elem/>)
|
25
|
+
- <b>1.0.3</b> Upgraded bin/JhoveApp.jar to the copy from the JHOVE 1.9 release
|
26
|
+
== Copyright
|
27
|
+
|
28
|
+
Copyright (c) 2013 Stanford University Library. See LICENSE for details.
|
data/bin/JhoveApp.jar
ADDED
Binary file
|
@@ -0,0 +1,209 @@
|
|
1
|
+
3dmf=x-world/x-3dmf
|
2
|
+
ai=application/vnd.adobe-illustrator
|
3
|
+
aif=audio/x-aiff
|
4
|
+
aifc=audio/x-aiff
|
5
|
+
aiff=audio/x-aiff
|
6
|
+
als=image/x-vivid
|
7
|
+
art=image/x-first-publisher-art
|
8
|
+
asc=image/x-hp-graphic-obj
|
9
|
+
asf=video/x-ms-asf
|
10
|
+
asp=text/html
|
11
|
+
asx=video/x-ms-asx
|
12
|
+
au=audio/x-au
|
13
|
+
avi=video/x-msvideo
|
14
|
+
bcpio=application/dca-rft
|
15
|
+
bin=application/macbinary
|
16
|
+
bm=image/x-xbitmap
|
17
|
+
bmp=image/bmp
|
18
|
+
c4=image/x-ccitt4
|
19
|
+
cal=image/x-cals
|
20
|
+
cals=image/x-cals
|
21
|
+
ccrf=image/x-calcomp-ccrf
|
22
|
+
cdda=audio/x-aiff
|
23
|
+
cdr=image/vnd.corel-draw
|
24
|
+
cel=image/x-lumena-cel
|
25
|
+
cfm=text/html
|
26
|
+
cgi=text/html
|
27
|
+
class=application/octet-stream
|
28
|
+
clp=application/x-pcpaint
|
29
|
+
cm=image/x-puzzle
|
30
|
+
cpio=application/x-DisplayWrite-txt
|
31
|
+
crf=image/x-calcomp-ccrf
|
32
|
+
css=text/css
|
33
|
+
ct=image/x-scitex-ct
|
34
|
+
dcr=application/x-director
|
35
|
+
dcx=image/x-pcx
|
36
|
+
dir=application/x-director
|
37
|
+
doc=application/vnd.ms-word
|
38
|
+
dvi=application/x-dvi
|
39
|
+
dxf=application/x-autoCAD-dxf
|
40
|
+
dxr=application/x-director
|
41
|
+
eml=text/plain
|
42
|
+
epi=image/x-eps-interchange
|
43
|
+
eps=image/eps
|
44
|
+
epsf=image/eps
|
45
|
+
epsi=image/x-eps-interchange
|
46
|
+
etx=text/x-setextetx
|
47
|
+
evy=application/x-envoy
|
48
|
+
exe=application/octet-stream
|
49
|
+
fh7=application/vnd.mm-freehand
|
50
|
+
fif=image/x-fractal
|
51
|
+
gif=image/gif
|
52
|
+
gis=application/x-erdas-lan
|
53
|
+
gm=image/x-autologic-gm
|
54
|
+
gm2=image/x-autologic-gm
|
55
|
+
gm4=image/x-autologic-gm
|
56
|
+
goe=image/x-goes
|
57
|
+
goes=image/x-goes
|
58
|
+
grb=image/x-hp-graphic-obj
|
59
|
+
gtar=application/x-gtar
|
60
|
+
hdf=application/vnd.samna
|
61
|
+
hpg=application/vnd.hp-HPGL
|
62
|
+
hpgl=application/vnd.hp-HPGL
|
63
|
+
hqx=application/mac-binhex40
|
64
|
+
hrf=image/x-hitachi-raster
|
65
|
+
htm=text/html
|
66
|
+
html=text/html
|
67
|
+
ico=image/x-sun-icon
|
68
|
+
icon=image/x-sun-icon
|
69
|
+
ics=application/ProWrite
|
70
|
+
idc=image/x-core-idc
|
71
|
+
ief=image/ief
|
72
|
+
iff=image/x-iff-ilbm
|
73
|
+
ilbm=image/x-iff-ilbm
|
74
|
+
im=image/x-sun-raster
|
75
|
+
im1=image/x-sun-raster
|
76
|
+
im24=image/x-sun-raster
|
77
|
+
im32=image/x-sun-raster
|
78
|
+
im8=image/x-sun-raster
|
79
|
+
img=image/x-vivid
|
80
|
+
ind=application/vnd.adobe-indesign
|
81
|
+
indd=application/vnd.adobe-indesign
|
82
|
+
jpe=image/jpeg
|
83
|
+
jpeg=image/jpeg
|
84
|
+
jpg=image/jpeg
|
85
|
+
jp2=image/jp2
|
86
|
+
js=application/x-javascript
|
87
|
+
kar=audio/midi
|
88
|
+
lan=application/x-erdas-lan
|
89
|
+
latex=application/x-latex
|
90
|
+
lbm=image/x-iff-ilbm
|
91
|
+
mac=image/x-iff-ilbm
|
92
|
+
mcw=application/vnd.ms-word
|
93
|
+
mdb=application/x-msaccess
|
94
|
+
mht=text/html
|
95
|
+
mid=audio/midi
|
96
|
+
midi=audio/midi
|
97
|
+
mif=application/vnd.framemaker-mif
|
98
|
+
mov=video/quicktime
|
99
|
+
movie=video/x-sgi-movie
|
100
|
+
mp2=audio/x-mpeg
|
101
|
+
MP2=audio/x-mpeg
|
102
|
+
mp3=audio/x-mpeg
|
103
|
+
mpa=audio/x-mpeg,video/x-mpeg
|
104
|
+
mpe=video/mpeg
|
105
|
+
mpeg=video/mpeg
|
106
|
+
mpg=video/mpeg
|
107
|
+
mpga=audio/x-mpeg
|
108
|
+
ms-powerpoint=application/vnd.ms-powerpoint
|
109
|
+
msword=application/vnd.ms-word
|
110
|
+
mtv=image/x-mtv-ray
|
111
|
+
navb=image/x-navb
|
112
|
+
octet-stream=application/octet-stream
|
113
|
+
oda=application/x-pfsWrite
|
114
|
+
pat=image/x-us-patent
|
115
|
+
pbm=image/x-portable-bitmap
|
116
|
+
pcd=image/x-photo-cd
|
117
|
+
pcl=application/vnd.hp-PCL
|
118
|
+
pcx=image/x-pcx
|
119
|
+
pdf=application/pdf
|
120
|
+
pds=application/vnd.NASA-PDS
|
121
|
+
pgm=image/x-portable-graymap
|
122
|
+
photo=text/plain
|
123
|
+
php=text/plain
|
124
|
+
pic=image/x-macintosh-pict
|
125
|
+
pict=image/x-macintosh-pict
|
126
|
+
pjpeg=image/jpeg
|
127
|
+
pl=text/plain
|
128
|
+
pm=image/x-ibm-picture-mkr
|
129
|
+
png=image/png
|
130
|
+
pnm=image/x-portable-anymap
|
131
|
+
ppm=image/x-portable-pixmap
|
132
|
+
pps=application/vnd.ms-powerpoint
|
133
|
+
ppt=application/vnd.ms-powerpoint
|
134
|
+
ppz=application/vnd.ms-powerpoint
|
135
|
+
prn=image/x-calcomp-ccrf
|
136
|
+
ps=application/postscript
|
137
|
+
psd=application/vnd.adobe-photoshop
|
138
|
+
puzzle=image/x-puzzle
|
139
|
+
pzl=image/x-puzzle
|
140
|
+
qdv=image/x-qdv
|
141
|
+
qpr=application/quattroPro
|
142
|
+
qt=video/quicktime
|
143
|
+
qxd=application/vnd.quark
|
144
|
+
ra=audio/vnd.realaudio
|
145
|
+
ram=audio/x-pn-realaudio
|
146
|
+
ras=image/x-sun-raster
|
147
|
+
rast=image/x-qdv
|
148
|
+
raw=application/vnd.adobe-photoshop
|
149
|
+
rg=image/x-raster-graphics
|
150
|
+
rgb=image/x-rgb
|
151
|
+
rix=image/x-rix
|
152
|
+
rlc=image/x-rlc
|
153
|
+
rle=image/x-utah-raster
|
154
|
+
rm=audio/x-realmedia,video/vnd.realvideo
|
155
|
+
rnc=application/vnd.relax-ng.rnc
|
156
|
+
rtf=text/richtext
|
157
|
+
rtl=application/vnd.hp-RTL
|
158
|
+
scd=image/x-scodl
|
159
|
+
scx=image/x-rix
|
160
|
+
sea=application/sit
|
161
|
+
sgi=image/x-sgi-image
|
162
|
+
sgm=text/sgml
|
163
|
+
sgml=text/sgml
|
164
|
+
shtml=text/html
|
165
|
+
sit=application/sit
|
166
|
+
smil=application/smil
|
167
|
+
snd=audio/basic
|
168
|
+
spl=application/futuresplash
|
169
|
+
spr=application/x-sprite
|
170
|
+
sprite=application/x-sprite
|
171
|
+
sst=image/x-avhrr
|
172
|
+
svp=video/x-pn-realvideo
|
173
|
+
swf=application/x-shockwave-flash
|
174
|
+
tar=application/x-tar
|
175
|
+
tbk=application/toolbook
|
176
|
+
tex=application/x-tex
|
177
|
+
texi=application/x-FirstChoice
|
178
|
+
texinfo=application/x-FirstChoice
|
179
|
+
tga=image/x-targa
|
180
|
+
tif=image/tiff
|
181
|
+
tiff=image/tiff
|
182
|
+
tsp=application/x-SmartDraw
|
183
|
+
tsv=text/tab-separated-values
|
184
|
+
txt=text/plain
|
185
|
+
ustar=application/x-wordstar
|
186
|
+
vi=image/x-jovian-vi
|
187
|
+
vif=image/x-verity-image
|
188
|
+
vit=image/x-vitec
|
189
|
+
vrml=x-world/x-vrml
|
190
|
+
wav=audio/x-wave
|
191
|
+
wk1=application/vnd.lotus1-2-3
|
192
|
+
wk3=application/vnd.lotus1-2-3
|
193
|
+
wk4=application/vnd.lotus1-2-3
|
194
|
+
wks=application/vnd.lotus1-2-3
|
195
|
+
wma=audio/x-ms-wma
|
196
|
+
wmv=video/x-ms-wmv
|
197
|
+
wpd=application/wordperfect
|
198
|
+
wpg=application/x-wordperfect-graphic
|
199
|
+
wrl=x-world/x-vrml
|
200
|
+
xbm=image/x-xbitmap
|
201
|
+
xls=application/vnd.ms-excel
|
202
|
+
xml=text/xml
|
203
|
+
xsd=text/xml
|
204
|
+
xsl=text/xml
|
205
|
+
xpm=image/x-xpixmap
|
206
|
+
x-png=image/png
|
207
|
+
x-shockwave-flash=application/x-shockwave-flash
|
208
|
+
xwd=image/x-xwindowdump
|
209
|
+
zip=application/zip
|
@@ -0,0 +1,225 @@
|
|
1
|
+
JHOVE - JSTOR/Harvard Object Validation Environment
|
2
|
+
Copyright 2003-2008 by JSTOR and the President and Fellows of Harvard College
|
3
|
+
JHOVE is made available under the GNU Lesser General Public License (LGPL;
|
4
|
+
see the file LICENSE for details)
|
5
|
+
|
6
|
+
Rev. 1.2, 2009-02-10
|
7
|
+
|
8
|
+
JHOVE (the JSTOR/Harvard Object Validation Environment, pronounced "jhove")
|
9
|
+
is an extensible software framework for performing format identification,
|
10
|
+
validation, and characterization of digital objects.
|
11
|
+
|
12
|
+
o Format identification is the process of determining the format to which a
|
13
|
+
digital object conforms: "I have a digital object; what format is it?"
|
14
|
+
o Format validation is the process of determining the level of compliance of a
|
15
|
+
digital object to the specification for its purported format: "I have an
|
16
|
+
object purportedly of format F; is it?"
|
17
|
+
o Format characterization is the process of determining the format-specific
|
18
|
+
significant properties of an object of a given format: "I have an object of
|
19
|
+
format F; what are its salient properties?"
|
20
|
+
|
21
|
+
These actions are frequently necessary during routine operation of digital
|
22
|
+
repositories and for digital preservation activities.
|
23
|
+
|
24
|
+
The output from JHOVE is controlled by output handlers. JHOVE uses an
|
25
|
+
extensible plug-in architecture; it can be configured at the time of its
|
26
|
+
invocation to include whatever specific format modules and output handlers
|
27
|
+
that are desired. The initial release of JHOVE includes modules for
|
28
|
+
arbitrary byte streams, ASCII and UTF-8 encoded text, AIFF and WAVE audio,
|
29
|
+
GIF, JPEG, JPEG 2000, TIFF, and PDF; and text and XML output handlers.
|
30
|
+
|
31
|
+
The JHOVE project is a collaboration of JSTOR and the Harvard University
|
32
|
+
Library. Development of JHOVE was funded in part by the Andrew W. Mellon
|
33
|
+
Foundation. JHOVE is made available under the GNU Lesser General Public
|
34
|
+
License (LGPL; see the file LICENSE for details).
|
35
|
+
|
36
|
+
REQUIREMENTS
|
37
|
+
|
38
|
+
1. Java J2SE 1.4
|
39
|
+
(JHOVE was originally implemented using the Sun J2SE SDK 1.4.1 and has
|
40
|
+
been tested to work with 1.4.2 <http://java.sun.com/j2se/1.4.2/>)
|
41
|
+
|
42
|
+
2. If you would like to compile the JHOVE source code, then
|
43
|
+
Apache Ant, a Java-based build tool <http://ant.apache.org/> is necessary.
|
44
|
+
Note that the JAVA_HOME environment variable must be set appropriately for
|
45
|
+
Ant to work properly.
|
46
|
+
(JHOVE was implemented and tested using Ant 1.5.1.)
|
47
|
+
|
48
|
+
DISTRIBUTION
|
49
|
+
|
50
|
+
The JHOVE distribution package includes:
|
51
|
+
|
52
|
+
jhove/ # JHOVE home directory
|
53
|
+
COPYING # GNU Lesser General Public License
|
54
|
+
LICENSE # JHOVE license information
|
55
|
+
README
|
56
|
+
RELEASENOTES # JHOVE release notes
|
57
|
+
bin/
|
58
|
+
jhove.jar # JHOVE API package
|
59
|
+
jhove-handler.jar # Standard output handler package
|
60
|
+
jhove-module.jar # Standard module package
|
61
|
+
JhoveApp.jar # JHOVE command line application
|
62
|
+
JhoveView.jar # JHOVE with Swing GUI front-end
|
63
|
+
build.xml # Ant configuration file
|
64
|
+
classes/
|
65
|
+
build.xml # Ant configuration file
|
66
|
+
edu/ ... # JHOVE API packages
|
67
|
+
ADump.* # AIFF dump utility class
|
68
|
+
GDump.* # GIF dump utility class
|
69
|
+
Jhove.* # JHOVE main class
|
70
|
+
JDump.* # JPEG dump utility class
|
71
|
+
J2Dump.* # JPEG 2000 dump utility class
|
72
|
+
PDump.* # PDF dump utility class
|
73
|
+
TDump.* # TIFF dump utility class
|
74
|
+
UserHome.* # user.home property utility class
|
75
|
+
WDump.* # WAVE dump utility class
|
76
|
+
conf/
|
77
|
+
jhove.conf # JHOVE configuration file
|
78
|
+
jhove.xsd # JHOVE output schema
|
79
|
+
jhoveConfig.xsd # JHOVE configuration file schema
|
80
|
+
doc/
|
81
|
+
*.html # API documentation
|
82
|
+
...
|
83
|
+
examples/ # Sample files
|
84
|
+
ascii/ ...
|
85
|
+
gif/ ...
|
86
|
+
jpeg/ ...
|
87
|
+
jpeg2000/ ...
|
88
|
+
pdf/ ...
|
89
|
+
tiff/ ...
|
90
|
+
utf-8/ ...
|
91
|
+
adump* # AIFF dump Bourne shell driver
|
92
|
+
adump.bat* # AIFF dump DOS shell driver script
|
93
|
+
gdump* # GIF dump Bourne shell driver
|
94
|
+
gdump.bat* # GIF dump DOS shell driver script
|
95
|
+
jdump* # JPEG dump Bourne shell driver
|
96
|
+
jdump.bat* # JPEG dump DOS shell driver script
|
97
|
+
j2dump* # JPEG 2000 dump Bourne shell driver
|
98
|
+
j2dump.bat* # JPEG 2000 dump DOS shell driver
|
99
|
+
jhove.tmpl* # Template for JHOVE Bourne shell driver script
|
100
|
+
jhove_bat.tmpl* # Template for JHOVE DOS shell driver script
|
101
|
+
pdump* # PDF dump Bourne shell driver
|
102
|
+
pdump.bat* # PDF dump DOS shell driver script
|
103
|
+
tdump* # TIFF dump Bourne shell driver
|
104
|
+
tdump.bat* # TIFF dump DOS shell driver script
|
105
|
+
userhome* # user.home Bourne shell driver
|
106
|
+
userhome.bat* # user.home DOS shell driver script
|
107
|
+
wdump* # WAVE dump Bourne shell driver
|
108
|
+
wdump.bat* # WAVE dump DOS shell driver script
|
109
|
+
|
110
|
+
INSTALLATION
|
111
|
+
|
112
|
+
Edit the configuration file, jhove/conf/jhove.conf, and set the absolute
|
113
|
+
pathname of the JHOVE home directory and the temporary directory (in which
|
114
|
+
temporary files are created):
|
115
|
+
|
116
|
+
<jhoveHome>jhove-home-directory</jhoveHome>
|
117
|
+
<tempDirectory>temporary-directory</tempDirectory>
|
118
|
+
|
119
|
+
The JHOVE home directory is the top-most directory in the distribution TAR
|
120
|
+
or ZIP file. On Unix systems, "/var/tmp" is an appropriate temporary
|
121
|
+
directory; on Windows, "C:\Temp". For example, if the distribution TAR
|
122
|
+
file is disaggregated on a Unix system in the directory "/users/stephen/
|
123
|
+
projects", then the configuration file should read:
|
124
|
+
|
125
|
+
<jhoveHome>/users/stephen/projects/jhove</jhoveHome>
|
126
|
+
<tempDirectory>/var/tmp</tempDirectory>
|
127
|
+
|
128
|
+
In the JHOVE home directory, copy the JHOVE Bourne shell driver script
|
129
|
+
template, "jhove.tmpl", to "jhove" (or the equivalent Windows shell
|
130
|
+
script, "jhove_bat.tmpl" to "jhove.bat"), and set the
|
131
|
+
JHOVE home directory, Java home directory, and Java interpreter:
|
132
|
+
|
133
|
+
JHOVE_HOME=jhove-home-directory
|
134
|
+
JAVA_HOME=java-home-directory
|
135
|
+
JAVA=java-interpreter
|
136
|
+
|
137
|
+
The JAVA_HOME property should provide the absolute pathname of the Java
|
138
|
+
runtime or SDK installation; JAVA should provide the absolute pathname of the
|
139
|
+
Java interpreter. For example:
|
140
|
+
|
141
|
+
JHOVE_HOME=/users/stephen/projects/jhove
|
142
|
+
JAVA_HOME=/usr/local/j2re1.4.1_02
|
143
|
+
JAVA=$JAVA_HOME/bin/java
|
144
|
+
|
145
|
+
In the DOS shell driver script, jhove.bat, the equivalent three
|
146
|
+
variables are:
|
147
|
+
|
148
|
+
SET JHOVE_HOME=jhove-home-directory
|
149
|
+
SET JAVA_HOME=java-home-directory
|
150
|
+
SET JAVA=%JAVA_HOME%\bin\java
|
151
|
+
|
152
|
+
For example:
|
153
|
+
|
154
|
+
SET JHOVE_HOME="C:\Program Files\jhove"
|
155
|
+
SET JAVA_HOME="C:\Program Files\java\j2re1.4.1_02"
|
156
|
+
SET JAVA=%JAVA_HOME%\bin\java
|
157
|
+
|
158
|
+
The quotation marks are necessary because of the embedded space characters.
|
159
|
+
On Windows platforms it may also be necessary to add the Java bin subdirectory
|
160
|
+
to the System PATH environment variable:
|
161
|
+
|
162
|
+
PATH=C:\Program Files\java\j2re1.4.1_02\bin;...
|
163
|
+
|
164
|
+
(For information on setting a Windows environment variable, consult your local
|
165
|
+
documentation or system administrator.)
|
166
|
+
|
167
|
+
USAGE
|
168
|
+
|
169
|
+
java Jhove [-c config] [-m module] [-h handler] [-e encoding] [-H handler]
|
170
|
+
[-o output] [-x saxclass] [-t tempdir] [-b bufsize]
|
171
|
+
[-l loglevel] [[-krs] dir-file-or-uri [...]]
|
172
|
+
|
173
|
+
where -c config Configuration file pathname
|
174
|
+
-m module Module name
|
175
|
+
-h handler Output handler name (defaults to TEXT)
|
176
|
+
-e encoding Character encoding used by output handler (defaults to UTF-8)
|
177
|
+
-H handler About handler name
|
178
|
+
-o output Output file pathname (defaults to standard output)
|
179
|
+
-x saxclass SAX parser class (defaults to J2SE 1.4 default)
|
180
|
+
-t tempdir Temporary directory in which to create temporary files
|
181
|
+
-b bufsize Buffer size for buffered I/O (defaults to J2SE 1.4 default)
|
182
|
+
-l loglevel Logging level
|
183
|
+
-k Calculate CRC32, MD5, and SHA-1 checksums
|
184
|
+
-r Display raw data flags, not textual equivalents
|
185
|
+
-s Format identification based on internal signatures only
|
186
|
+
dir-file-or-uri Directory or file pathname or URI of formatted content
|
187
|
+
stream
|
188
|
+
|
189
|
+
All named modules and output handlers must be found on the Java CLASSPATH at
|
190
|
+
the time of invocation. The JHOVE driver script, jhove/jhove, automatically
|
191
|
+
sets the CLASSPATH and invokes the Jhove main class:
|
192
|
+
|
193
|
+
jhove [-c config] [-m module] [-h handler] [-e encoding] [-H handler]
|
194
|
+
[-o output] [-x saxclass] [-t tempdir] [-b bufsize] [-l loglevel]
|
195
|
+
[[-krs] dir-file-or-uri [...]]
|
196
|
+
|
197
|
+
The following additional programs are available, primarily for testing
|
198
|
+
and debugging purposes. They display a minimally processed, human-readable
|
199
|
+
version of the contents of AIFF, GIF, JPEG, JPEG 2000, PDF, TIFF, and WAVE
|
200
|
+
files:
|
201
|
+
|
202
|
+
java ADump aiff-file
|
203
|
+
java GDump gif-file
|
204
|
+
java JDump jpeg-file
|
205
|
+
java J2Dump jpeg2000-file
|
206
|
+
java PDump pdf-file
|
207
|
+
java TDump tiff-file
|
208
|
+
java WDump wave-file
|
209
|
+
|
210
|
+
For convenience, the following driver scripts are also available:
|
211
|
+
|
212
|
+
adump aiff-file
|
213
|
+
gdump gif-file
|
214
|
+
jdump jpeg-file
|
215
|
+
j2dump jpeg2000-file
|
216
|
+
pdump pdf-file
|
217
|
+
tdump tiff-file
|
218
|
+
wdump wave-file
|
219
|
+
|
220
|
+
The JHOVE Swing-based GUI interface can be invoked from a command shell from
|
221
|
+
the jhove/bin sub-directory:
|
222
|
+
|
223
|
+
java -jar JhoveView.jar -c <configFile>
|
224
|
+
|
225
|
+
where <configFile> is the pathname of the JHOVE configuration file.
|
@@ -0,0 +1,63 @@
|
|
1
|
+
JHOVE - JSTOR/Harvard Object Validation Environment
|
2
|
+
Copyright 2003-2007 by JSTOR and the President and Fellows of Harvard College
|
3
|
+
JHOVE is made available under the GNU General Public License (see the file
|
4
|
+
LICENSE for details)
|
5
|
+
|
6
|
+
Rev. 2007-08-30
|
7
|
+
|
8
|
+
Edit the configuration file, jhove.conf, and set the JHOVE home
|
9
|
+
directory:
|
10
|
+
|
11
|
+
<jhoveHome>jhove-home-directory</jhoveHome>
|
12
|
+
|
13
|
+
and temporary directory:
|
14
|
+
|
15
|
+
<tempDirectory>temporary-directory</tempDirectory>
|
16
|
+
|
17
|
+
On most Unix systems, a reasonable temporary directory is "/var/tmp";
|
18
|
+
on Windows, "C:\temp".
|
19
|
+
|
20
|
+
The optional
|
21
|
+
|
22
|
+
<bufferSize>buffer-size</bufferSize>
|
23
|
+
|
24
|
+
element defines the buffer size used for buffer I/O operations.
|
25
|
+
|
26
|
+
The optional
|
27
|
+
|
28
|
+
<mixVersion>1.0</mixVersion>
|
29
|
+
|
30
|
+
element specifies that the XML output handler should conform to the
|
31
|
+
MIX 1.0 schema. The default behavior is for handler output to conform
|
32
|
+
to the MIX 0.2 schema.
|
33
|
+
|
34
|
+
The optional
|
35
|
+
|
36
|
+
<sigBytes>n</sigBytes>
|
37
|
+
|
38
|
+
element specifies that JHOVE modules will look for format signatures
|
39
|
+
in the first <n> bytes of the file. The default value is 1024.
|
40
|
+
|
41
|
+
All class names must be fully qualified with their package name:
|
42
|
+
|
43
|
+
<module>
|
44
|
+
<class>fully-package-qualified-class-name</class>
|
45
|
+
<init>optional-initialization-argument</init>
|
46
|
+
<param>optional-invocation-argument</param>
|
47
|
+
</module>
|
48
|
+
|
49
|
+
The optional <init> argument is passed to the module once at the time
|
50
|
+
its class is instantiated. See module-specific documentation for a
|
51
|
+
description of any initialization options.
|
52
|
+
|
53
|
+
The optional <param> argument is passed to the module every time it is
|
54
|
+
invoked. See module-specific documentation for a description of any
|
55
|
+
invocation options.
|
56
|
+
|
57
|
+
The order in which format modules are defined is important; when
|
58
|
+
performing a format identification operation, JHOVE will search for a
|
59
|
+
matching module in the order in which the modules are defined in the
|
60
|
+
configuration file. In general, the modules for more generic formats
|
61
|
+
should come later in the list. For example, the standard module ASCII
|
62
|
+
should be defined before the UTF-8 module, since all ASCII objects
|
63
|
+
are, by definition, UTF-8 objects, but not vice versa.
|
data/bin/jhove.conf
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<jhoveConfig version="1.1"
|
3
|
+
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
4
|
+
xmlns="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig"
|
5
|
+
xsi:schemaLocation="http://hul.harvard.edu/ois/xml/ns/jhove/jhoveConfig
|
6
|
+
http://cosimo.stanford.edu/standards/jhove/v1/jhoveConfig.xsd">
|
7
|
+
<jhoveHome></jhoveHome>
|
8
|
+
<defaultEncoding>utf-8</defaultEncoding>
|
9
|
+
<tempDirectory>/tmp</tempDirectory>
|
10
|
+
<bufferSize>131072</bufferSize>
|
11
|
+
<mixVersion>1.0</mixVersion>
|
12
|
+
<sigBytes>1024</sigBytes>
|
13
|
+
<module>
|
14
|
+
<class>edu.harvard.hul.ois.jhove.module.AiffModule</class>
|
15
|
+
</module>
|
16
|
+
<module>
|
17
|
+
<class>edu.harvard.hul.ois.jhove.module.WaveModule</class>
|
18
|
+
</module>
|
19
|
+
<module>
|
20
|
+
<class>edu.harvard.hul.ois.jhove.module.PdfModule</class>
|
21
|
+
</module>
|
22
|
+
<module>
|
23
|
+
<class>edu.harvard.hul.ois.jhove.module.Jpeg2000Module</class>
|
24
|
+
</module>
|
25
|
+
<module>
|
26
|
+
<class>edu.harvard.hul.ois.jhove.module.JpegModule</class>
|
27
|
+
</module>
|
28
|
+
<module>
|
29
|
+
<class>edu.harvard.hul.ois.jhove.module.GifModule</class>
|
30
|
+
</module>
|
31
|
+
<module>
|
32
|
+
<class>edu.harvard.hul.ois.jhove.module.TiffModule</class>
|
33
|
+
</module>
|
34
|
+
<module>
|
35
|
+
<class>edu.harvard.hul.ois.jhove.module.XmlModule</class>
|
36
|
+
</module>
|
37
|
+
<module>
|
38
|
+
<class>edu.harvard.hul.ois.jhove.module.HtmlModule</class>
|
39
|
+
</module>
|
40
|
+
<module>
|
41
|
+
<class>edu.harvard.hul.ois.jhove.module.AsciiModule</class>
|
42
|
+
</module>
|
43
|
+
<module>
|
44
|
+
<class>edu.harvard.hul.ois.jhove.module.Utf8Module</class>
|
45
|
+
</module>
|
46
|
+
<outputHandler>
|
47
|
+
<class>edu.harvard.hul.ois.jhove.handler.XmlHandler</class>
|
48
|
+
</outputHandler>
|
49
|
+
|
50
|
+
</jhoveConfig>
|
Binary file
|
data/bin/jhoveToolkit.sh
ADDED
@@ -0,0 +1,23 @@
|
|
1
|
+
#!/bin/sh
# Launcher for the JHOVE toolkit: puts jhoveToolkit.jar and JhoveApp.jar (both
# expected in the same directory as this script) on the classpath and starts
# the JVM, passing every command line argument through unchanged.

# Directory containing this script. "$0" is quoted so that an install path
# containing spaces is not split into several words.
JHOVE_HOME=`dirname "$0"`
export JHOVE_HOME
JAVA_HOME=/etc/alternatives/jre
JAVA=/usr/bin/java

CP="${JHOVE_HOME}/jhoveToolkit.jar:${JHOVE_HOME}/JhoveApp.jar"

# Set the CLASSPATH and invoke the Java loader.
# The double-quoted "$@" expands to the positional parameters with each one
# kept as a single word, so arguments containing spaces or glob characters
# reach the application intact. (Collecting them into one string and expanding
# it unquoted would re-split and glob-expand them.)
"${JAVA}" -Xms128M -Xmx3000M -classpath "$CP" "$@"
|
@@ -0,0 +1,94 @@
|
|
1
|
+
require 'pathname'
require 'shellwords'
require 'stringio'
require 'nokogiri'
require 'jhove_technical_metadata'
|
5
|
+
|
6
|
+
# Runs the JHOVE toolkit shell script against a directory of content files and
# converts the resulting JHOVE XML into a technicalMetadata document.
class JhoveService

  # @return [Pathname] The directory in which program files are located
  attr_accessor :bin_pathname

  # @return [Pathname] The directory in which output should be generated
  attr_accessor :target_pathname

  # @return [String] The druid of the object, which gets inserted in the root element of the output
  attr_accessor :digital_object_id

  # @param [String] target_dir The directory into which output should be generated
  def initialize(target_dir=nil)
    @target_pathname = Pathname.new(target_dir) unless target_dir.nil?
    # Program files live in the bin directory that is a sibling of this file's directory
    @bin_pathname = Pathname.new(File.expand_path(File.dirname(__FILE__) + '/../bin'))
  end

  # @return [Pathname] The output file from the JHOVE run
  def jhove_output
    @target_pathname.join('jhove_output.xml')
  end

  # @return [Pathname] The technicalMetadata.xml output file path
  def tech_md_output
    @target_pathname.join('technicalMetadata.xml')
  end

  # @param content_dir [Pathname,String] the directory path containing the files to be analyzed by JHOVE
  # @param fileset_file [Pathname,String] the pathname of the file listing which files should be processed. If nil, process all files.
  # @return [String] Run JHOVE to characterize all content files, returning the output file path
  # @raise [RuntimeError] if the JHOVE command exits with a non-zero status
  def run_jhove(content_dir, fileset_file=nil)
    `#{get_jhove_command(content_dir, fileset_file)}`
    raise "Error when running JHOVE against #{content_dir.to_s}" unless $?.exitstatus == 0
    jhove_output.to_s
  end

  # @param content_dir [Pathname,String] the directory path containing the files to be analyzed by JHOVE
  # @param fileset_file [Pathname,String] the pathname of the file listing which files should be processed. If nil, process all files.
  # @return [String] The jhove-toolkit command to be executed in a system call
  def get_jhove_command(content_dir, fileset_file=nil)
    java_args =
      if fileset_file.nil?
        ['edu.stanford.sulair.jhove.JhoveCommandLine', content_dir.to_s]
      else
        ['edu.stanford.sulair.jhove.JhoveFileset', content_dir.to_s, fileset_file.to_s]
      end
    jhove_script = @bin_pathname.join('jhoveToolkit.sh').to_s
    # Every word is shell-escaped so that paths containing spaces or shell
    # metacharacters stay single arguments; ordinary paths are left unchanged.
    "#{Shellwords.join([jhove_script, *java_args])} > #{Shellwords.escape(jhove_output.to_s)}"
  end

  # @param [Pathname,String] jhove_pathname The full path of the file containing JHOVE output to be transformed to technical metadata
  # @return [String] Convert JHOVE output to technicalMetadata, returning the output file path
  def create_technical_metadata(jhove_pathname=jhove_output)
    jhove_pathname = Pathname.new(jhove_pathname)
    jhovetm = JhoveTechnicalMetadata.new
    jhovetm.digital_object_id = digital_object_id
    jhovetm.output_file = tech_md_output
    # Create a SAX parser
    parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
    begin
      # Feed the parser some XML; the block form closes the input handle even if parsing raises
      jhove_pathname.open('rb') { |input| parser.parse(input) }
    ensure
      # Make sure the output file is flushed and released; the handler may
      # already have closed it, hence the closed? guard.
      tm_stream = jhovetm.ios
      tm_stream.close unless tm_stream.closed?
    end
    tech_md_output.to_s
  end

  # @param [String] old_tm the old techMD xml to be transformed to new technical metadata format
  # @return [String] Convert old techMD data to new technicalMetadata format
  def upgrade_technical_metadata(old_tm)
    new_tm = StringIO.new
    upgrade_sax_handler = JhoveTechnicalMetadata.new
    upgrade_sax_handler.digital_object_id = digital_object_id
    # Collect the handler's output in memory instead of a file
    upgrade_sax_handler.ios = new_tm
    # Create a SAX parser
    parser = Nokogiri::XML::SAX::Parser.new(upgrade_sax_handler)
    # Feed the parser some XML
    parser.parse(old_tm)
    new_tm.string
  end

  # @return [void] Cleanup the temporary workspace used to hold the metadata outputs
  def cleanup
    jhove_output.delete if jhove_output.exist?
    tech_md_output.delete if tech_md_output.exist?
  end

end
|
@@ -0,0 +1,284 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'nokogiri'
|
3
|
+
require 'time'
|
4
|
+
require 'pathname'
|
5
|
+
|
6
|
+
# A SAX handler for filtering JHOVE output to create a technicalMetadata datastream
|
7
|
+
# The previous mechanism (using XSLT transformation) was causing out of memory errors,
|
8
|
+
# due to XSLT's behavior of loading both the input and output objects into memory.
|
9
|
+
class JhoveTechnicalMetadata < Nokogiri::XML::SAX::Document
|
10
|
+
|
11
|
+
# @return [IO] the output stream for the result
|
12
|
+
attr_accessor :ios
|
13
|
+
|
14
|
+
# @return [String] The druid of the object, which gets inserted in the root element of the output
|
15
|
+
attr_accessor :digital_object_id
|
16
|
+
|
17
|
+
# Starts with an indent level of 0 and STDOUT as the output stream.
def initialize()
|
18
|
+
@indent = 0
|
19
|
+
@ios = STDOUT # default sink until #output_file= or #ios= replaces it (was: File.open(STDOUT, 'w'))
|
20
|
+
end
|
21
|
+
|
22
|
+
# @param [Pathname] pathname the location of the technicalMetadata.xml file to be created (opened in 'w' mode)
|
23
|
+
# @return [void] Points the output stream (@ios) at the specified file; the handle opened here is not closed by this method
|
24
|
+
def output_file=(pathname)
|
25
|
+
@ios = pathname.open('w')
|
26
|
+
end
|
27
|
+
|
28
|
+
# @param [String] string The character string to be appended to the output
|
29
|
+
# @return [void] Append the specified string to the output stream
|
30
|
+
def output(string)
|
31
|
+
@ios.puts " "*@indent + string
|
32
|
+
end
|
33
|
+
|
34
|
+
# @param [String] tag the name of the XML element from the parsed input
|
35
|
+
# @param [Hash] attrs the XML attributes of the element
|
36
|
+
# @return [void] this method is called by the sax parser at the beginning of an element
|
37
|
+
def start_element(tag, attrs = [])
|
38
|
+
case tag
|
39
|
+
when 'jhove'
|
40
|
+
# <jhove> is the root element of the input
|
41
|
+
root_open(attrs)
|
42
|
+
when 'repInfo'
|
43
|
+
# A <repInfo> element contains the data for each file
|
44
|
+
file_wrapper_open(attrs)
|
45
|
+
when 'properties'
|
46
|
+
# A <properties> element contains the variable data for the file
|
47
|
+
properties_open
|
48
|
+
else
|
49
|
+
if tag[0..2] == 'mix'
|
50
|
+
# JHOVE output for image files contains tech md in MIX format that we copy verbatum to output
|
51
|
+
mix_open(tag)
|
52
|
+
elsif @in_jhove
|
53
|
+
# we've encountered one of the JHOVE elements that we want to automatically copy
|
54
|
+
jhove_open(tag, attrs)
|
55
|
+
elsif @in_properties
|
56
|
+
# we're looking for the LineEndings property in the JHOVE output
|
57
|
+
linebreak_open(tag)
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
61
|
+
|
62
|
+
# @param [String] tag the value of a text node found in the parsed XML
|
63
|
+
# @return [void] this method is called by the sax parser when a text node is encountered
|
64
|
+
def characters(string)
|
65
|
+
@text = string
|
66
|
+
end
|
67
|
+
|
68
|
+
# @param [String] tag the name of the XML element from the parsed input
|
69
|
+
# @return [void] this method is called by the sax parser at the end of an element
|
70
|
+
def end_element(tag)
|
71
|
+
case tag
|
72
|
+
when 'jhove'
|
73
|
+
root_close
|
74
|
+
when 'repInfo'
|
75
|
+
file_wrapper_close
|
76
|
+
when 'properties'
|
77
|
+
properties_close
|
78
|
+
else
|
79
|
+
if tag[0..2] == 'mix'
|
80
|
+
mix_close(tag)
|
81
|
+
elsif @in_jhove
|
82
|
+
jhove_close(tag)
|
83
|
+
elsif @in_properties
|
84
|
+
linebreak_close(tag)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
# @param [Hash] attrs the attributes of the <jhove> element in the XML input
|
90
|
+
# @return [void] create the <technicalMetadata> root element of the XML output and include namespace declararions
|
91
|
+
def root_open(attrs)
|
92
|
+
if @digital_object_id
|
93
|
+
output "<technicalMetadata objectId='#{@digital_object_id}' datetime='#{Time.now.utc.iso8601}'"
|
94
|
+
else
|
95
|
+
output "<technicalMetadata datetime='#{Time.now.utc.iso8601}'"
|
96
|
+
end
|
97
|
+
@indent += 2
|
98
|
+
output "xmlns:jhove='http://hul.harvard.edu/ois/xml/ns/jhove'"
|
99
|
+
output "xmlns:mix='http://www.loc.gov/mix/v10'"
|
100
|
+
output "xmlns:textmd='info:lc/xmlns/textMD-v3' >"
|
101
|
+
@indent -= 1
|
102
|
+
end
|
103
|
+
|
104
|
+
# @return [void] add the closing element of the output document
|
105
|
+
def root_close
|
106
|
+
@indent -= 1
|
107
|
+
output "</technicalMetadata>"
|
108
|
+
@ios.close
|
109
|
+
end
|
110
|
+
|
111
|
+
# @param [Hash] attrs the attributes of the <jhove> element in the XML input
|
112
|
+
# @return [void] Append a <file> element to the output, setting the id attribute to the file path
|
113
|
+
def file_wrapper_open(attrs)
|
114
|
+
filepath=nil
|
115
|
+
attrs.each { |attr| filepath=attr[1] if attr[0]=='uri'}
|
116
|
+
output "<file id='#{filepath}'>"
|
117
|
+
@indent += 1
|
118
|
+
@in_jhove = true
|
119
|
+
end
|
120
|
+
|
121
|
+
# @return [void] Append a </file> tag to close the file data,
|
122
|
+
# but first inset a textMD stanza if the file has a text format
|
123
|
+
def file_wrapper_close
|
124
|
+
case @format
|
125
|
+
when 'ASCII', 'HTML','TEXT','UTF-8'
|
126
|
+
output_textmd(@linebreak)
|
127
|
+
end
|
128
|
+
@indent -= 1
|
129
|
+
output " </jhove:properties>" if @in_properties
|
130
|
+
|
131
|
+
output "</file>"
|
132
|
+
@in_jhove = false
|
133
|
+
@in_properties=false
|
134
|
+
end
|
135
|
+
|
136
|
+
# @param [String] tag the name of the XML element from the parsed input
|
137
|
+
# @param [Hash] attrs the attributes of the <jhove> element in the XML input
|
138
|
+
# @return [void] Copy this jhove element tag and its attributes verbatum
|
139
|
+
def jhove_open(tag, attrs)
|
140
|
+
if @jhove_tag # saved previously
|
141
|
+
# we encountered a new element so output what was previously cached
|
142
|
+
output "<jhove:#{@jhove_tag}#{@jhove_attrs}>"
|
143
|
+
@indent += 1
|
144
|
+
end
|
145
|
+
# cache the element name and its attributes
|
146
|
+
@jhove_tag = tag
|
147
|
+
@jhove_attrs = ""
|
148
|
+
attrs.each do |attr|
|
149
|
+
@jhove_attrs += " #{attr[0]}='#{attr[1]}'"
|
150
|
+
end
|
151
|
+
@text = nil
|
152
|
+
@linebreak='LF'
|
153
|
+
end
|
154
|
+
|
155
|
+
# @param [String] tag the name of the XML element from the parsed input
|
156
|
+
# @return [void] Output a closing tag, preceded by cached data, if such exists
|
157
|
+
def jhove_close(tag)
|
158
|
+
if @text && tag == @jhove_tag
|
159
|
+
output "<jhove:#{@jhove_tag}#{@jhove_attrs}>#{@text}</jhove:#{tag}>"
|
160
|
+
elsif tag == @jhove_tag
|
161
|
+
output "<jhove:#{@jhove_tag}#{@jhove_attrs}/>"
|
162
|
+
else
|
163
|
+
@indent -=1
|
164
|
+
output "</jhove:#{tag}>"
|
165
|
+
end
|
166
|
+
@format = @text if tag == 'format'
|
167
|
+
@text = nil
|
168
|
+
@jhove_tag = nil
|
169
|
+
@jhove_attrs=""
|
170
|
+
end
|
171
|
+
|
172
|
+
# @return [void] Output a <properties> element if one was encountered in the input,
|
173
|
+
# then ignore most input data from within the properties element, except mix and LineBreaks
|
174
|
+
def properties_open
|
175
|
+
output "<jhove:properties>"
|
176
|
+
@indent += 1
|
177
|
+
@in_jhove = false
|
178
|
+
@in_properties=true
|
179
|
+
end
|
180
|
+
|
181
|
+
# @return [void] Appending of a closing tag is handled elsewhere
|
182
|
+
def properties_close
|
183
|
+
@indent -= 1
|
184
|
+
end
|
185
|
+
|
186
|
+
# @param [String] tag the name of the XML element from the parsed input
|
187
|
+
# @return [void] Copy any Mix data verbatum,
|
188
|
+
def mix_open(tag)
|
189
|
+
if @mix_tag
|
190
|
+
# we encountered a new element so output what was previously cached
|
191
|
+
output "<#{@mix_tag}>"
|
192
|
+
@indent += 1
|
193
|
+
end
|
194
|
+
# cache the element name
|
195
|
+
@mix_tag = tag
|
196
|
+
@text = nil
|
197
|
+
end
|
198
|
+
|
199
|
+
# @param [String] tag the name of the XML element from the parsed input
|
200
|
+
# @return [void] Output a closing tag, preceded by cached data, if such exists
|
201
|
+
def mix_close(tag)
|
202
|
+
if @text && tag == @mix_tag
|
203
|
+
output "<#{tag}>#{@text}</#{tag}>"
|
204
|
+
elsif tag == @mix_tag
|
205
|
+
output "<#{tag}/>"
|
206
|
+
else
|
207
|
+
@indent -=1
|
208
|
+
output "</#{tag}>"
|
209
|
+
end
|
210
|
+
@text = nil
|
211
|
+
@mix_tag = nil
|
212
|
+
end
|
213
|
+
|
214
|
+
# @param [String] tag the name of the XML element from the parsed input
|
215
|
+
# @return [void] Keep clearing the text cache any time a new element is encountered
|
216
|
+
def linebreak_open(tag)
|
217
|
+
@text = nil if @text
|
218
|
+
end
|
219
|
+
|
220
|
+
# @param [String] tag the name of the XML element from the parsed input
|
221
|
+
# @return [void] Look for the LineEndings name/value pair, which is spread across multiple elements
|
222
|
+
def linebreak_close(tag)
|
223
|
+
case tag
|
224
|
+
when 'name'
|
225
|
+
@in_line_endings = false
|
226
|
+
@in_line_endings = true if @text == 'LineEndings'
|
227
|
+
when 'value'
|
228
|
+
@linebreak = @text if @in_line_endings
|
229
|
+
@in_line_endings = false
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
233
|
+
# @param [Object] linebreak the CRLF or LF value found in the JHOVE output ()default is LF)
|
234
|
+
# @return [void] Output a textMD section within the properties element
|
235
|
+
def output_textmd(linebreak)
|
236
|
+
indent = @indent
|
237
|
+
@indent = 0
|
238
|
+
if @in_properties
|
239
|
+
# properties element tags provided by other code
|
240
|
+
output <<-EOF
|
241
|
+
<textmd:textMD>
|
242
|
+
<textmd:character_info>
|
243
|
+
<textmd:byte_order>big</textmd:byte_order>
|
244
|
+
<textmd:byte_size>8</textmd:byte_size>
|
245
|
+
<textmd:character_size>1</textmd:character_size>
|
246
|
+
<textmd:linebreak>#{linebreak}</textmd:linebreak>
|
247
|
+
</textmd:character_info>
|
248
|
+
</textmd:textMD>
|
249
|
+
EOF
|
250
|
+
else
|
251
|
+
# there were no properties elements in the input, so we must supply them ourselves
|
252
|
+
output <<-EOF
|
253
|
+
<jhove:properties>
|
254
|
+
<textmd:textMD>
|
255
|
+
<textmd:character_info>
|
256
|
+
<textmd:byte_order>big</textmd:byte_order>
|
257
|
+
<textmd:byte_size>8</textmd:byte_size>
|
258
|
+
<textmd:character_size>1</textmd:character_size>
|
259
|
+
<textmd:linebreak>#{linebreak}</textmd:linebreak>
|
260
|
+
</textmd:character_info>
|
261
|
+
</textmd:textMD>
|
262
|
+
</jhove:properties>
|
263
|
+
EOF
|
264
|
+
end
|
265
|
+
@indent = indent
|
266
|
+
end
|
267
|
+
|
268
|
+
end
|
269
|
+
|
270
|
+
|
271
|
+
# Below is the equivalent of a java main method.
# For this to work OK, the module/class being invoked
# must already have been loaded by the Ruby interpreter.
#
# Command-line arguments, in order:
#   ARGV[0] - the object id to record in the output's root element
#   ARGV[1] - path of the JHOVE XML file to read
#   ARGV[2] - path of the technical metadata file to write

if __FILE__ == $0
  # Create a handler
  jhovetm = JhoveTechnicalMetadata.new()
  jhovetm.digital_object_id = ARGV[0]
  jhovetm.output_file = Pathname.new(ARGV[2])
  # Create a SAX parser
  parser = Nokogiri::XML::SAX::Parser.new(jhovetm)
  # Feed the parser some XML; the block form closes the input file when parsing ends
  File.open(ARGV[1], 'rb') { |jhove_xml| parser.parse(jhove_xml) }
end
|
data/lib/tasks/yard.rake
ADDED
@@ -0,0 +1,31 @@
|
|
1
|
+
# Documentation tasks: `rake doc` generates the YARD documentation into <project>/doc,
# and `rake doc:clean` removes it again.
desc "Generate RDoc"
task :doc => ['doc:generate']

namespace :doc do
  # This file lives in lib/tasks, so the project root is two directories up
  project_root = File.expand_path(File.join(File.dirname(__FILE__), '..', '..'))
  doc_destination = File.join(project_root, 'doc')

  begin
    require 'yard'
    require 'yard/rake/yardoc_task'

    YARD::Rake::YardocTask.new(:generate) do |yt|
      # Files listed after the '-' separator are passed to YARD as extra files
      yt.files   = Dir.glob(File.join(project_root, 'lib', '*.rb')) +
                   ['-'] +
                   [ File.join(project_root, 'LICENSE.rdoc') ]

      yt.options = ['--output-dir', doc_destination, '--hide-void-return']
    end
  rescue LoadError
    # YARD is not installed: define a stand-in task that explains what is missing
    desc "Generate YARD Documentation"
    task :generate do
      abort "Please install the YARD gem to generate rdoc."
    end
  end

  desc "Remove generated documentation"
  task :clean do
    # File.exists? is deprecated and no longer available in current Ruby releases
    rm_r doc_destination if File.exist?(doc_destination)
  end

end
|
metadata
ADDED
@@ -0,0 +1,157 @@
|
|
1
|
+
--- !ruby/object:Gem::Specification
|
2
|
+
name: jhove-service
|
3
|
+
version: !ruby/object:Gem::Version
|
4
|
+
version: 1.0.3
|
5
|
+
prerelease:
|
6
|
+
platform: ruby
|
7
|
+
authors:
|
8
|
+
- Richard Anderson
|
9
|
+
autorequire:
|
10
|
+
bindir: bin
|
11
|
+
cert_chain: []
|
12
|
+
date: 2013-10-15 00:00:00.000000000 Z
|
13
|
+
dependencies:
|
14
|
+
- !ruby/object:Gem::Dependency
|
15
|
+
name: nokogiri
|
16
|
+
requirement: !ruby/object:Gem::Requirement
|
17
|
+
none: false
|
18
|
+
requirements:
|
19
|
+
- - ! '>='
|
20
|
+
- !ruby/object:Gem::Version
|
21
|
+
version: 1.4.3.1
|
22
|
+
type: :runtime
|
23
|
+
prerelease: false
|
24
|
+
version_requirements: !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
26
|
+
requirements:
|
27
|
+
- - ! '>='
|
28
|
+
- !ruby/object:Gem::Version
|
29
|
+
version: 1.4.3.1
|
30
|
+
- !ruby/object:Gem::Dependency
|
31
|
+
name: equivalent-xml
|
32
|
+
requirement: !ruby/object:Gem::Requirement
|
33
|
+
none: false
|
34
|
+
requirements:
|
35
|
+
- - ! '>='
|
36
|
+
- !ruby/object:Gem::Version
|
37
|
+
version: 0.2.2
|
38
|
+
type: :development
|
39
|
+
prerelease: false
|
40
|
+
version_requirements: !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ! '>='
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
version: 0.2.2
|
46
|
+
- !ruby/object:Gem::Dependency
|
47
|
+
name: rake
|
48
|
+
requirement: !ruby/object:Gem::Requirement
|
49
|
+
none: false
|
50
|
+
requirements:
|
51
|
+
- - ! '>='
|
52
|
+
- !ruby/object:Gem::Version
|
53
|
+
version: 0.8.7
|
54
|
+
type: :development
|
55
|
+
prerelease: false
|
56
|
+
version_requirements: !ruby/object:Gem::Requirement
|
57
|
+
none: false
|
58
|
+
requirements:
|
59
|
+
- - ! '>='
|
60
|
+
- !ruby/object:Gem::Version
|
61
|
+
version: 0.8.7
|
62
|
+
- !ruby/object:Gem::Dependency
|
63
|
+
name: rdoc
|
64
|
+
requirement: !ruby/object:Gem::Requirement
|
65
|
+
none: false
|
66
|
+
requirements:
|
67
|
+
- - ! '>='
|
68
|
+
- !ruby/object:Gem::Version
|
69
|
+
version: '0'
|
70
|
+
type: :development
|
71
|
+
prerelease: false
|
72
|
+
version_requirements: !ruby/object:Gem::Requirement
|
73
|
+
none: false
|
74
|
+
requirements:
|
75
|
+
- - ! '>='
|
76
|
+
- !ruby/object:Gem::Version
|
77
|
+
version: '0'
|
78
|
+
- !ruby/object:Gem::Dependency
|
79
|
+
name: rspec
|
80
|
+
requirement: !ruby/object:Gem::Requirement
|
81
|
+
none: false
|
82
|
+
requirements:
|
83
|
+
- - ! '>='
|
84
|
+
- !ruby/object:Gem::Version
|
85
|
+
version: '0'
|
86
|
+
type: :development
|
87
|
+
prerelease: false
|
88
|
+
version_requirements: !ruby/object:Gem::Requirement
|
89
|
+
none: false
|
90
|
+
requirements:
|
91
|
+
- - ! '>='
|
92
|
+
- !ruby/object:Gem::Version
|
93
|
+
version: '0'
|
94
|
+
- !ruby/object:Gem::Dependency
|
95
|
+
name: yard
|
96
|
+
requirement: !ruby/object:Gem::Requirement
|
97
|
+
none: false
|
98
|
+
requirements:
|
99
|
+
- - ! '>='
|
100
|
+
- !ruby/object:Gem::Version
|
101
|
+
version: '0'
|
102
|
+
type: :development
|
103
|
+
prerelease: false
|
104
|
+
version_requirements: !ruby/object:Gem::Requirement
|
105
|
+
none: false
|
106
|
+
requirements:
|
107
|
+
- - ! '>='
|
108
|
+
- !ruby/object:Gem::Version
|
109
|
+
version: '0'
|
110
|
+
description: Generates JHOVE output and/or technicalMetadata
|
111
|
+
email:
|
112
|
+
- rnanders@stanford.edu
|
113
|
+
executables: []
|
114
|
+
extensions: []
|
115
|
+
extra_rdoc_files: []
|
116
|
+
files:
|
117
|
+
- bin/extension-mimetype.conf
|
118
|
+
- bin/jhove-README-1st.txt
|
119
|
+
- bin/jhove-README-conf.txt
|
120
|
+
- bin/jhove.conf
|
121
|
+
- bin/JhoveApp.jar
|
122
|
+
- bin/jhoveToolkit.jar
|
123
|
+
- bin/jhoveToolkit.sh
|
124
|
+
- lib/jhove_service.rb
|
125
|
+
- lib/jhove_technical_metadata.rb
|
126
|
+
- lib/tasks/yard.rake
|
127
|
+
- LICENSE.rdoc
|
128
|
+
- README.rdoc
|
129
|
+
homepage:
|
130
|
+
licenses: []
|
131
|
+
post_install_message:
|
132
|
+
rdoc_options: []
|
133
|
+
require_paths:
|
134
|
+
- lib
|
135
|
+
required_ruby_version: !ruby/object:Gem::Requirement
|
136
|
+
none: false
|
137
|
+
requirements:
|
138
|
+
- - ! '>='
|
139
|
+
- !ruby/object:Gem::Version
|
140
|
+
version: '0'
|
141
|
+
segments:
|
142
|
+
- 0
|
143
|
+
hash: 4252808025718583470
|
144
|
+
required_rubygems_version: !ruby/object:Gem::Requirement
|
145
|
+
none: false
|
146
|
+
requirements:
|
147
|
+
- - ! '>='
|
148
|
+
- !ruby/object:Gem::Version
|
149
|
+
version: 1.3.6
|
150
|
+
requirements: []
|
151
|
+
rubyforge_project:
|
152
|
+
rubygems_version: 1.8.23
|
153
|
+
signing_key:
|
154
|
+
specification_version: 3
|
155
|
+
summary: Generates JHOVE output and/or technicalMetadata
|
156
|
+
test_files: []
|
157
|
+
has_rdoc:
|