marc4j4r 0.1.3 → 0.1.4

Sign up to get free protection for your applications and to get access to all the features.
@@ -63,11 +63,11 @@
63
63
  <p class="children">
64
64
 
65
65
 
66
- <strong class="modules">Modules:</strong> <a href="MARC4J4R.html" title="MARC4J4R (module)">MARC4J4R</a>
66
+ <strong class="modules">Modules:</strong> <a href="MARC.html" title="MARC (module)">MARC</a>, <a href="MARC4J4R.html" title="MARC4J4R (module)">MARC4J4R</a>
67
67
 
68
68
 
69
69
 
70
- <strong class="classes">Classes:</strong> <a href="ControlFieldImpl.html" title="ControlFieldImpl (class)">ControlFieldImpl</a>, <a href="DataFieldImpl.html" title="DataFieldImpl (class)">DataFieldImpl</a>, <a href="RecordImpl.html" title="RecordImpl (class)">RecordImpl</a>, <a href="SubfieldImpl.html" title="SubfieldImpl (class)">SubfieldImpl</a>
70
+ <strong class="classes">Classes:</strong> <a href="ASReader.html" title="ASReader (class)">ASReader</a>, <a href="ControlFieldImpl.html" title="ControlFieldImpl (class)">ControlFieldImpl</a>, <a href="DataFieldImpl.html" title="DataFieldImpl (class)">DataFieldImpl</a>, <a href="RecordImpl.html" title="RecordImpl (class)">RecordImpl</a>, <a href="SubfieldImpl.html" title="SubfieldImpl (class)">SubfieldImpl</a>
71
71
 
72
72
 
73
73
  </p>
@@ -78,7 +78,7 @@
78
78
  </div>
79
79
 
80
80
  <div id="footer">
81
- Generated on Fri Feb 12 16:38:07 2010 by
81
+ Generated on Sat Apr 3 22:01:08 2010 by
82
82
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool">yard</a>
83
83
  0.5.3 (ruby-1.8.7).
84
84
  </div>
Binary file
data/jars/marc4j.jar CHANGED
Binary file
data/lib/marc4j4r.rb CHANGED
@@ -11,6 +11,28 @@ end
11
11
 
12
12
  require 'set'
13
13
 
14
+
15
+ # Re-open the MarcReader interface, define #each and include Enumerable
16
+ #
17
+ # We also automatically call #hashify on the records that stream through
18
+ # #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
19
+ # record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
20
+ # by another 520)
21
+
22
+ module Java::OrgMarc4j::MarcReader
23
+ include Enumerable
24
+
25
+ # Yield each record in turn, calling #hashify on it first unless the hashify argument is falsy
26
+ def each(hashify=true)
27
+ while self.hasNext
28
+ r = self.next
29
+ r.hashify if hashify
30
+ yield r
31
+ end
32
+ end
33
+ end
34
+
35
+
14
36
  module MARC4J4R
15
37
 
16
38
  # Add some sugar to the MarcReader interface
@@ -31,10 +53,11 @@ module MARC4J4R
31
53
  # can't just add it to the MarcReader interface the way I wanted to.
32
54
 
33
55
  NEWINIT = <<-ENDBINDER
56
+ include Enumerable
34
57
  alias_method :oldinit, :initialize
35
58
  def initialize(fromwhere)
36
59
  stream = nil
37
- if fromwhere.is_a? Java::JavaIO::InputStream
60
+ if fromwhere.is_a? Java::JavaIO::InputStream or fromwhere.is_a? Java::JavaIO::ByteArrayInputStream
38
61
  stream = fromwhere
39
62
  elsif fromwhere.is_a? IO
40
63
  stream = fromwhere.to_inputstream
@@ -54,6 +77,7 @@ module MARC4J4R
54
77
  Java::org.marc4j.MarcXmlReader.module_eval(NEWINIT)
55
78
 
56
79
 
80
+
57
81
  # Get a marc reader of the appropriate type
58
82
  # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
59
83
  # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
@@ -75,43 +99,84 @@ module MARC4J4R
75
99
 
76
100
  def reader(input, type = :strictmarc)
77
101
  case type
78
- when :strictmarc
102
+ when :strictmarc then
79
103
  return Java::org.marc4j.MarcStreamReader.new(input)
80
- when :permissivemarc
104
+ when :permissivemarc then
81
105
  return Java::org.marc4j.MarcPermissiveStreamReader.new(input)
82
- when :marcxml
106
+ when :marcxml then
83
107
  return Java::org.marc4j.MarcXmlReader.new(input)
108
+ when :alephsequential then
109
+ return MARC4J4R::AlephSequentialReader.new(input)
84
110
  else
85
- raise ArgumentError, "Reader type must be :strictmarc, :permissivemarc, or :marcxml"
111
+ raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
86
112
  end
87
113
  end
88
114
  module_function :reader
89
-
90
- end
91
-
92
-
93
- # Re-open the MarcReader interface, define #each and include Enumerable
94
- #
95
- # We also automatically call #hashify on the records that stream through
96
- # #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
97
- # record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
98
- # by another 520)
99
-
100
- module Java::OrgMarc4j::MarcReader
101
- include Enumerable
102
115
 
103
- # Return the next record, after calling #hashify on it
104
- def each
105
- while self.hasNext
106
- r = self.next
107
- r.hashify
108
- yield r
116
+
117
+ # Implement an AlephSequential reader
118
+ class AlephSequentialReader
119
+ include Enumerable
120
+ def initialize(fromwhere)
121
+ stream = nil
122
+ if fromwhere.is_a? Java::JavaIO::InputStream
123
+ stream = fromwhere.to_io
124
+ elsif fromwhere.is_a? IO
125
+ stream = fromwhere
126
+ else
127
+ stream = File.new(fromwhere)
128
+ end
129
+
130
+ @handle = stream
109
131
  end
110
- end
132
+
133
+ def each
134
+ record = nil
135
+ currentID = nil
136
+
137
+ @handle.each_line do |l|
138
+ l.chomp!
139
+ next unless l =~ /\S/
140
+ vals = l.unpack('a9 a a3 c c a3 a*')
141
+ id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
142
+ # id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
143
+ if id != currentID
144
+ if record
145
+ yield record
146
+ end
147
+ record = RecordImpl.new
148
+ currentID = id
149
+ end
150
+ if tag == 'LDR'
151
+ record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
152
+ else
153
+ record << buildField(tag,ind1,ind2,data)
154
+ end
155
+ end
156
+ yield record
157
+ end
158
+
159
+
160
+ SUBREGEXP = /\$\$(.)/
161
+ def buildField (tag, ind1, ind2, data)
162
+ if Java::org.marc4j.marc.impl.Verifier.isControlField tag
163
+ return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
164
+ else
165
+ f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
166
+ data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
167
+ f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
168
+ end
169
+ return f
170
+ end
171
+ end
172
+
173
+ end # End of class AlephSequentialReader
174
+
111
175
  end
112
176
 
113
177
 
114
178
 
179
+
115
180
  include_class Java::org.marc4j.marc.impl::RecordImpl
116
181
  include_class Java::org.marc4j.marc.impl::ControlFieldImpl
117
182
  include_class Java::org.marc4j.marc.impl::DataFieldImpl
@@ -122,6 +187,38 @@ include_class Java::org.marc4j.marc.impl::SubfieldImpl
122
187
 
123
188
  class RecordImpl
124
189
  include Enumerable
190
+
191
+ alias_method :<<, :addVariableField
192
+ alias_method :append, :addVariableField
193
+ alias_method :fields, :getVariableFields
194
+
195
+ # Export as a MARC-Hash, as described at
196
+ # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
197
+ # @return [Hash] A marc-hash representation of the record, suitable for serializing (e.g. by calling .to_json on it)
198
+ def to_marchash
199
+ h = {}
200
+ h['type'] = 'marc-hash'
201
+ h['version'] = [1,0]
202
+ h['leader'] = self.leader
203
+
204
+ fields = []
205
+
206
+ self.getVariableFields.each do |f|
207
+ if f.controlField?
208
+ fields << [f.tag, f.value]
209
+ else
210
+ farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
211
+ subs = []
212
+ f.each do |subfield|
213
+ subs << [subfield.code, subfield.value]
214
+ end
215
+ farray.push subs
216
+ fields << farray
217
+ end
218
+ end
219
+ h['fields'] = fields
220
+ return h
221
+ end
125
222
 
126
223
  # Create a local hash by tag number; makes some stuff faster
127
224
  # Called automatically if you use reader.each
@@ -148,7 +245,8 @@ class RecordImpl
148
245
  def leader
149
246
  self.get_leader.toString
150
247
  end
151
-
248
+
249
+
152
250
  # Cycle through the fields in the order they appear in the record
153
251
  def each
154
252
  self.getVariableFields.each do |f|
@@ -163,7 +261,11 @@ class RecordImpl
163
261
 
164
262
  def [] tag
165
263
  if defined? @hashedtags
166
- return @hashedtags[tag][0]
264
+ if @hashedtags[tag]
265
+ return @hashedtags[tag][0]
266
+ else
267
+ return nil
268
+ end
167
269
  else
168
270
  return self.getVariableField(tag)
169
271
  end
@@ -209,7 +311,7 @@ class RecordImpl
209
311
  # # 035 $a (OCoLC)ocm00001728
210
312
 
211
313
  def find_by_tag(tags, originalorder = false)
212
- self.hashify unless @hashedtags
314
+ self.hashify unless @hashedtags and !originalorder
213
315
  if !tags.is_a? Array
214
316
  return @hashedtags[tags] || []
215
317
  end
@@ -221,18 +323,39 @@ class RecordImpl
221
323
  end
222
324
  end
223
325
 
326
+
224
327
 
225
328
  # Return the record as valid MARC-XML
226
329
  # @return [String] A MARC-XML representation of the record, including the XML header
227
330
  def to_xml
228
331
  return @xml if @xml
229
- @xml = java.io.StringWriter.new
230
- res = javax.xml.transform.stream.StreamResult.new(@xml)
231
- writer = org.marc4j.MarcXmlWriter.new(res)
232
- writer.write(self)
233
- return @xml.toString
332
+ begin
333
+ @xml = java.io.StringWriter.new
334
+ res = javax.xml.transform.stream.StreamResult.new(@xml)
335
+ writer = org.marc4j.MarcXmlWriter.new(res)
336
+ writer.write(self)
337
+ writer.writeEndDocument
338
+ return @xml.toString
339
+ rescue
340
+ "Woops! to_xml failed for record #{self['001'].data}: #{$!}"
341
+ end
342
+ end
343
+
344
+ def to_marc
345
+ begin
346
+ s = Java::java.io.ByteArrayOutputStream.new
347
+ writer = org.marc4j.MarcStreamWriter.new(s)
348
+ writer.write(self)
349
+ @marcbinary = s.to_string
350
+ puts @marcbinary
351
+ return @marcbinary
352
+ rescue
353
+ # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
354
+ "Whoops! Failed: #{$!}"
355
+ end
234
356
  end
235
-
357
+
358
+
236
359
  end
237
360
 
238
361
  class ControlFieldImpl
@@ -240,6 +363,14 @@ class ControlFieldImpl
240
363
  return self.data
241
364
  end
242
365
 
366
+ def controlField?
367
+ return true
368
+ end
369
+
370
+ def self.control_tag? tag
371
+ return Java::org.marc4j.marc.impl.Verifier.isControlField tag
372
+ end
373
+
243
374
  # Pretty-print
244
375
  # @param [String] joiner What string to use to join the subfields
245
376
  # @return [String] The pretty string
@@ -251,6 +382,20 @@ end
251
382
 
252
383
  class DataFieldImpl
253
384
  include Enumerable
385
+
386
+ alias_method :<<, :addSubfield
387
+
388
+
389
+ def controlField?
390
+ return false
391
+ end
392
+
393
+ # FIXME: incomplete -- compares only the tag and the two indicators; subfields still need to be checked as well
394
+ def == other
395
+ self.tag == other.tag and
396
+ self.indicator1 == other.indicator1 and
397
+ self.indicator2 == other.indicator2
398
+ end
254
399
 
255
400
  # Pretty-print
256
401
  # @param [String] joiner What string to use to join the subfields
@@ -268,9 +413,13 @@ class DataFieldImpl
268
413
  # @return [String] The value of the first matched subfield
269
414
  def [] code
270
415
  raise ArgumentError, "Code must be a one-character string, not #{code}" unless code.is_a? String and code.size == 1
271
- # note that code[0] is just converting the one-character string into an integer
272
- # char value that the underlying java can deal with
273
- self.getSubfield(code[0]).getData
416
+ # need to send a char value that the underlying java can deal with
417
+ sub = self.getSubfield(code[0].ord)
418
+ if (sub)
419
+ return sub.getData
420
+ else
421
+ return nil
422
+ end
274
423
  end
275
424
 
276
425
 
@@ -345,6 +494,10 @@ end
345
494
 
346
495
  class SubfieldImpl
347
496
 
497
+ def == other
498
+ return self.code == other.code and self.data == other.data
499
+ end
500
+
348
501
  def value
349
502
  return self.data
350
503
  end
data/test/batch.seq ADDED
@@ -0,0 +1,118 @@
1
+ 000004262 LDR L ^^^^^nam^a22003011^^4500
2
+ 000004262 001 L 000004262
3
+ 000004262 005 L 19891107000000.0
4
+ 000004262 006 L m^^^^^^^^d^^^^^^^^
5
+ 000004262 007 L cr^bn^---auaua
6
+ 000004262 008 L 880715r19721939nyua^^^^^b^^^^00100^eng^^
7
+ 000004262 010 L $$a72001885/MN
8
+ 000004262 020 L $$a0306705133
9
+ 000004262 035 L $$a(RLIN)MIUG0297757-B
10
+ 000004262 035 L $$a(CaOTULAS)159822964
11
+ 000004262 035 L $$a(OCoLC)ocm00297757
12
+ 000004262 035 L $$asdr-inu1221428
13
+ 000004262 040 L $$aDLC$$cDLC$$dMiU
14
+ 000004262 043 L $$an------
15
+ 000004262 0500 L $$aML3557$$b.D3645 1972
16
+ 000004262 082 L $$a784.7/51
17
+ 000004262 1001 L $$aDensmore, Frances,$$d1867-1957.
18
+ 000004262 24510 L $$aNootka and Quileute music.
19
+ 000004262 260 L $$aNew York,$$bDa Capo Press,$$c1972.
20
+ 000004262 300 L $$axxvi, 358 p.$$billus.$$c23 cm.
21
+ 000004262 4900 L $$aDa Capo Press music reprint series
22
+ 000004262 500 L $$aReprint of the 1939 ed., which was issued as Bulletin 124 of Smithsonian Institution. Bureau of American Ethnology.
23
+ 000004262 504 L $$aBibliography: p. 349.
24
+ 000004262 538 L $$aMode of access: Internet.
25
+ 000004262 650 0 L $$aNootka Indians$$xMusic.
26
+ 000004262 650 0 L $$aQuileute Indians$$xMusic.
27
+ 000004262 8520 L $$aMiU$$bMUSIC$$hML3557 .D4255 1972
28
+ 000004262 852 L $$ainu$$bSDR$$cINU
29
+ 000004262 970 L $$aBK$$bBook
30
+ 000004262 970 L $$aCE$$bElectronic Resource
31
+ 000004262 971 L $$aMiU
32
+ 000004262 972 L $$c20040625
33
+ 000004262 973 L $$aHT$$bavail_ht
34
+ 000004262 973 L $$aAC$$bavail_circ
35
+ 000004262 974 L $$uinu.30000053901769$$ric$$d20100226
36
+ 000004262 998 L $$s9665
37
+ 000005951 LDR L ^^^^^nam^a22002771^^4500
38
+ 000005951 001 L 000005951
39
+ 000005951 005 L 19970922000000.0
40
+ 000005951 006 L m^^^^^^^^d^^^^^^^^
41
+ 000005951 007 L cr^bn^---auaua
42
+ 000005951 008 L 880715s1968^^^^dcu^^^^^^b^^^|00010^eng^c
43
+ 000005951 010 L $$ahew68000006
44
+ 000005951 035 L $$a(RLIN)MIUG0425743-B
45
+ 000005951 035 L $$a(CaOTULAS)159824865
46
+ 000005951 035 L $$a(OCoLC)ocm00425743
47
+ 000005951 035 L $$asdr-ucsc.b16458175
48
+ 000005951 040 L $$a*U.S. Dept. of Health Education, and Welfare. Li$$cDLC$$dMiU
49
+ 000005951 0500 L $$aHD7123$$b.A39 no. 22
50
+ 000005951 082 L $$a301.5/5
51
+ 000005951 1001 L $$aKreps, Juanita Morris.
52
+ 000005951 24510 L $$aLifetime allocation of work and leisure,$$cby Juanita M. Kreps.
53
+ 000005951 260 L $$a[Washington,$$bFor sale by Supt. of Docs., U.S. Govt. Print. Off.,$$c1968]
54
+ 000005951 300 L $$aix, 44 p.$$c24 cm.
55
+ 000005951 4901 L $$aUnited States. Social Security Administration. Office of Research and Statistics. Research report$$vno. 22
56
+ 000005951 504 L $$aBibliographical footnotes.
57
+ 000005951 538 L $$aMode of access: Internet.
58
+ 000005951 650 0 L $$aAge and employment
59
+ 000005951 650 0 L $$aRetirement
60
+ 000005951 830 0 L $$aResearch report (United States. Social Security Administration. Office of Research and Statistics) ;$$vno. 22
61
+ 000005951 8520 L $$aMiU$$bTAUB$$hHD6279 .K925
62
+ 000005951 8520 L $$aMiU$$bTAUB$$hHD 7123 .A28 no.22
63
+ 000005951 8520 L $$aMiU$$bBUHR$$cGRAD$$hHD 7123 .A28 no.22
64
+ 000005951 852 L $$auc1$$bSDR$$cUCSC
65
+ 000005951 970 L $$aBK$$bBook
66
+ 000005951 970 L $$aCE$$bElectronic Resource
67
+ 000005951 971 L $$aMiU
68
+ 000005951 972 L $$c20040625
69
+ 000005951 972 L $$c20040625
70
+ 000005951 972 L $$c20040625
71
+ 000005951 973 L $$aHT$$bavail_ht
72
+ 000005951 973 L $$aAC$$bavail_circ
73
+ 000005951 974 L $$umdp.39015038814847$$ric
74
+ 000005951 974 L $$umdp.39015072105847$$ric$$d20091001
75
+ 000005951 974 L $$zno.22$$uuc1.32106000924198$$ric$$d20091202
76
+ 000005951 998 L $$s9665
77
+ 000009811 LDR L ^^^^^nam^a22003251^^4500
78
+ 000009811 001 L 000009811
79
+ 000009811 005 L 19970922000000.0
80
+ 000009811 006 L m^^^^^^^^d^^^^^^^^
81
+ 000009811 007 L cr^bn^---auaua
82
+ 000009811 008 L 880715s1971^^^^dcua^^^^^bs^^^00000^eng^^
83
+ 000009811 010 L $$a72179703
84
+ 000009811 035 L $$a(RLIN)MIUG0695316-B
85
+ 000009811 035 L $$a(CaOTULAS)159829127
86
+ 000009811 035 L $$a(OCoLC)ocm00695316
87
+ 000009811 035 L $$asdr-ucsc.b13985097
88
+ 000009811 040 L $$aDLC$$cDLC$$dMiU$$dCStRLIN
89
+ 000009811 043 L $$an-us---
90
+ 000009811 0500 L $$aHD7123$$b.A39 no. 37$$aHD7102.U4
91
+ 000009811 082 L $$a368.4/00973 s$$a368.4/2/00973
92
+ 000009811 086 L $$aII0 aHE 3.49:37
93
+ 000009811 1101 L $$aUnited States.$$bSocial Security Administration.$$bOffice of Research and Statistics.
94
+ 000009811 24510 L $$aFinancing mental health care under medicare and medicaid.
95
+ 000009811 260 L $$a[Washington;$$bFor sale by the Supt. of Docs., U.S. Govt. Print. Off.,$$c1971]
96
+ 000009811 300 L $$avii, 52 p.$$billus.$$c27 cm.
97
+ 000009811 4901 L $$aIts Research report$$vno. 37
98
+ 000009811 500 L $$aSupt. of Docs. no.: HE 3.49:37
99
+ 000009811 504 L $$aIncludes bibliographical references.
100
+ 000009811 538 L $$aMode of access: Internet.
101
+ 000009811 650 0 L $$aMental health insurance$$zUnited States.
102
+ 000009811 650 0 L $$aMedicare
103
+ 000009811 650 0 L $$aMedicaid
104
+ 000009811 830 0 L $$aResearch report (United States. Social Security Administration. Office of Research and Statistics) ;$$vno. 37
105
+ 000009811 8520 L $$aMiU$$bBUHR$$cGRAD$$hHD 7123 .A28 no.37
106
+ 000009811 8520 L $$aMiU$$bTAUB$$hHD 7123 .A28 no.37
107
+ 000009811 852 L $$auc1$$bSDR$$cUCSC
108
+ 000009811 970 L $$aBK$$bBook
109
+ 000009811 970 L $$aCE$$bElectronic Resource
110
+ 000009811 970 L $$aXS$$bStatistics
111
+ 000009811 971 L $$aMiU
112
+ 000009811 972 L $$c20040625
113
+ 000009811 972 L $$c20040625
114
+ 000009811 973 L $$aHT$$bavail_ht
115
+ 000009811 973 L $$aAC$$bavail_circ
116
+ 000009811 974 L $$zno.37$$umdp.39015004063635$$ric
117
+ 000009811 974 L $$zno.37$$uuc1.32106000924313$$ric$$d20091202
118
+ 000009811 998 L $$s9665