marc4j4r 0.1.3 → 0.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -63,11 +63,11 @@
63
63
  <p class="children">
64
64
 
65
65
 
66
- <strong class="modules">Modules:</strong> <a href="MARC4J4R.html" title="MARC4J4R (module)">MARC4J4R</a>
66
+ <strong class="modules">Modules:</strong> <a href="MARC.html" title="MARC (module)">MARC</a>, <a href="MARC4J4R.html" title="MARC4J4R (module)">MARC4J4R</a>
67
67
 
68
68
 
69
69
 
70
- <strong class="classes">Classes:</strong> <a href="ControlFieldImpl.html" title="ControlFieldImpl (class)">ControlFieldImpl</a>, <a href="DataFieldImpl.html" title="DataFieldImpl (class)">DataFieldImpl</a>, <a href="RecordImpl.html" title="RecordImpl (class)">RecordImpl</a>, <a href="SubfieldImpl.html" title="SubfieldImpl (class)">SubfieldImpl</a>
70
+ <strong class="classes">Classes:</strong> <a href="ASReader.html" title="ASReader (class)">ASReader</a>, <a href="ControlFieldImpl.html" title="ControlFieldImpl (class)">ControlFieldImpl</a>, <a href="DataFieldImpl.html" title="DataFieldImpl (class)">DataFieldImpl</a>, <a href="RecordImpl.html" title="RecordImpl (class)">RecordImpl</a>, <a href="SubfieldImpl.html" title="SubfieldImpl (class)">SubfieldImpl</a>
71
71
 
72
72
 
73
73
  </p>
@@ -78,7 +78,7 @@
78
78
  </div>
79
79
 
80
80
  <div id="footer">
81
- Generated on Fri Feb 12 16:38:07 2010 by
81
+ Generated on Sat Apr 3 22:01:08 2010 by
82
82
  <a href="http://yardoc.org" title="Yay! A Ruby Documentation Tool">yard</a>
83
83
  0.5.3 (ruby-1.8.7).
84
84
  </div>
Binary file
data/jars/marc4j.jar CHANGED
Binary file
data/lib/marc4j4r.rb CHANGED
@@ -11,6 +11,28 @@ end
11
11
 
12
12
  require 'set'
13
13
 
14
+
15
+ # Re-open the MarcReader interface, define #each and include Enumerable
16
+ #
17
+ # We also automatically call #hashify on the records that stream through
18
+ # #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
19
+ # record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
20
+ # by another 520)
21
+
22
+ module Java::OrgMarc4j::MarcReader
23
+ include Enumerable
24
+
25
+ # Return the next record, after calling #hashify on it
26
+ def each(hashify=true)
27
+ while self.hasNext
28
+ r = self.next
29
+ r.hashify if hashify
30
+ yield r
31
+ end
32
+ end
33
+ end
34
+
35
+
14
36
  module MARC4J4R
15
37
 
16
38
  # Add some sugar to the MarcReader interface
@@ -31,10 +53,11 @@ module MARC4J4R
31
53
  # can't just add it to the MarcReader interface the way I wanted to.
32
54
 
33
55
  NEWINIT = <<-ENDBINDER
56
+ include Enumerable
34
57
  alias_method :oldinit, :initialize
35
58
  def initialize(fromwhere)
36
59
  stream = nil
37
- if fromwhere.is_a? Java::JavaIO::InputStream
60
+ if fromwhere.is_a? Java::JavaIO::InputStream or fromwhere.is_a? Java::JavaIO::ByteArrayInputStream
38
61
  stream = fromwhere
39
62
  elsif fromwhere.is_a? IO
40
63
  stream = fromwhere.to_inputstream
@@ -54,6 +77,7 @@ module MARC4J4R
54
77
  Java::org.marc4j.MarcXmlReader.module_eval(NEWINIT)
55
78
 
56
79
 
80
+
57
81
  # Get a marc reader of the appropriate type
58
82
  # @param [String, IO, java.io.InputStream] input The IO stream (or filename) from which you want to read
59
83
  # @param [:strictmarc, :permissivemarc, :marcxml, :alephsequential] type The type of MARC reader you want.
@@ -75,43 +99,84 @@ module MARC4J4R
75
99
 
76
100
  def reader(input, type = :strictmarc)
77
101
  case type
78
- when :strictmarc
102
+ when :strictmarc then
79
103
  return Java::org.marc4j.MarcStreamReader.new(input)
80
- when :permissivemarc
104
+ when :permissivemarc then
81
105
  return Java::org.marc4j.MarcPermissiveStreamReader.new(input)
82
- when :marcxml
106
+ when :marcxml then
83
107
  return Java::org.marc4j.MarcXmlReader.new(input)
108
+ when :alephsequential then
109
+ return MARC4J4R::AlephSequentialReader.new(input)
84
110
  else
85
- raise ArgumentError, "Reader type must be :strictmarc, :permissivemarc, or :marcxml"
111
+ raise ArgumentError, "Reader type #{type} illegal: must be :strictmarc, :permissivemarc, :marcxml, or :alephsequential"
86
112
  end
87
113
  end
88
114
  module_function :reader
89
-
90
- end
91
-
92
-
93
- # Re-open the MarcReader interface, define #each and include Enumerable
94
- #
95
- # We also automatically call #hashify on the records that stream through
96
- # #each in order to speed up RecordImpl#[] when (a) doing many operations on a single
97
- # record, and (b) we're not worried about interleaved tags (e.g., a 520 followed by a 510 followed
98
- # by another 520)
99
-
100
- module Java::OrgMarc4j::MarcReader
101
- include Enumerable
102
115
 
103
- # Return the next record, after calling #hashify on it
104
- def each
105
- while self.hasNext
106
- r = self.next
107
- r.hashify
108
- yield r
116
+
117
+ # Implement an AlephSequential reader
118
+ class AlephSequentialReader
119
+ include Enumerable
120
+ def initialize(fromwhere)
121
+ stream = nil
122
+ if fromwhere.is_a? Java::JavaIO::InputStream
123
+ stream = fromwhere.to_io
124
+ elsif fromwhere.is_a? IO
125
+ stream = fromwhere
126
+ else
127
+ stream = File.new(fromwhere)
128
+ end
129
+
130
+ @handle = stream
109
131
  end
110
- end
132
+
133
+ def each
134
+ record = nil
135
+ currentID = nil
136
+
137
+ @handle.each_line do |l|
138
+ l.chomp!
139
+ next unless l =~ /\S/
140
+ vals = l.unpack('a9 a a3 c c a3 a*')
141
+ id, tag, ind1, ind2, data = vals[0], vals[2], vals[3], vals[4], vals[6]
142
+ # id, tag, ind1, ind2, junk, data = *(l.unpack('A10 a3 c c a3 A*'))
143
+ if id != currentID
144
+ if record
145
+ yield record
146
+ end
147
+ record = RecordImpl.new
148
+ currentID = id
149
+ end
150
+ if tag == 'LDR'
151
+ record.setLeader(Java::org.marc4j.marc.impl.LeaderImpl.new(data))
152
+ else
153
+ record << buildField(tag,ind1,ind2,data)
154
+ end
155
+ end
156
+ yield record
157
+ end
158
+
159
+
160
+ SUBREGEXP = /\$\$(.)/
161
+ def buildField (tag, ind1, ind2, data)
162
+ if Java::org.marc4j.marc.impl.Verifier.isControlField tag
163
+ return Java::org.marc4j.marc.impl.ControlFieldImpl.new(tag, data)
164
+ else
165
+ f = Java::org.marc4j.marc.impl.DataFieldImpl.new(tag, ind1, ind2)
166
+ data.split(SUBREGEXP)[1..-1].each_slice(2) do |code, value|
167
+ f.addSubfield Java::org.marc4j.marc.impl.SubfieldImpl.new(code[0].ord, value)
168
+ end
169
+ return f
170
+ end
171
+ end
172
+
173
+ end # End of class AlephSequentialReader
174
+
111
175
  end
112
176
 
113
177
 
114
178
 
179
+
115
180
  include_class Java::org.marc4j.marc.impl::RecordImpl
116
181
  include_class Java::org.marc4j.marc.impl::ControlFieldImpl
117
182
  include_class Java::org.marc4j.marc.impl::DataFieldImpl
@@ -122,6 +187,38 @@ include_class Java::org.marc4j.marc.impl::SubfieldImpl
122
187
 
123
188
  class RecordImpl
124
189
  include Enumerable
190
+
191
+ alias_method :<<, :addVariableField
192
+ alias_method :append, :addVariableField
193
+ alias_method :fields, :getVariableFields
194
+
195
+ # Export as a MARC-Hash, as described at
196
+ # http://robotlibrarian.billdueber.com/marc-hash-the-saga-continues-now-with-even-less-structure/
197
+ # @return A marc-hash representation of the record, suitable for calling .to_json on or whatever
198
+ def to_marchash
199
+ h = {}
200
+ h['type'] = 'marc-hash'
201
+ h['version'] = [1,0]
202
+ h['leader'] = self.leader
203
+
204
+ fields = []
205
+
206
+ self.getVariableFields.each do |f|
207
+ if f.controlField?
208
+ fields << [f.tag, f.value]
209
+ else
210
+ farray = [f.tag, f.indicator1 || ' ', f.indicator2 || ' ']
211
+ subs = []
212
+ f.each do |subfield|
213
+ subs << [subfield.code, subfield.value]
214
+ end
215
+ farray.push subs
216
+ fields << farray
217
+ end
218
+ end
219
+ h['fields'] = fields
220
+ return h
221
+ end
125
222
 
126
223
  # Create a local hash by tag number; makes some stuff faster
127
224
  # Called automatically if you use reader.each
@@ -148,7 +245,8 @@ class RecordImpl
148
245
  def leader
149
246
  self.get_leader.toString
150
247
  end
151
-
248
+
249
+
152
250
  # Cycle through the fields in the order they appear in the record
153
251
  def each
154
252
  self.getVariableFields.each do |f|
@@ -163,7 +261,11 @@ class RecordImpl
163
261
 
164
262
  def [] tag
165
263
  if defined? @hashedtags
166
- return @hashedtags[tag][0]
264
+ if @hashedtags[tag]
265
+ return @hashedtags[tag][0]
266
+ else
267
+ return nil
268
+ end
167
269
  else
168
270
  return self.getVariableField(tag)
169
271
  end
@@ -209,7 +311,7 @@ class RecordImpl
209
311
  # # 035 $a (OCoLC)ocm00001728
210
312
 
211
313
  def find_by_tag(tags, originalorder = false)
212
- self.hashify unless @hashedtags
314
+ self.hashify unless @hashedtags and !originalorder
213
315
  if !tags.is_a? Array
214
316
  return @hashedtags[tags] || []
215
317
  end
@@ -221,18 +323,39 @@ class RecordImpl
221
323
  end
222
324
  end
223
325
 
326
+
224
327
 
225
328
  # Return the record as valid MARC-XML
226
329
  # @return String A MARC-XML representation of the record, including the XML header
227
330
  def to_xml
228
331
  return @xml if @xml
229
- @xml = java.io.StringWriter.new
230
- res = javax.xml.transform.stream.StreamResult.new(@xml)
231
- writer = org.marc4j.MarcXmlWriter.new(res)
232
- writer.write(self)
233
- return @xml.toString
332
+ begin
333
+ @xml = java.io.StringWriter.new
334
+ res = javax.xml.transform.stream.StreamResult.new(@xml)
335
+ writer = org.marc4j.MarcXmlWriter.new(res)
336
+ writer.write(self)
337
+ writer.writeEndDocument
338
+ return @xml.toString
339
+ rescue
340
+ "Woops! to_xml failed for record #{self['001'].data}: #{$!}"
341
+ end
342
+ end
343
+
344
+ def to_marc
345
+ begin
346
+ s = Java::java.io.ByteArrayOutputStream.new
347
+ writer = org.marc4j.MarcStreamWriter.new(s)
348
+ writer.write(self)
349
+ @marcbinary = s.to_string
350
+ puts @marcbinary
351
+ return @marcbinary
352
+ rescue
353
+ # "Woops! to_marc failed for record #{self['001'].data}: #{$!}"
354
+ "Whoops! Failed: #{$!}"
355
+ end
234
356
  end
235
-
357
+
358
+
236
359
  end
237
360
 
238
361
  class ControlFieldImpl
@@ -240,6 +363,14 @@ class ControlFieldImpl
240
363
  return self.data
241
364
  end
242
365
 
366
+ def controlField?
367
+ return true
368
+ end
369
+
370
+ def self.control_tag? tag
371
+ return Java::org.marc4j.marc.impl.Verifier.isControlField tag
372
+ end
373
+
243
374
  # Pretty-print
244
375
  # @param [String] joiner What string to use to join the subfields
245
376
  # @return [String] The pretty string
@@ -251,6 +382,20 @@ end
251
382
 
252
383
  class DataFieldImpl
253
384
  include Enumerable
385
+
386
+ alias_method :<<, :addSubfield
387
+
388
+
389
+ def controlField?
390
+ return false
391
+ end
392
+
393
+ # Broken. Need to check subs as well
394
+ def == other
395
+ self.tag == other.tag and
396
+ self.indicator1 == other.indicator1 and
397
+ self.indicator2 == other.indicator2
398
+ end
254
399
 
255
400
  # Pretty-print
256
401
  # @param [String] joiner What string to use to join the subfields
@@ -268,9 +413,13 @@ class DataFieldImpl
268
413
  # @return [String] The value of the first matched subfield
269
414
  def [] code
270
415
  raise ArgumentError, "Code must be a one-character string, not #{code}" unless code.is_a? String and code.size == 1
271
- # note that code[0] is just converting the one-character string into an integer
272
- # char value that the underlying java can deal with
273
- self.getSubfield(code[0]).getData
416
+ # need to send a char value that the underlying java can deal with
417
+ sub = self.getSubfield(code[0].ord)
418
+ if (sub)
419
+ return sub.getData
420
+ else
421
+ return nil
422
+ end
274
423
  end
275
424
 
276
425
 
@@ -345,6 +494,10 @@ end
345
494
 
346
495
  class SubfieldImpl
347
496
 
497
+ def == other
498
+ return self.code == other.code and self.data == other.data
499
+ end
500
+
348
501
  def value
349
502
  return self.data
350
503
  end
data/test/batch.seq ADDED
@@ -0,0 +1,118 @@
1
+ 000004262 LDR L ^^^^^nam^a22003011^^4500
2
+ 000004262 001 L 000004262
3
+ 000004262 005 L 19891107000000.0
4
+ 000004262 006 L m^^^^^^^^d^^^^^^^^
5
+ 000004262 007 L cr^bn^---auaua
6
+ 000004262 008 L 880715r19721939nyua^^^^^b^^^^00100^eng^^
7
+ 000004262 010 L $$a72001885/MN
8
+ 000004262 020 L $$a0306705133
9
+ 000004262 035 L $$a(RLIN)MIUG0297757-B
10
+ 000004262 035 L $$a(CaOTULAS)159822964
11
+ 000004262 035 L $$a(OCoLC)ocm00297757
12
+ 000004262 035 L $$asdr-inu1221428
13
+ 000004262 040 L $$aDLC$$cDLC$$dMiU
14
+ 000004262 043 L $$an------
15
+ 000004262 0500 L $$aML3557$$b.D3645 1972
16
+ 000004262 082 L $$a784.7/51
17
+ 000004262 1001 L $$aDensmore, Frances,$$d1867-1957.
18
+ 000004262 24510 L $$aNootka and Quileute music.
19
+ 000004262 260 L $$aNew York,$$bDa Capo Press,$$c1972.
20
+ 000004262 300 L $$axxvi, 358 p.$$billus.$$c23 cm.
21
+ 000004262 4900 L $$aDa Capo Press music reprint series
22
+ 000004262 500 L $$aReprint of the 1939 ed., which was issued as Bulletin 124 of Smithsonian Institution. Bureau of American Ethnology.
23
+ 000004262 504 L $$aBibliography: p. 349.
24
+ 000004262 538 L $$aMode of access: Internet.
25
+ 000004262 650 0 L $$aNootka Indians$$xMusic.
26
+ 000004262 650 0 L $$aQuileute Indians$$xMusic.
27
+ 000004262 8520 L $$aMiU$$bMUSIC$$hML3557 .D4255 1972
28
+ 000004262 852 L $$ainu$$bSDR$$cINU
29
+ 000004262 970 L $$aBK$$bBook
30
+ 000004262 970 L $$aCE$$bElectronic Resource
31
+ 000004262 971 L $$aMiU
32
+ 000004262 972 L $$c20040625
33
+ 000004262 973 L $$aHT$$bavail_ht
34
+ 000004262 973 L $$aAC$$bavail_circ
35
+ 000004262 974 L $$uinu.30000053901769$$ric$$d20100226
36
+ 000004262 998 L $$s9665
37
+ 000005951 LDR L ^^^^^nam^a22002771^^4500
38
+ 000005951 001 L 000005951
39
+ 000005951 005 L 19970922000000.0
40
+ 000005951 006 L m^^^^^^^^d^^^^^^^^
41
+ 000005951 007 L cr^bn^---auaua
42
+ 000005951 008 L 880715s1968^^^^dcu^^^^^^b^^^|00010^eng^c
43
+ 000005951 010 L $$ahew68000006
44
+ 000005951 035 L $$a(RLIN)MIUG0425743-B
45
+ 000005951 035 L $$a(CaOTULAS)159824865
46
+ 000005951 035 L $$a(OCoLC)ocm00425743
47
+ 000005951 035 L $$asdr-ucsc.b16458175
48
+ 000005951 040 L $$a*U.S. Dept. of Health Education, and Welfare. Li$$cDLC$$dMiU
49
+ 000005951 0500 L $$aHD7123$$b.A39 no. 22
50
+ 000005951 082 L $$a301.5/5
51
+ 000005951 1001 L $$aKreps, Juanita Morris.
52
+ 000005951 24510 L $$aLifetime allocation of work and leisure,$$cby Juanita M. Kreps.
53
+ 000005951 260 L $$a[Washington,$$bFor sale by Supt. of Docs., U.S. Govt. Print. Off.,$$c1968]
54
+ 000005951 300 L $$aix, 44 p.$$c24 cm.
55
+ 000005951 4901 L $$aUnited States. Social Security Administration. Office of Research and Statistics. Research report$$vno. 22
56
+ 000005951 504 L $$aBibliographical footnotes.
57
+ 000005951 538 L $$aMode of access: Internet.
58
+ 000005951 650 0 L $$aAge and employment
59
+ 000005951 650 0 L $$aRetirement
60
+ 000005951 830 0 L $$aResearch report (United States. Social Security Administration. Office of Research and Statistics) ;$$vno. 22
61
+ 000005951 8520 L $$aMiU$$bTAUB$$hHD6279 .K925
62
+ 000005951 8520 L $$aMiU$$bTAUB$$hHD 7123 .A28 no.22
63
+ 000005951 8520 L $$aMiU$$bBUHR$$cGRAD$$hHD 7123 .A28 no.22
64
+ 000005951 852 L $$auc1$$bSDR$$cUCSC
65
+ 000005951 970 L $$aBK$$bBook
66
+ 000005951 970 L $$aCE$$bElectronic Resource
67
+ 000005951 971 L $$aMiU
68
+ 000005951 972 L $$c20040625
69
+ 000005951 972 L $$c20040625
70
+ 000005951 972 L $$c20040625
71
+ 000005951 973 L $$aHT$$bavail_ht
72
+ 000005951 973 L $$aAC$$bavail_circ
73
+ 000005951 974 L $$umdp.39015038814847$$ric
74
+ 000005951 974 L $$umdp.39015072105847$$ric$$d20091001
75
+ 000005951 974 L $$zno.22$$uuc1.32106000924198$$ric$$d20091202
76
+ 000005951 998 L $$s9665
77
+ 000009811 LDR L ^^^^^nam^a22003251^^4500
78
+ 000009811 001 L 000009811
79
+ 000009811 005 L 19970922000000.0
80
+ 000009811 006 L m^^^^^^^^d^^^^^^^^
81
+ 000009811 007 L cr^bn^---auaua
82
+ 000009811 008 L 880715s1971^^^^dcua^^^^^bs^^^00000^eng^^
83
+ 000009811 010 L $$a72179703
84
+ 000009811 035 L $$a(RLIN)MIUG0695316-B
85
+ 000009811 035 L $$a(CaOTULAS)159829127
86
+ 000009811 035 L $$a(OCoLC)ocm00695316
87
+ 000009811 035 L $$asdr-ucsc.b13985097
88
+ 000009811 040 L $$aDLC$$cDLC$$dMiU$$dCStRLIN
89
+ 000009811 043 L $$an-us---
90
+ 000009811 0500 L $$aHD7123$$b.A39 no. 37$$aHD7102.U4
91
+ 000009811 082 L $$a368.4/00973 s$$a368.4/2/00973
92
+ 000009811 086 L $$aII0 aHE 3.49:37
93
+ 000009811 1101 L $$aUnited States.$$bSocial Security Administration.$$bOffice of Research and Statistics.
94
+ 000009811 24510 L $$aFinancing mental health care under medicare and medicaid.
95
+ 000009811 260 L $$a[Washington;$$bFor sale by the Supt. of Docs., U.S. Govt. Print. Off.,$$c1971]
96
+ 000009811 300 L $$avii, 52 p.$$billus.$$c27 cm.
97
+ 000009811 4901 L $$aIts Research report$$vno. 37
98
+ 000009811 500 L $$aSupt. of Docs. no.: HE 3.49:37
99
+ 000009811 504 L $$aIncludes bibliographical references.
100
+ 000009811 538 L $$aMode of access: Internet.
101
+ 000009811 650 0 L $$aMental health insurance$$zUnited States.
102
+ 000009811 650 0 L $$aMedicare
103
+ 000009811 650 0 L $$aMedicaid
104
+ 000009811 830 0 L $$aResearch report (United States. Social Security Administration. Office of Research and Statistics) ;$$vno. 37
105
+ 000009811 8520 L $$aMiU$$bBUHR$$cGRAD$$hHD 7123 .A28 no.37
106
+ 000009811 8520 L $$aMiU$$bTAUB$$hHD 7123 .A28 no.37
107
+ 000009811 852 L $$auc1$$bSDR$$cUCSC
108
+ 000009811 970 L $$aBK$$bBook
109
+ 000009811 970 L $$aCE$$bElectronic Resource
110
+ 000009811 970 L $$aXS$$bStatistics
111
+ 000009811 971 L $$aMiU
112
+ 000009811 972 L $$c20040625
113
+ 000009811 972 L $$c20040625
114
+ 000009811 973 L $$aHT$$bavail_ht
115
+ 000009811 973 L $$aAC$$bavail_circ
116
+ 000009811 974 L $$zno.37$$umdp.39015004063635$$ric
117
+ 000009811 974 L $$zno.37$$uuc1.32106000924313$$ric$$d20091202
118
+ 000009811 998 L $$s9665