marc 0.6.0 → 0.7.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,3 +1,4 @@
1
+ [![Gem Version](https://badge.fury.io/rb/marc.png)](http://badge.fury.io/rb/marc)
1
2
  [![Build Status](https://secure.travis-ci.org/ruby-marc/ruby-marc.png)](http://travis-ci.org/ruby-marc/ruby-marc)
2
3
 
3
4
  marc is a ruby library for reading and writing MAchine Readable Cataloging
data/lib/marc/reader.rb CHANGED
@@ -396,7 +396,7 @@ module MARC
396
396
  # Like Reader ForgivingReader lets you read in a batch of MARC21 records
397
397
  # but it does not use record lengths and field byte offsets found in the
398
398
  # leader and directory. It is not unusual to run across MARC records
399
- # which have had their offsets calcualted wrong. In situations like this
399
+ # which have had their offsets calculated wrong. In situations like this
400
400
  # the vanilla Reader may fail, and you can try to use ForgivingReader.
401
401
  #
402
402
  # The one downside to this is that ForgivingReader will assume that the
data/lib/marc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.6.0"
2
+ VERSION = "0.7.0"
3
3
  end
@@ -0,0 +1,108 @@
1
+ module MARC
2
+
3
+ # A class for writing MARC records as MARC21.
4
+
5
+ class Writer
6
+
7
+ # the constructor which you must pass a file path
8
+ # or an object that responds to a write message
9
+
10
+ def initialize(file)
11
+ if file.class == String
12
+ @fh = File.new(file,"w")
13
+ elsif file.respond_to?('write')
14
+ @fh = file
15
+ else
16
+ throw "must pass in file name or handle"
17
+ end
18
+ end
19
+
20
+
21
+ # write a record to the file or handle
22
+
23
+ def write(record)
24
+ @fh.write(MARC::Writer.encode(record))
25
+ end
26
+
27
+
28
+ # close underlying filehandle
29
+
30
+ def close
31
+ @fh.close
32
+ end
33
+
34
+
35
+ # a static method that accepts a MARC::Record object
36
+ # and returns the record encoded as MARC21 in transmission format
37
+
38
+ def self.encode(record)
39
+ directory = ''
40
+ fields = ''
41
+ offset = 0
42
+ for field in record.fields
43
+
44
+ # encode the field
45
+ field_data = ''
46
+ if field.class == MARC::DataField
47
+ warn("Warn: Missing indicator") unless field.indicator1 && field.indicator2
48
+ field_data = (field.indicator1 || " ") + (field.indicator2 || " ")
49
+ for s in field.subfields
50
+ field_data += SUBFIELD_INDICATOR + s.code + s.value
51
+ end
52
+ elsif field.class == MARC::ControlField
53
+ field_data = field.value
54
+ end
55
+ field_data += END_OF_FIELD
56
+
57
+ # calculate directory entry for the field
58
+ field_length = (field_data.respond_to?(:bytesize) ?
59
+ field_data.bytesize() :
60
+ field_data.length())
61
+ directory += sprintf("%03s", field.tag) + format_byte_count(field_length, 4) + format_byte_count(offset)
62
+
63
+
64
+ # add field to data for other fields
65
+ fields += field_data
66
+
67
+ # update offset for next field
68
+ offset += field_length
69
+ end
70
+
71
+ # determine the base (leader + directory)
72
+ base = record.leader + directory + END_OF_FIELD
73
+
74
+ # determine complete record
75
+ marc = base + fields + END_OF_RECORD
76
+
77
+ # update leader with the byte offset to the end of the directory
78
+ marc[12..16] = format_byte_count(base.respond_to?(:bytesize) ?
79
+ base.bytesize() :
80
+ base.length()
81
+ )
82
+
83
+ # update the record length
84
+ marc[0..4] = format_byte_count(marc.respond_to?(:bytesize) ?
85
+ marc.bytesize() :
86
+ marc.length()
87
+ )
88
+
89
+ # store updated leader in the record that was passed in
90
+ record.leader = marc[0..LEADER_LENGTH-1]
91
+
92
+ # return encoded marc
93
+ return marc
94
+ end
95
+
96
+ def self.format_byte_count(number, num_digits=5)
97
+ formatted = sprintf("%0#{num_digits}i", number)
98
+ if formatted.length > num_digits
99
+ # uh, oh, we've exceeded our max. Fill the slot with
100
+ # all 9's so it keeps its fixed width.
101
+ #formatted = sprintf("%0#{num_digits}i", "")
102
+ formatted = "9" * num_digits
103
+ end
104
+ return formatted
105
+ end
106
+
107
+ end
108
+ end
data/lib/marc/writer.rb CHANGED
@@ -1,8 +1,30 @@
1
1
  module MARC
2
2
 
3
- # A class for writing MARC records as MARC21.
4
-
3
+ # A class for writing MARC records as binary MARC (ISO 2709)
4
+ #
5
+ # == Too-long records
6
+ #
7
+ # The MARC binary format only allows records of at most 99999 bytes in total,
8
+ # due to size of a length field in the record.
9
+ #
10
+ # By default, the Writer will raise a MARC::Exception when encountering
11
+ # in-memory records that are too long to be legally written out as ISO 2709
12
+ # binary.
13
+ #
14
+ # However, if you set `allow_oversized` to true, then the Writer will
15
+ # write these records out anyway, filling in any binary length/offset slots
16
+ # with all 0's, if they are not wide enough to hold the true value.
17
+ # While these records are illegal, they can still be read back in using
18
+ # the MARC::ForgivingReader, as well as other platform MARC readers
19
+ # in tolerant mode.
20
+ #
21
+ # If you set `allow_oversized` to false on the Writer, a MARC::Exception
22
+ # will be raised instead, if you try to write an oversized record.
23
+ #
24
+ # writer = Writer.new(some_path)
25
+ # writer.allow_oversized = true
5
26
  class Writer
27
+ attr_accessor :allow_oversized
6
28
 
7
29
  # the constructor which you must pass a file path
8
30
  # or an object that responds to a write message
@@ -15,13 +37,14 @@ module MARC
15
37
  else
16
38
  throw "must pass in file name or handle"
17
39
  end
40
+ self.allow_oversized = false
18
41
  end
19
42
 
20
43
 
21
44
  # write a record to the file or handle
22
45
 
23
46
  def write(record)
24
- @fh.write(MARC::Writer.encode(record))
47
+ @fh.write(MARC::Writer.encode(record, self.allow_oversized))
25
48
  end
26
49
 
27
50
 
@@ -34,8 +57,10 @@ module MARC
34
57
 
35
58
  # a static method that accepts a MARC::Record object
36
59
  # and returns the record encoded as MARC21 in transmission format
37
-
38
- def self.encode(record)
60
+ #
61
+ # Second arg allow_oversized, default false: an oversized record raises
62
+ # MARC::Exception. Set to true to write it anyway with zeroed length/offset slots.
63
+ def self.encode(record, allow_oversized = false)
39
64
  directory = ''
40
65
  fields = ''
41
66
  offset = 0
@@ -58,8 +83,8 @@ module MARC
58
83
  field_length = (field_data.respond_to?(:bytesize) ?
59
84
  field_data.bytesize() :
60
85
  field_data.length())
61
- directory += sprintf("%03s%04i%05i", field.tag, field_length,
62
- offset)
86
+ directory += sprintf("%03s", field.tag) + format_byte_count(field_length, allow_oversized, 4) + format_byte_count(offset, allow_oversized)
87
+
63
88
 
64
89
  # add field to data for other fields
65
90
  fields += field_data
@@ -75,22 +100,42 @@ module MARC
75
100
  marc = base + fields + END_OF_RECORD
76
101
 
77
102
  # update leader with the byte offset to the end of the directory
78
- marc[12..16] = sprintf("%05i", (base.respond_to?(:bytesize) ?
79
- base.bytesize() :
80
- base.length() )
81
- )
103
+ bytesize = base.respond_to?(:bytesize) ? base.bytesize() : base.length()
104
+ marc[12..16] = format_byte_count(bytesize, allow_oversized)
105
+
82
106
 
83
107
  # update the record length
84
- marc[0..4] = sprintf("%05i", (marc.respond_to?(:bytesize) ?
85
- marc.bytesize() :
86
- marc.length())
87
- )
88
-
108
+ bytesize = marc.respond_to?(:bytesize) ? marc.bytesize() : marc.length()
109
+ marc[0..4] = format_byte_count(bytesize, allow_oversized)
110
+
89
111
  # store updated leader in the record that was passed in
90
112
  record.leader = marc[0..LEADER_LENGTH-1]
91
113
 
92
114
  # return encoded marc
93
- return marc
115
+ return marc
94
116
  end
117
+
118
+ # Formats numbers for insertion into marc binary slots.
119
+ # These slots only allow so many digits (and need to be left-padded
120
+ # with zeros to that number of digits). If the number
121
+ # is too big, either an exception will be raised, or
122
+ # we'll return all 0's to proper number of digits.
123
+ #
124
+ # first arg is number, second is boolean whether to allow oversized,
125
+ # third is max digits (default 5)
126
+ def self.format_byte_count(number, allow_oversized, num_digits=5)
127
+ formatted = sprintf("%0#{num_digits}i", number)
128
+ if formatted.length > num_digits
129
+ # uh, oh, we've exceeded our max. Either zero out
130
+ # or raise, depending on settings.
131
+ if allow_oversized
132
+ formatted = sprintf("%0#{num_digits}i", 0)
133
+ else
134
+ raise MARC::Exception.new("Can't write MARC record, as length/offset value of #{number} is too long for the #{num_digits} slot in binary format.")
135
+ end
136
+ end
137
+ return formatted
138
+ end
139
+
95
140
  end
96
141
  end
@@ -3,6 +3,8 @@
3
3
  require 'test/unit'
4
4
  require 'marc'
5
5
 
6
+ require 'stringio'
7
+
6
8
  # Testing char encodings under 1.9, don't bother running
7
9
  # these tests except under 1.9, will either fail (because
8
10
  # 1.9 func the test itself uses isn't there), or trivially pass
data/test/tc_writer.rb CHANGED
@@ -1,6 +1,8 @@
1
1
  require 'test/unit'
2
2
  require 'marc'
3
3
 
4
+ require 'stringio'
5
+
4
6
  class WriterTest < Test::Unit::TestCase
5
7
 
6
8
  def test_writer
@@ -19,6 +21,99 @@ class WriterTest < Test::Unit::TestCase
19
21
  # cleanup
20
22
  File.unlink('test/writer.dat')
21
23
  end
24
+
25
+ # Only in ruby 1.9
26
+ if "".respond_to?(:encoding)
27
+ def test_writer_bad_encoding
28
+ writer = MARC::Writer.new('test/writer.dat')
29
+
30
+
31
+ # MARC::Writer should just happily write out whatever bytes you give it, even
32
+ # mixing encodings that can't be mixed. We ran into an actual example mixing
33
+ # MARC8 (tagged ruby binary) and UTF8, we want it to be written out.
34
+
35
+ record = MARC::Record.new
36
+
37
+ record.append MARC::DataField.new('700', '0', ' ', ['a', "Nhouy Abhay,".force_encoding("BINARY")], ["c", "Th\xE5ao,".force_encoding("BINARY")], ["d", "1909-"])
38
+ record.append MARC::DataField.new('700', '0', ' ', ['a', "Somchin P\xF8\xE5o. Ngin,".force_encoding("BINARY")])
39
+
40
+ record.append MARC::DataField.new('100', '0', '0', ['a', "\xE5angkham. ".force_encoding("BINARY")])
41
+ record.append MARC::DataField.new('245', '1', '0', ['b', "chef-d'oeuvre de la litt\xE2erature lao".force_encoding("BINARY")])
42
+
43
+ # One in UTF8 and marked
44
+ record.append MARC::DataField.new('999', '0', '1', ['a', "chef-d'ocuvre de la littU+FFC3\U+FFA9rature".force_encoding("UTF-8")])
45
+
46
+ writer.write(record)
47
+ writer.close
48
+
49
+ ensure
50
+ File.unlink('test/writer.dat')
51
+ end
52
+ end
53
+
54
+ def test_write_too_long_iso2709
55
+ too_long_record = MARC::Record.new
56
+ 1.upto(1001) do
57
+ too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
58
+ end
59
+
60
+ wbuffer = StringIO.new("", "w")
61
+ writer = MARC::Writer.new(wbuffer)
62
+ writer.allow_oversized = true
63
+
64
+ writer.write(too_long_record)
65
+ writer.close
66
+
67
+ assert_equal "00000", wbuffer.string.slice(0, 5), "zero'd out length bytes when too long"
68
+
69
+ rbuffer = StringIO.new(wbuffer.string.dup)
70
+
71
+ # Regular reader won't read our illegal record.
72
+ #assert_raise(NoMethodError) do
73
+ # reader = MARC::Reader.new(rbuffer)
74
+ # reader.first
75
+ #end
76
+
77
+ # Forgiving reader will, round trippable
78
+ new_record = MARC::Reader.decode(rbuffer.string, :forgiving => true)
79
+ assert_equal too_long_record, new_record, "Too long record round-trippable with forgiving mode"
80
+
81
+ # Test in the middle of a MARC file
82
+ good_record = MARC::Record.new
83
+ good_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A short record'])
84
+ wbuffer = StringIO.new("", "w")
85
+ writer = MARC::Writer.new(wbuffer)
86
+ writer.allow_oversized = true
87
+
88
+ writer.write(good_record)
89
+ writer.write(too_long_record)
90
+ writer.write(good_record)
91
+
92
+ rbuffer = StringIO.new(wbuffer.string.dup)
93
+ reader = MARC::ForgivingReader.new(rbuffer)
94
+ records = reader.to_a
95
+
96
+ assert_equal 3, records.length
97
+ assert_equal good_record, records[0]
98
+ assert_equal good_record, records[2]
99
+ assert_equal too_long_record, records[1]
100
+ end
101
+
102
+ def test_raises_on_too_long_if_configured
103
+ too_long_record = MARC::Record.new
104
+ 1.upto(1001) do
105
+ too_long_record.append MARC::DataField.new("500", ' ', ' ', ['a', 'A really long record.1234567890123456789012345678901234567890123456789012345678901234567890123456789'])
106
+ end
107
+
108
+ wbuffer = StringIO.new("", "w")
109
+ writer = MARC::Writer.new(wbuffer)
110
+
111
+ assert_raise(MARC::Exception) do
112
+ writer.write too_long_record
113
+ end
114
+
115
+ end
116
+
22
117
 
23
118
  def test_forgiving_writer
24
119
  marc = "00305cam a2200133 a 4500001000700000003000900007005001700016008004100033008004100074035002500115245001700140909001000157909000400167\036635145\036UK-BiLMS\03620060329173705.0\036s1982iieng6 000 0 eng||\036060116|||||||||xxk eng||\036 \037a(UK-BiLMS)M0017366ZW\03600\037aTest record.\036 \037aa\037b\037c\036\037b0\036\035\000"
@@ -29,18 +124,18 @@ class WriterTest < Test::Unit::TestCase
29
124
  end
30
125
 
31
126
  def test_unicode_roundtrip
32
- record = MARC::Reader.new('test/utf8.marc').first
127
+ record = MARC::Reader.new('test/utf8.marc', :external_encoding => "UTF-8").first
33
128
 
34
129
  writer = MARC::Writer.new('test/writer.dat')
35
130
  writer.write(record)
36
131
  writer.close
37
132
 
38
- read_back_record = MARC::Reader.new('test/writer.dat').first
133
+ read_back_record = MARC::Reader.new('test/writer.dat', :external_encoding => "UTF-8").first
39
134
 
40
135
  # Make sure the one we wrote out then read in again
41
136
  # is the same as the one we read the first time
42
137
  # Looks like "==" is over-ridden to do that. Don't ever change, #==
43
- assert (record == read_back_record), "Round-tripped record must equal original record"
138
+ assert_equal record, read_back_record, "Round-tripped record must equal original record"
44
139
  end
45
140
 
46
141
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.6.0
4
+ version: 0.7.0
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors:
@@ -13,7 +13,7 @@ authors:
13
13
  autorequire: marc
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2013-08-19 00:00:00.000000000 Z
16
+ date: 2013-09-03 00:00:00.000000000 Z
17
17
  dependencies: []
18
18
  description:
19
19
  email: ehs@pobox.com
@@ -30,6 +30,7 @@ files:
30
30
  - lib/marc/record.rb
31
31
  - lib/marc/subfield.rb
32
32
  - lib/marc/version.rb
33
+ - lib/marc/writer-NEW.rb
33
34
  - lib/marc/writer.rb
34
35
  - lib/marc/xml_parsers.rb
35
36
  - lib/marc/xmlreader.rb