marc 1.0.4 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +106 -29
  7. data/Gemfile +15 -0
  8. data/README.md +240 -47
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -86
  22. data/lib/marc/reader.rb +119 -121
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -81
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +23 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -32
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +99 -87
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +94 -9
data/lib/marc/reader.rb CHANGED
@@ -1,10 +1,10 @@
1
- require 'scrub_rb'
1
+ require "scrub_rb"
2
2
 
3
3
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
4
  # only when necessary
5
5
 
6
6
  module MARC
7
- # A class for reading MARC binary (ISO 2709) files.
7
+ # A class for reading MARC binary (ISO 2709) files.
8
8
  #
9
9
  # == Character Encoding
10
10
  #
@@ -12,7 +12,7 @@ module MARC
12
12
  # If illegal bytes for that character encoding are encountered in certain
13
13
  # operations, ruby will raise an exception. If a String is incorrectly
14
14
  # tagged with the wrong character encoding, that makes it fairly likely
15
- # an illegal byte for the specified encoding will be encountered.
15
+ # an illegal byte for the specified encoding will be encountered.
16
16
  #
17
17
  # So when reading binary MARC data with the MARC::Reader, it's important
18
18
  # that you let it know the expected encoding:
@@ -21,7 +21,7 @@ module MARC
21
21
  #
22
22
  # If you leave off 'external_encoding', it will use the ruby environment
23
23
  # Encoding.default_external, which is usually UTF-8 but may depend on your
24
- # environment.
24
+ # environment.
25
25
  #
26
26
  # Even if you expect your data to be (eg) UTF-8, it may include bad/illegal
27
27
  # bytes. By default MARC::Reader will leave these in the produced Strings,
@@ -29,58 +29,58 @@ module MARC
29
29
  # to catch this early, and ask MARC::Reader to raise immediately on illegal
30
30
  # bytes:
31
31
  #
32
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
32
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
33
33
  # :validate_encoding => true)
34
34
  #
35
35
  # Alternately, you can have MARC::Reader replace illegal bytes
36
36
  # with the Unicode Replacement Character, or with a string
37
37
  # of your choice (including the empty string, meaning just omit the bad bytes)
38
38
  #
39
- # MARC::Reader("path/to/file.mrc", :external_encoding => "UTF-8",
39
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
40
40
  # :invalid => :replace)
41
- # MARC::Reader("path/to/file.mrc", :external_encoding => "UTF-8",
41
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
42
42
  # :invalid => :replace, :replace => "")
43
43
  #
44
44
  # If you supply an :external_encoding argument, MARC::Reader will
45
45
  # always assume that encoding -- if you leave it off, MARC::Reader
46
46
  # will use the encoding tagged on any input you pass in, such
47
- # as Strings or File handles.
47
+ # as Strings or File handles.
48
48
  #
49
49
  # # marc data will have same encoding as string.encoding:
50
50
  # MARC::Reader.decode( string )
51
51
  #
52
52
  # # Same, values will have encoding of string.encoding:
53
- # MARC::Reader.new(StringIO.new(string))
53
+ # MARC::Reader.new(StringIO.new(string))
54
54
  #
55
55
  # # data values will have cp866 encoding, per external_encoding of
56
56
  # # File object passed in
57
57
  # MARC::Reader.new(File.new("myfile.marc", "r:cp866"))
58
58
  #
59
59
  # # explicitly tell MARC::Reader the encoding
60
- # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
60
+ # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
61
61
  #
62
62
  # === MARC-8
63
63
  #
64
64
  # The legacy MARC-8 encoding needs to be handled differently, because
65
- # there is no built-in support in ruby for MARC-8.
65
+ # there is no built-in support in ruby for MARC-8.
66
66
  #
67
67
  # You _can_ specify "MARC-8" as an external encoding. It will trigger
68
- # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
68
+ # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
69
69
  #
70
70
  # MARC::Reader.new("marc8.mrc", :external_encoding => "MARC-8")
71
71
  #
72
72
  # For external_encoding "MARC-8", :validate_encoding is always true,
73
73
  # there's no way to ignore bad bytes in MARC-8 when transcoding to
74
- # unicode. However, just as with other encodings, the
74
+ # unicode. However, just as with other encodings, the
75
75
  # `:invalid => :replace` and `:replace => "string"`
76
- # options can be used to replace bad bytes instead of raising.
76
+ # options can be used to replace bad bytes instead of raising.
77
77
  #
78
78
  # If you want your MARC-8 to be transcoded internally to something
79
79
  # other than UTF-8, you can use the :internal_encoding option
80
- # which works with any encoding in MARC::Reader.
80
+ # which works with any encoding in MARC::Reader.
81
81
  #
82
- # MARC::Reader.new("marc8.mrc",
83
- # :external_encoding => "MARC-8",
82
+ # MARC::Reader.new("marc8.mrc",
83
+ # :external_encoding => "MARC-8",
84
84
  # :internal_encoding => "UTF-16LE")
85
85
  #
86
86
  # If you want to read in MARC-8 without transcoding, leaving the
@@ -90,48 +90,48 @@ module MARC
90
90
  #
91
91
  # MARC::Reader.new("marc8.mrc", :external_encoding => "binary")
92
92
  #
93
- # Please note that MARC::Reader does _not_ currently have any facilities
94
- # for guessing encoding from MARC21 leader byte 9, that is ignored.
93
+ # Please note that MARC::Reader does _not_ currently have any facilities
94
+ # for guessing encoding from MARC21 leader byte 9, that is ignored.
95
95
  #
96
96
  # === Complete Encoding Options
97
97
  #
98
98
  # These options can all be used on MARC::Reader.new _or_ MARC::Reader.decode
99
99
  # to specify external encoding, ask for a transcode to a different
100
- # encoding on read, or validate or replace bad bytes in source.
100
+ # encoding on read, or validate or replace bad bytes in source.
101
101
  #
102
102
  # [:external_encoding]
103
103
  # What encoding to consider the MARC record's values to be in. This option
104
- # takes precedence over the File handle or String argument's encodings.
104
+ # takes precedence over the File handle or String argument's encodings.
105
105
  # [:internal_encoding]
106
106
  # Ask MARC::Reader to transcode to this encoding in memory after reading
107
- # the file in.
107
+ # the file in.
108
108
  # [:validate_encoding]
109
109
  # If you pass in `true`, MARC::Reader will promise to raise an Encoding::InvalidByteSequenceError
110
110
  # if there are illegal bytes in the source for the :external_encoding. There is
111
111
  # a performance penalty for this check. Without this option, an exception
112
- # _may_ or _may not_ be raised, and whether an exception or raised (or
112
+ # _may_ or _may not_ be raised, and whether an exception is raised (or
113
113
  # what class the exception has) may change in future ruby-marc versions
114
- # without warning.
114
+ # without warning.
115
115
  # [:invalid]
116
116
  # Just like String#encode, set to :replace and any bytes in source data
117
- # illegal for the source encoding will be replaced with the unicode
117
+ # illegal for the source encoding will be replaced with the unicode
118
118
  # replacement character (when in unicode encodings), or else '?'. Overrides
119
119
  # :validate_encoding. This can help you sanitize your input and
120
- # avoid ruby "invalid UTF-8 byte" exceptions later.
120
+ # avoid ruby "invalid UTF-8 byte" exceptions later.
121
121
  # [:replace]
122
122
  # Just like String#encode, combine with `:invalid=>:replace`, set
123
123
  # your own replacement string for invalid bytes. You may use the
124
- # empty string to simply eliminate invalid bytes.
124
+ # empty string to simply eliminate invalid bytes.
125
125
  #
126
126
  # === Warning on ruby File's own :internal_encoding, and unsafe transcoding from ruby
127
127
  #
128
- # Be careful with using an explicit File object with the File's own
129
- # :internal_encoding set -- it can cause ruby to transcode your data
130
- # _before_ MARC::Reader gets it, changing the bytecount and making the
128
+ # Be careful with using an explicit File object with the File's own
129
+ # :internal_encoding set -- it can cause ruby to transcode your data
130
+ # _before_ MARC::Reader gets it, changing the bytecount and making the
131
131
  # marc record unreadable in some cases. This
132
132
  # applies to Encoding.default_internal too!
133
133
  #
134
- # # May in some cases result in unreadable marc and an exception
134
+ # # May in some cases result in unreadable marc and an exception
135
135
  # MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866:utf-8") )
136
136
  #
137
137
  # # May in some cases result in unreadable marc and an exception
@@ -156,7 +156,7 @@ module MARC
156
156
  # https://jira.codehaus.org/browse/JRUBY-6637
157
157
  #
158
158
  # We recommend using the latest version of jruby, especially
159
- # at least jruby 1.7.6.
159
+ # at least jruby 1.7.6.
160
160
  class Reader
161
161
  include Enumerable
162
162
 
@@ -182,43 +182,42 @@ module MARC
182
182
  #
183
183
  # Also, if your data is encoded with a non ascii/utf-8 encoding
184
184
  # (for ex. when reading RUSMARC data) and you use ruby 1.9
185
- # you can specify source data encoding with an option.
185
+ # you can specify source data encoding with an option.
186
186
  #
187
187
  # reader = MARC::Reader.new('marc.dat', :external_encoding => 'cp866')
188
188
  #
189
189
  # or, you can pass IO, opened in the corresponding encoding
190
190
  #
191
191
  # reader = MARC::Reader.new(File.new('marc.dat', 'r:cp866'))
192
- def initialize(file, options = {})
192
+ def initialize(file, options = {})
193
193
  @encoding_options = {}
194
194
  # all can be nil
195
195
  [:internal_encoding, :external_encoding, :invalid, :replace, :validate_encoding].each do |key|
196
196
  @encoding_options[key] = options[key] if options.has_key?(key)
197
197
  end
198
-
199
- if file.is_a?(String)
198
+
199
+ if file.is_a?(String)
200
200
  @handle = File.new(file)
201
- elsif file.respond_to?("read", 5)
201
+ elsif file.respond_to?(:read, 5)
202
202
  @handle = file
203
203
  else
204
204
  raise ArgumentError, "must pass in path or file"
205
205
  end
206
-
207
- if (! @encoding_options[:external_encoding] ) && @handle.respond_to?(:external_encoding)
206
+
207
+ if (!@encoding_options[:external_encoding]) && @handle.respond_to?(:external_encoding)
208
208
  # use file encoding only if we didn't already have an explicit one,
209
- # explicit one takes precedence.
209
+ # explicit one takes precedence.
210
210
  #
211
211
  # Note, please don't use ruby's own internal_encoding transcode
212
212
  # with binary marc data, the transcode can mess up the byte count
213
- # and make it unreadable.
213
+ # and make it unreadable.
214
214
  @encoding_options[:external_encoding] ||= @handle.external_encoding
215
215
  end
216
216
 
217
217
  # Only pull in the MARC8 translation if we need it, since it's really big
218
- if @encoding_options[:external_encoding] == "MARC-8"
219
- require 'marc/marc8/to_unicode' unless defined? MARC::Marc8::ToUnicode
218
+ if @encoding_options[:external_encoding] == "MARC-8"
219
+ require "marc/marc8/to_unicode" unless defined? MARC::Marc8::ToUnicode
220
220
  end
221
-
222
221
  end
223
222
 
224
223
  # to support iteration:
@@ -226,13 +225,13 @@ module MARC
226
225
  # print record
227
226
  # end
228
227
  def each
229
- unless block_given?
230
- return self.enum_for(:each)
231
- else
232
- self.each_raw do |raw|
233
- record = self.decode(raw)
228
+ if block_given?
229
+ each_raw do |raw|
230
+ record = decode(raw)
234
231
  yield record
235
232
  end
233
+ else
234
+ enum_for(:each)
236
235
  end
237
236
  end
238
237
 
@@ -257,10 +256,8 @@ module MARC
257
256
  #
258
257
  # If no block is given, an enumerator is returned
259
258
  def each_raw
260
- unless block_given?
261
- return self.enum_for(:each_raw)
262
- else
263
- while rec_length_s = @handle.read(5)
259
+ if block_given?
260
+ while (rec_length_s = @handle.read(5))
264
261
  # make sure the record length looks like an integer
265
262
  rec_length_i = rec_length_s.to_i
266
263
  if rec_length_i == 0
@@ -269,9 +266,11 @@ module MARC
269
266
 
270
267
  # get the raw MARC21 for a record back from the file
271
268
  # using the record length
272
- raw = rec_length_s + @handle.read(rec_length_i-5)
269
+ raw = rec_length_s + @handle.read(rec_length_i - 5)
273
270
  yield raw
274
271
  end
272
+ else
273
+ enum_for(:each_raw)
275
274
  end
276
275
  end
277
276
 
@@ -280,7 +279,7 @@ module MARC
280
279
  # Wraps the class method MARC::Reader::decode, using the encoding options of
281
280
  # the MARC::Reader instance.
282
281
  def decode(marc)
283
- return MARC::Reader.decode(marc, @encoding_options)
282
+ MARC::Reader.decode(marc, @encoding_options)
284
283
  end
285
284
 
286
285
  # A static method for turning raw MARC data in transmission
@@ -288,34 +287,34 @@ module MARC
288
287
  # First argument is a String
289
288
  # options include:
290
289
  # [:external_encoding] encoding of MARC record data values
291
- # [:forgiving] needs more docs, true is some kind of forgiving
292
- # of certain kinds of bad MARC.
293
- def self.decode(marc, params={})
290
+ # [:forgiving] needs more docs, true is some kind of forgiving
291
+ # of certain kinds of bad MARC.
292
+ def self.decode(marc, params = {})
294
293
  if params.has_key?(:encoding)
295
- $stderr.puts "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
294
+ warn "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
296
295
  params[:external_encoding] = params.delete(:encoding)
297
296
  end
298
-
299
- if (! params.has_key? :external_encoding ) && marc.respond_to?(:encoding)
297
+
298
+ if (!params.has_key? :external_encoding) && marc.respond_to?(:encoding)
300
299
  # If no forced external_encoding given, respect the encoding
301
- # declared on the string passed in.
300
+ # declared on the string passed in.
302
301
  params[:external_encoding] = marc.encoding
303
302
  end
304
303
  # And now that we've recorded the current encoding, we force
305
304
  # to binary encoding, because we're going to be doing byte arithmetic,
306
- # and want to avoid byte-vs-char confusion.
305
+ # and want to avoid byte-vs-char confusion.
307
306
  marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
308
-
309
- record = Record.new()
310
- record.leader = marc[0..LEADER_LENGTH-1]
307
+
308
+ record = Record.new
309
+ record.leader = marc[0..LEADER_LENGTH - 1]
311
310
 
312
311
  # where the field data starts
313
312
  base_address = record.leader[12..16].to_i
314
313
 
315
314
  # get the byte offsets from the record directory
316
- directory = marc[LEADER_LENGTH..base_address-1]
315
+ directory = marc[LEADER_LENGTH..base_address - 1]
317
316
 
318
- raise MARC::Exception.new("invalid directory in record") if directory == nil
317
+ raise MARC::Exception.new("invalid directory in record") if directory.nil?
319
318
 
320
319
  # the number of fields in the record corresponds to
321
320
  # how many directory entries there are
@@ -324,20 +323,19 @@ module MARC
324
323
  # when operating in forgiving mode we just split on end of
325
324
  # field instead of using calculated byte offsets from the
326
325
  # directory
327
- if params[:forgiving]
326
+ if params[:forgiving]
328
327
  marc_field_data = marc[base_address..-1]
329
328
  # It won't let us do the split on bad utf8 data, but
330
329
  # we haven't yet set the 'proper' encoding or used
331
330
  # our correction/replace options. So call it binary for now.
332
331
  marc_field_data.force_encoding("binary") if marc_field_data.respond_to?(:force_encoding)
333
-
332
+
334
333
  all_fields = marc_field_data.split(END_OF_FIELD)
335
334
  else
336
- mba = marc.bytes.to_a
335
+ mba = marc.bytes.to_a
337
336
  end
338
337
 
339
- 0.upto(num_fields-1) do |field_num|
340
-
338
+ 0.upto(num_fields - 1) do |field_num|
341
339
  # pull the directory entry for a field out
342
340
  entry_start = field_num * DIRECTORY_ENTRY_LENGTH
343
341
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
@@ -350,12 +348,12 @@ module MARC
350
348
  # if we were told to be forgiving we just use the
351
349
  # next available chunk of field data that we
352
350
  # split apart based on the END_OF_FIELD
353
- field_data = ''
351
+ field_data = ""
354
352
  if params[:forgiving]
355
- field_data = all_fields.shift()
353
+ field_data = all_fields.shift
356
354
 
357
- # otherwise we actually use the byte offsets in
358
- # directory to figure out what field data to extract
355
+ # otherwise we actually use the byte offsets in
356
+ # directory to figure out what field data to extract
359
357
  else
360
358
  length = entry[3..6].to_i
361
359
  offset = entry[7..11].to_i
@@ -366,11 +364,11 @@ module MARC
366
364
 
367
365
  # remove end of field
368
366
  field_data.delete!(END_OF_FIELD)
369
-
367
+
370
368
  # add a control field or data field
371
369
  if MARC::ControlField.control_tag?(tag)
372
- field_data = MARC::Reader.set_encoding( field_data , params)
373
- record.append(MARC::ControlField.new(tag,field_data))
370
+ field_data = MARC::Reader.set_encoding(field_data, params)
371
+ record.append(MARC::ControlField.new(tag, field_data))
374
372
  else
375
373
  field = MARC::DataField.new(tag)
376
374
 
@@ -379,17 +377,17 @@ module MARC
379
377
 
380
378
  # must have at least 2 elements (indicators, and 1 subfield)
381
379
  # TODO some sort of logging?
382
- next if subfields.length() < 2
380
+ next if subfields.length < 2
383
381
 
384
382
  # get indicators
385
- indicators = MARC::Reader.set_encoding( subfields.shift(), params)
386
- field.indicator1 = indicators[0,1]
387
- field.indicator2 = indicators[1,1]
383
+ indicators = MARC::Reader.set_encoding(subfields.shift, params)
384
+ field.indicator1 = indicators[0, 1]
385
+ field.indicator2 = indicators[1, 1]
388
386
 
389
387
  # add each subfield to the field
390
- subfields.each() do |data|
391
- data = MARC::Reader.set_encoding( data, params )
392
- subfield = MARC::Subfield.new(data[0,1],data[1..-1])
388
+ subfields.each do |data|
389
+ data = MARC::Reader.set_encoding(data, params)
390
+ subfield = MARC::Subfield.new(data[0, 1], data[1..-1])
393
391
  field.append(subfield)
394
392
  end
395
393
 
@@ -398,10 +396,12 @@ module MARC
398
396
  end
399
397
  end
400
398
 
401
- return record
402
- end
399
+ raise MARC::RecordException, record unless record.valid?
400
+
401
+ record
402
+ end
403
403
 
404
- # input passed in probably has 'binary' encoding.
404
+ # input passed in probably has 'binary' encoding.
405
405
  # We'll set it to the proper encoding, and depending on settings, optionally
406
406
  # * check for valid encoding
407
407
  # * raise if not valid
@@ -411,16 +411,16 @@ module MARC
411
411
  # Special case for encoding "MARC-8" -- will be transcoded to
412
412
  # UTF-8 (then further transcoded to internal_encoding, if set).
413
413
  # For "MARC-8", validate_encoding is always true, there's no way to
414
- # ignore bad bytes.
414
+ # ignore bad bytes.
415
415
  #
416
416
  # Params options:
417
- #
418
- # * external_encoding: what encoding the input is expected to be in
417
+ #
418
+ # * external_encoding: what encoding the input is expected to be in
419
419
  # * validate_encoding: if true, will raise if an invalid encoding
420
420
  # * invalid: if set to :replace, will replace bad bytes with replacement
421
- # chars instead of raising.
421
+ # chars instead of raising.
422
422
  # * replace: Set replacement char for use with 'invalid', otherwise defaults
423
- # to unicode replacement char, or question mark.
423
+ # to unicode replacement char, or question mark.
424
424
  def self.set_encoding(str, params)
425
425
  if str.respond_to?(:force_encoding)
426
426
  if params[:external_encoding]
@@ -430,37 +430,38 @@ module MARC
430
430
  else
431
431
  str = str.force_encoding(params[:external_encoding])
432
432
  end
433
- end
434
-
433
+ end
434
+
435
435
  # If we're transcoding anyway, pass our invalid/replace options
436
436
  # on to String#encode, which will take care of them -- or raise
437
- # with illegal bytes without :replace=>:invalid.
437
+ # with illegal bytes without :invalid=>:replace.
438
438
  #
439
439
  # If we're NOT transcoding, we need to use our own pure-ruby
440
440
  # implementation to do invalid byte replacements. OR to raise
441
441
  # a predicatable exception iff :validate_encoding, otherwise
442
442
  # for performance we won't check, and you may or may not
443
443
  # get an exception from inside ruby-marc, and it may change
444
- # in future implementations.
444
+ # in future implementations.
445
445
  if params[:internal_encoding]
446
- str = str.encode(params[:internal_encoding], params)
447
- elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
446
+ str = if RUBY_VERSION >= "3.0"
447
+ str.encode(params[:internal_encoding], **params)
448
+ else
449
+ str.encode(params[:internal_encoding], params)
450
+ end
451
+ elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
448
452
 
449
- if params[:validate_encoding] == true && ! str.valid_encoding?
450
- raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
453
+ if params[:validate_encoding] == true && !str.valid_encoding?
454
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
451
455
  end
452
456
  if params[:invalid] == :replace
453
457
  str = str.scrub(params[:replace])
454
458
  end
455
-
456
- end
457
- end
458
- return str
459
- end
460
- end
461
-
462
-
463
459
 
460
+ end
461
+ end
462
+ str
463
+ end
464
+ end
464
465
 
465
466
  # Like Reader ForgivingReader lets you read in a batch of MARC21 records
466
467
  # but it does not use record lengths and field byte offsets found in the
@@ -475,22 +476,19 @@ module MARC
475
476
  #
476
477
  # **NOTE**: ForgivingReader _may_ have unpredictable results when used
477
478
  # with marc records with char encoding other than system default (usually
478
- # UTF8), _especially_ if you have Encoding.default_internal set.
479
+ # UTF8), _especially_ if you have Encoding.default_internal set.
479
480
  #
480
481
  # Implemented a sub-class of Reader over-riding #each, so we still
481
482
  # get DRY Reader's #initialize with proper char encoding options
482
- # and handling.
483
+ # and handling.
483
484
  class ForgivingReader < Reader
484
-
485
485
  def each
486
486
  @handle.each_line(END_OF_RECORD) do |raw|
487
- begin
488
- record = MARC::Reader.decode(raw, @encoding_options.merge(:forgiving => true))
489
- yield record
490
- rescue StandardError => e
491
- # caught exception just keep barrelling along
492
- # TODO add logging
493
- end
487
+ record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
488
+ yield record
489
+ rescue
490
+ # caught exception just keep barrelling along
491
+ # TODO add logging
494
492
  end
495
493
  end
496
494
  end