marc 1.0.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +106 -29
  7. data/Gemfile +15 -0
  8. data/README.md +240 -47
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -86
  22. data/lib/marc/reader.rb +119 -121
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -81
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +23 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -32
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +99 -87
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +94 -9
data/lib/marc/reader.rb CHANGED
@@ -1,10 +1,10 @@
1
- require 'scrub_rb'
1
+ require "scrub_rb"
2
2
 
3
3
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
4
  # only when necessary
5
5
 
6
6
  module MARC
7
- # A class for reading MARC binary (ISO 2709) files.
7
+ # A class for reading MARC binary (ISO 2709) files.
8
8
  #
9
9
  # == Character Encoding
10
10
  #
@@ -12,7 +12,7 @@ module MARC
12
12
  # If illegal bytes for that character encoding are encountered in certain
13
13
  # operations, ruby will raise an exception. If a String is incorrectly
14
14
  # tagged with the wrong character encoding, that makes it fairly likely
15
- # an illegal byte for the specified encoding will be encountered.
15
+ # an illegal byte for the specified encoding will be encountered.
16
16
  #
17
17
  # So when reading binary MARC data with the MARC::Reader, it's important
18
18
  # that you let it know the expected encoding:
@@ -21,7 +21,7 @@ module MARC
21
21
  #
22
22
  # If you leave off 'external_encoding', it will use the ruby environment
23
23
  # Encoding.default_external, which is usually UTF-8 but may depend on your
24
- # environment.
24
+ # environment.
25
25
  #
26
26
  # Even if you expect your data to be (eg) UTF-8, it may include bad/illegal
27
27
  # bytes. By default MARC::Reader will leave these in the produced Strings,
@@ -29,58 +29,58 @@ module MARC
29
29
  # to catch this early, and ask MARC::Reader to raise immediately on illegal
30
30
  # bytes:
31
31
  #
32
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
32
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
33
33
  # :validate_encoding => true)
34
34
  #
35
35
  # Alternately, you can have MARC::Reader replace illegal bytes
36
36
  # with the Unicode Replacement Character, or with a string
37
37
  # of your choice (including the empty string, meaning just omit the bad bytes)
38
38
  #
39
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
39
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
40
40
  # :invalid => :replace)
41
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
41
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
42
42
  # :invalid => :replace, :replace => "")
43
43
  #
44
44
  # If you supply an :external_encoding argument, MARC::Reader will
45
45
  # always assume that encoding -- if you leave it off, MARC::Reader
46
46
  # will use the encoding tagged on any input you pass in, such
47
- # as Strings or File handles.
47
+ # as Strings or File handles.
48
48
  #
49
49
  # # marc data will have same encoding as string.encoding:
50
50
  # MARC::Reader.decode( string )
51
51
  #
52
52
  # # Same, values will have encoding of string.encoding:
53
- # MARC::Reader.new(StringIO.new(string))
53
+ # MARC::Reader.new(StringIO.new(string))
54
54
  #
55
55
  # # data values will have cp866 encoding, per external_encoding of
56
56
  # # File object passed in
57
57
  # MARC::Reader.new(File.new("myfile.marc", "r:cp866"))
58
58
  #
59
59
  # # explicitly tell MARC::Reader the encoding
60
- # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
60
+ # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
61
61
  #
62
62
  # === MARC-8
63
63
  #
64
64
  # The legacy MARC-8 encoding needs to be handled differently, because
65
- # there is no built-in support in ruby for MARC-8.
65
+ # there is no built-in support in ruby for MARC-8.
66
66
  #
67
67
  # You _can_ specify "MARC-8" as an external encoding. It will trigger
68
- # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
68
+ # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
69
69
  #
70
70
  # MARC::Reader.new("marc8.mrc", :external_encoding => "MARC-8")
71
71
  #
72
72
  # For external_encoding "MARC-8", :validate_encoding is always true,
73
73
  # there's no way to ignore bad bytes in MARC-8 when transcoding to
74
- # unicode. However, just as with other encodings, the
74
+ # unicode. However, just as with other encodings, the
75
75
  # `:invalid => :replace` and `:replace => "string"`
76
- # options can be used to replace bad bytes instead of raising.
76
+ # options can be used to replace bad bytes instead of raising.
77
77
  #
78
78
  # If you want your MARC-8 to be transcoded internally to something
79
79
  # other than UTF-8, you can use the :internal_encoding option
80
- # which works with any encoding in MARC::Reader.
80
+ # which works with any encoding in MARC::Reader.
81
81
  #
82
- # MARC::Reader.new("marc8.mrc",
83
- # :external_encoding => "MARC-8",
82
+ # MARC::Reader.new("marc8.mrc",
83
+ # :external_encoding => "MARC-8",
84
84
  # :internal_encoding => "UTF-16LE")
85
85
  #
86
86
  # If you want to read in MARC-8 without transcoding, leaving the
@@ -90,48 +90,48 @@ module MARC
90
90
  #
91
91
  # MARC::Reader.new("marc8.mrc", :external_encoding => "binary")
92
92
  #
93
- # Please note that MARC::Reader does _not_ currently have any facilities
94
- # for guessing encoding from MARC21 leader byte 9, that is ignored.
93
+ # Please note that MARC::Reader does _not_ currently have any facilities
94
+ # for guessing encoding from MARC21 leader byte 9, that is ignored.
95
95
  #
96
96
  # === Complete Encoding Options
97
97
  #
98
98
  # These options can all be used on MARC::Reader.new _or_ MARC::Reader.decode
99
99
  # to specify external encoding, ask for a transcode to a different
100
- # encoding on read, or validate or replace bad bytes in source.
100
+ # encoding on read, or validate or replace bad bytes in source.
101
101
  #
102
102
  # [:external_encoding]
103
103
  # What encoding to consider the MARC record's values to be in. This option
104
- # takes precedence over the File handle or String argument's encodings.
104
+ # takes precedence over the File handle or String argument's encodings.
105
105
  # [:internal_encoding]
106
106
  # Ask MARC::Reader to transcode to this encoding in memory after reading
107
- # the file in.
107
+ # the file in.
108
108
  # [:validate_encoding]
109
109
  # If you pass in `true`, MARC::Reader will promise to raise an Encoding::InvalidByteSequenceError
110
110
  # if there are illegal bytes in the source for the :external_encoding. There is
111
111
  # a performance penalty for this check. Without this option, an exception
112
- # _may_ or _may not_ be raised, and whether an exception is raised (or
112
+ # _may_ or _may not_ be raised, and whether an exception is raised (or
113
113
  # what class the exception has) may change in future ruby-marc versions
114
- # without warning.
114
+ # without warning.
115
115
  # [:invalid]
116
116
  # Just like String#encode, set to :replace and any bytes in source data
117
- # illegal for the source encoding will be replaced with the unicode
117
+ # illegal for the source encoding will be replaced with the unicode
118
118
  # replacement character (when in unicode encodings), or else '?'. Overrides
119
119
  # :validate_encoding. This can help you sanitize your input and
120
- # avoid ruby "invalid UTF-8 byte" exceptions later.
120
+ # avoid ruby "invalid UTF-8 byte" exceptions later.
121
121
  # [:replace]
122
122
  # Just like String#encode, combine with `:invalid=>:replace`, set
123
123
  # your own replacement string for invalid bytes. You may use the
124
- # empty string to simply eliminate invalid bytes.
124
+ # empty string to simply eliminate invalid bytes.
125
125
  #
126
126
  # === Warning on ruby File's own :internal_encoding, and unsafe transcoding from ruby
127
127
  #
128
- # Be careful with using an explicit File object with the File's own
129
- # :internal_encoding set -- it can cause ruby to transcode your data
130
- # _before_ MARC::Reader gets it, changing the bytecount and making the
128
+ # Be careful with using an explicit File object with the File's own
129
+ # :internal_encoding set -- it can cause ruby to transcode your data
130
+ # _before_ MARC::Reader gets it, changing the bytecount and making the
131
131
  # marc record unreadable in some cases. This
132
132
  # applies to Encoding.default_internal too!
133
133
  #
134
- # # May in some cases result in unreadable marc and an exception
134
+ # # May in some cases result in unreadable marc and an exception
135
135
  # MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866:utf-8") )
136
136
  #
137
137
  # # May in some cases result in unreadable marc and an exception
@@ -156,7 +156,7 @@ module MARC
156
156
  # https://jira.codehaus.org/browse/JRUBY-6637
157
157
  #
158
158
  # We recommend using the latest version of jruby, especially
159
- # at least jruby 1.7.6.
159
+ # at least jruby 1.7.6.
160
160
  class Reader
161
161
  include Enumerable
162
162
 
@@ -182,43 +182,42 @@ module MARC
182
182
  #
183
183
  # Also, if your data is encoded with a non ascii/utf-8 encoding
184
184
  # (for ex. when reading RUSMARC data) and you use ruby 1.9
185
- # you can specify source data encoding with an option.
185
+ # you can specify source data encoding with an option.
186
186
  #
187
187
  # reader = MARC::Reader.new('marc.dat', :external_encoding => 'cp866')
188
188
  #
189
189
  # or, you can pass IO, opened in the corresponding encoding
190
190
  #
191
191
  # reader = MARC::Reader.new(File.new('marc.dat', 'r:cp866'))
192
- def initialize(file, options = {})
192
+ def initialize(file, options = {})
193
193
  @encoding_options = {}
194
194
  # all can be nil
195
195
  [:internal_encoding, :external_encoding, :invalid, :replace, :validate_encoding].each do |key|
196
196
  @encoding_options[key] = options[key] if options.has_key?(key)
197
197
  end
198
-
199
- if file.is_a?(String)
198
+
199
+ if file.is_a?(String)
200
200
  @handle = File.new(file)
201
- elsif file.respond_to?("read", 5)
201
+ elsif file.respond_to?(:read, 5)
202
202
  @handle = file
203
203
  else
204
204
  raise ArgumentError, "must pass in path or file"
205
205
  end
206
-
207
- if (! @encoding_options[:external_encoding] ) && @handle.respond_to?(:external_encoding)
206
+
207
+ if (!@encoding_options[:external_encoding]) && @handle.respond_to?(:external_encoding)
208
208
  # use file encoding only if we didn't already have an explicit one,
209
- # explicit one takes precedence.
209
+ # explicit one takes precedence.
210
210
  #
211
211
  # Note, please don't use ruby's own internal_encoding transcode
212
212
  # with binary marc data, the transcode can mess up the byte count
213
- # and make it unreadable.
213
+ # and make it unreadable.
214
214
  @encoding_options[:external_encoding] ||= @handle.external_encoding
215
215
  end
216
216
 
217
217
  # Only pull in the MARC8 translation if we need it, since it's really big
218
- if @encoding_options[:external_encoding] == "MARC-8"
219
- require 'marc/marc8/to_unicode' unless defined? MARC::Marc8::ToUnicode
218
+ if @encoding_options[:external_encoding] == "MARC-8"
219
+ require "marc/marc8/to_unicode" unless defined? MARC::Marc8::ToUnicode
220
220
  end
221
-
222
221
  end
223
222
 
224
223
  # to support iteration:
@@ -226,13 +225,13 @@ module MARC
226
225
  # print record
227
226
  # end
228
227
  def each
229
- unless block_given?
230
- return self.enum_for(:each)
231
- else
232
- self.each_raw do |raw|
233
- record = self.decode(raw)
228
+ if block_given?
229
+ each_raw do |raw|
230
+ record = decode(raw)
234
231
  yield record
235
232
  end
233
+ else
234
+ enum_for(:each)
236
235
  end
237
236
  end
238
237
 
@@ -257,10 +256,8 @@ module MARC
257
256
  #
258
257
  # If no block is given, an enumerator is returned
259
258
  def each_raw
260
- unless block_given?
261
- return self.enum_for(:each_raw)
262
- else
263
- while rec_length_s = @handle.read(5)
259
+ if block_given?
260
+ while (rec_length_s = @handle.read(5))
264
261
  # make sure the record length looks like an integer
265
262
  rec_length_i = rec_length_s.to_i
266
263
  if rec_length_i == 0
@@ -269,9 +266,11 @@ module MARC
269
266
 
270
267
  # get the raw MARC21 for a record back from the file
271
268
  # using the record length
272
- raw = rec_length_s + @handle.read(rec_length_i-5)
269
+ raw = rec_length_s + @handle.read(rec_length_i - 5)
273
270
  yield raw
274
271
  end
272
+ else
273
+ enum_for(:each_raw)
275
274
  end
276
275
  end
277
276
 
@@ -280,7 +279,7 @@ module MARC
280
279
  # Wraps the class method MARC::Reader::decode, using the encoding options of
281
280
  # the MARC::Reader instance.
282
281
  def decode(marc)
283
- return MARC::Reader.decode(marc, @encoding_options)
282
+ MARC::Reader.decode(marc, @encoding_options)
284
283
  end
285
284
 
286
285
  # A static method for turning raw MARC data in transmission
@@ -288,34 +287,34 @@ module MARC
288
287
  # First argument is a String
289
288
  # options include:
290
289
  # [:external_encoding] encoding of MARC record data values
291
- # [:forgiving] needs more docs, true is some kind of forgiving
292
- # of certain kinds of bad MARC.
293
- def self.decode(marc, params={})
290
+ # [:forgiving] needs more docs, true is some kind of forgiving
291
+ # of certain kinds of bad MARC.
292
+ def self.decode(marc, params = {})
294
293
  if params.has_key?(:encoding)
295
- $stderr.puts "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
294
+ warn "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
296
295
  params[:external_encoding] = params.delete(:encoding)
297
296
  end
298
-
299
- if (! params.has_key? :external_encoding ) && marc.respond_to?(:encoding)
297
+
298
+ if (!params.has_key? :external_encoding) && marc.respond_to?(:encoding)
300
299
  # If no forced external_encoding is given, respect the encoding
301
- # declared on the string passed in.
300
+ # declared on the string passed in.
302
301
  params[:external_encoding] = marc.encoding
303
302
  end
304
303
  # And now that we've recorded the current encoding, we force
305
304
  # to binary encoding, because we're going to be doing byte arithmetic,
306
- # and want to avoid byte-vs-char confusion.
305
+ # and want to avoid byte-vs-char confusion.
307
306
  marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
308
-
309
- record = Record.new()
310
- record.leader = marc[0..LEADER_LENGTH-1]
307
+
308
+ record = Record.new
309
+ record.leader = marc[0..LEADER_LENGTH - 1]
311
310
 
312
311
  # where the field data starts
313
312
  base_address = record.leader[12..16].to_i
314
313
 
315
314
  # get the byte offsets from the record directory
316
- directory = marc[LEADER_LENGTH..base_address-1]
315
+ directory = marc[LEADER_LENGTH..base_address - 1]
317
316
 
318
- raise MARC::Exception.new("invalid directory in record") if directory == nil
317
+ raise MARC::Exception.new("invalid directory in record") if directory.nil?
319
318
 
320
319
  # the number of fields in the record corresponds to
321
320
  # how many directory entries there are
@@ -324,20 +323,19 @@ module MARC
324
323
  # when operating in forgiving mode we just split on end of
325
324
  # field instead of using calculated byte offsets from the
326
325
  # directory
327
- if params[:forgiving]
326
+ if params[:forgiving]
328
327
  marc_field_data = marc[base_address..-1]
329
328
  # It won't let us do the split on bad utf8 data, but
330
329
  # we haven't yet set the 'proper' encoding or used
331
330
  # our correction/replace options. So call it binary for now.
332
331
  marc_field_data.force_encoding("binary") if marc_field_data.respond_to?(:force_encoding)
333
-
332
+
334
333
  all_fields = marc_field_data.split(END_OF_FIELD)
335
334
  else
336
- mba = marc.bytes.to_a
335
+ mba = marc.bytes.to_a
337
336
  end
338
337
 
339
- 0.upto(num_fields-1) do |field_num|
340
-
338
+ 0.upto(num_fields - 1) do |field_num|
341
339
  # pull the directory entry for a field out
342
340
  entry_start = field_num * DIRECTORY_ENTRY_LENGTH
343
341
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
@@ -350,12 +348,12 @@ module MARC
350
348
  # if we were told to be forgiving we just use the
351
349
  # next available chunk of field data that we
352
350
  # split apart based on the END_OF_FIELD
353
- field_data = ''
351
+ field_data = ""
354
352
  if params[:forgiving]
355
- field_data = all_fields.shift()
353
+ field_data = all_fields.shift
356
354
 
357
- # otherwise we actually use the byte offsets in
358
- # directory to figure out what field data to extract
355
+ # otherwise we actually use the byte offsets in
356
+ # directory to figure out what field data to extract
359
357
  else
360
358
  length = entry[3..6].to_i
361
359
  offset = entry[7..11].to_i
@@ -366,11 +364,11 @@ module MARC
366
364
 
367
365
  # remove end of field
368
366
  field_data.delete!(END_OF_FIELD)
369
-
367
+
370
368
  # add a control field or data field
371
369
  if MARC::ControlField.control_tag?(tag)
372
- field_data = MARC::Reader.set_encoding( field_data , params)
373
- record.append(MARC::ControlField.new(tag,field_data))
370
+ field_data = MARC::Reader.set_encoding(field_data, params)
371
+ record.append(MARC::ControlField.new(tag, field_data))
374
372
  else
375
373
  field = MARC::DataField.new(tag)
376
374
 
@@ -379,17 +377,17 @@ module MARC
379
377
 
380
378
  # must have at least 2 elements (indicators, and 1 subfield)
381
379
  # TODO some sort of logging?
382
- next if subfields.length() < 2
380
+ next if subfields.length < 2
383
381
 
384
382
  # get indicators
385
- indicators = MARC::Reader.set_encoding( subfields.shift(), params)
386
- field.indicator1 = indicators[0,1]
387
- field.indicator2 = indicators[1,1]
383
+ indicators = MARC::Reader.set_encoding(subfields.shift, params)
384
+ field.indicator1 = indicators[0, 1]
385
+ field.indicator2 = indicators[1, 1]
388
386
 
389
387
  # add each subfield to the field
390
- subfields.each() do |data|
391
- data = MARC::Reader.set_encoding( data, params )
392
- subfield = MARC::Subfield.new(data[0,1],data[1..-1])
388
+ subfields.each do |data|
389
+ data = MARC::Reader.set_encoding(data, params)
390
+ subfield = MARC::Subfield.new(data[0, 1], data[1..-1])
393
391
  field.append(subfield)
394
392
  end
395
393
 
@@ -398,10 +396,12 @@ module MARC
398
396
  end
399
397
  end
400
398
 
401
- return record
402
- end
399
+ raise MARC::RecordException, record unless record.valid?
400
+
401
+ record
402
+ end
403
403
 
404
- # input passed in probably has 'binary' encoding.
404
+ # input passed in probably has 'binary' encoding.
405
405
  # We'll set it to the proper encoding, and depending on settings, optionally
406
406
  # * check for valid encoding
407
407
  # * raise if not valid
@@ -411,16 +411,16 @@ module MARC
411
411
  # Special case for encoding "MARC-8" -- will be transcoded to
412
412
  # UTF-8 (then further transcoded to internal_encoding, if set).
413
413
  # For "MARC-8", validate_encoding is always true, there's no way to
414
- # ignore bad bytes.
414
+ # ignore bad bytes.
415
415
  #
416
416
  # Params options:
417
- #
418
- # * external_encoding: what encoding the input is expected to be in
417
+ #
418
+ # * external_encoding: what encoding the input is expected to be in
419
419
  # * validate_encoding: if true, will raise if an invalid encoding
420
420
  # * invalid: if set to :replace, will replace bad bytes with replacement
421
- # chars instead of raising.
421
+ # chars instead of raising.
422
422
  # * replace: Set replacement char for use with 'invalid', otherwise defaults
423
- # to unicode replacement char, or question mark.
423
+ # to unicode replacement char, or question mark.
424
424
  def self.set_encoding(str, params)
425
425
  if str.respond_to?(:force_encoding)
426
426
  if params[:external_encoding]
@@ -430,37 +430,38 @@ module MARC
430
430
  else
431
431
  str = str.force_encoding(params[:external_encoding])
432
432
  end
433
- end
434
-
433
+ end
434
+
435
435
  # If we're transcoding anyway, pass our invalid/replace options
436
436
  # on to String#encode, which will take care of them -- or raise
437
- # with illegal bytes without :invalid=>:replace.
437
+ # with illegal bytes without :invalid=>:replace.
438
438
  #
439
439
  # If we're NOT transcoding, we need to use our own pure-ruby
440
440
  # implementation to do invalid byte replacements. OR to raise
441
441
  # a predicatable exception iff :validate_encoding, otherwise
442
442
  # for performance we won't check, and you may or may not
443
443
  # get an exception from inside ruby-marc, and it may change
444
- # in future implementations.
444
+ # in future implementations.
445
445
  if params[:internal_encoding]
446
- str = str.encode(params[:internal_encoding], params)
447
- elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
446
+ str = if RUBY_VERSION >= "3.0"
447
+ str.encode(params[:internal_encoding], **params)
448
+ else
449
+ str.encode(params[:internal_encoding], params)
450
+ end
451
+ elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
448
452
 
449
- if params[:validate_encoding] == true && ! str.valid_encoding?
450
- raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
453
+ if params[:validate_encoding] == true && !str.valid_encoding?
454
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
451
455
  end
452
456
  if params[:invalid] == :replace
453
457
  str = str.scrub(params[:replace])
454
458
  end
455
-
456
- end
457
- end
458
- return str
459
- end
460
- end
461
-
462
-
463
459
 
460
+ end
461
+ end
462
+ str
463
+ end
464
+ end
464
465
 
465
466
  # Like Reader ForgivingReader lets you read in a batch of MARC21 records
466
467
  # but it does not use record lengths and field byte offsets found in the
@@ -475,22 +476,19 @@ module MARC
475
476
  #
476
477
  # **NOTE**: ForgivingReader _may_ have unpredictable results when used
477
478
  # with marc records with char encoding other than system default (usually
478
- # UTF8), _especially_ if you have Encoding.default_internal set.
479
+ # UTF8), _especially_ if you have Encoding.default_internal set.
479
480
  #
480
481
  # Implemented a sub-class of Reader over-riding #each, so we still
481
482
  # get DRY Reader's #initialize with proper char encoding options
482
- # and handling.
483
+ # and handling.
483
484
  class ForgivingReader < Reader
484
-
485
485
  def each
486
486
  @handle.each_line(END_OF_RECORD) do |raw|
487
- begin
488
- record = MARC::Reader.decode(raw, @encoding_options.merge(:forgiving => true))
489
- yield record
490
- rescue StandardError => e
491
- # caught exception just keep barrelling along
492
- # TODO add logging
493
- end
487
+ record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
488
+ yield record
489
+ rescue
490
+ # caught exception just keep barrelling along
491
+ # TODO add logging
494
492
  end
495
493
  end
496
494
  end