marc 1.1.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +116 -30
  7. data/Gemfile +5 -0
  8. data/README.md +239 -46
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -87
  22. data/lib/marc/reader.rb +116 -124
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -82
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +28 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -34
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +101 -94
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +129 -22
data/lib/marc/reader.rb CHANGED
@@ -1,10 +1,8 @@
1
- require 'scrub_rb'
2
-
3
1
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
2
  # only when necessary
5
3
 
6
4
  module MARC
7
- # A class for reading MARC binary (ISO 2709) files.
5
+ # A class for reading MARC binary (ISO 2709) files.
8
6
  #
9
7
  # == Character Encoding
10
8
  #
@@ -12,7 +10,7 @@ module MARC
12
10
  # If illegal bytes for that character encoding are encountered in certain
13
11
  # operations, ruby will raise an exception. If a String is incorrectly
14
12
  # tagged with the wrong character encoding, that makes it fairly likely
15
- # an illegal byte for the specified encoding will be encountered.
13
+ # an illegal byte for the specified encoding will be encountered.
16
14
  #
17
15
  # So when reading binary MARC data with the MARC::Reader, it's important
18
16
  # that you let it know the expected encoding:
@@ -21,7 +19,7 @@ module MARC
21
19
  #
22
20
  # If you leave off 'external_encoding', it will use the ruby environment
23
21
  # Encoding.default_external, which is usually UTF-8 but may depend on your
24
- # environment.
22
+ # environment.
25
23
  #
26
24
  # Even if you expect your data to be (eg) UTF-8, it may include bad/illegal
27
25
  # bytes. By default MARC::Reader will leave these in the produced Strings,
@@ -29,58 +27,58 @@ module MARC
29
27
  # to catch this early, and ask MARC::Reader to raise immediately on illegal
30
28
  # bytes:
31
29
  #
32
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
30
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
33
31
  # :validate_encoding => true)
34
32
  #
35
33
  # Alternately, you can have MARC::Reader replace illegal bytes
36
34
  # with the Unicode Replacement Character, or with a string
37
35
  # of your choice (including the empty string, meaning just omit the bad bytes)
38
36
  #
39
- # MARC::Reader("path/to/file.mrc", :external_encoding => "UTF-8",
37
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
40
38
  # :invalid => :replace)
41
- # MARC::Reader("path/to/file.mrc", :external_encoding => "UTF-8",
39
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
42
40
  # :invalid => :replace, :replace => "")
43
41
  #
44
42
  # If you supply an :external_encoding argument, MARC::Reader will
45
43
  # always assume that encoding -- if you leave it off, MARC::Reader
46
44
  # will use the encoding tagged on any input you pass in, such
47
- # as Strings or File handles.
45
+ # as Strings or File handles.
48
46
  #
49
47
  # # marc data will have same encoding as string.encoding:
50
48
  # MARC::Reader.decode( string )
51
49
  #
52
50
  # # Same, values will have encoding of string.encoding:
53
- # MARC::Reader.new(StringIO.new(string))
51
+ # MARC::Reader.new(StringIO.new(string))
54
52
  #
55
53
  # # data values will have cp866 encoding, per external_encoding of
56
54
  # # File object passed in
57
55
  # MARC::Reader.new(File.new("myfile.marc", "r:cp866"))
58
56
  #
59
57
  # # explicitly tell MARC::Reader the encoding
60
- # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
58
+ # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
61
59
  #
62
60
  # === MARC-8
63
61
  #
64
62
  # The legacy MARC-8 encoding needs to be handled differently, because
65
- # there is no built-in support in ruby for MARC-8.
63
+ # there is no built-in support in ruby for MARC-8.
66
64
  #
67
65
  # You _can_ specify "MARC-8" as an external encoding. It will trigger
68
- # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
66
+ # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
69
67
  #
70
68
  # MARC::Reader.new("marc8.mrc", :external_encoding => "MARC-8")
71
69
  #
72
70
  # For external_encoding "MARC-8", :validate_encoding is always true,
73
71
  # there's no way to ignore bad bytes in MARC-8 when transcoding to
74
- # unicode. However, just as with other encodings, the
72
+ # unicode. However, just as with other encodings, the
75
73
  # `:invalid => :replace` and `:replace => "string"`
76
- # options can be used to replace bad bytes instead of raising.
74
+ # options can be used to replace bad bytes instead of raising.
77
75
  #
78
76
  # If you want your MARC-8 to be transcoded internally to something
79
77
  # other than UTF-8, you can use the :internal_encoding option
80
- # which works with any encoding in MARC::Reader.
78
+ # which works with any encoding in MARC::Reader.
81
79
  #
82
- # MARC::Reader.new("marc8.mrc",
83
- # :external_encoding => "MARC-8",
80
+ # MARC::Reader.new("marc8.mrc",
81
+ # :external_encoding => "MARC-8",
84
82
  # :internal_encoding => "UTF-16LE")
85
83
  #
86
84
  # If you want to read in MARC-8 without transcoding, leaving the
@@ -90,48 +88,48 @@ module MARC
90
88
  #
91
89
  # MARC::Reader.new("marc8.mrc", :external_encoding => "binary")
92
90
  #
93
- # Please note that MARC::Reader does _not_ currently have any facilities
94
- # for guessing encoding from MARC21 leader byte 9, that is ignored.
91
+ # Please note that MARC::Reader does _not_ currently have any facilities
92
+ # for guessing encoding from MARC21 leader byte 9, that is ignored.
95
93
  #
96
94
  # === Complete Encoding Options
97
95
  #
98
96
  # These options can all be used on MARC::Reader.new _or_ MARC::Reader.decode
99
97
  # to specify external encoding, ask for a transcode to a different
100
- # encoding on read, or validate or replace bad bytes in source.
98
+ # encoding on read, or validate or replace bad bytes in source.
101
99
  #
102
100
  # [:external_encoding]
103
101
  # What encoding to consider the MARC record's values to be in. This option
104
- # takes precedence over the File handle or String argument's encodings.
102
+ # takes precedence over the File handle or String argument's encodings.
105
103
  # [:internal_encoding]
106
104
  # Ask MARC::Reader to transcode to this encoding in memory after reading
107
- # the file in.
105
+ # the file in.
108
106
  # [:validate_encoding]
109
107
  # If you pass in `true`, MARC::Reader will promise to raise an Encoding::InvalidByteSequenceError
110
108
  # if there are illegal bytes in the source for the :external_encoding. There is
111
109
  # a performance penalty for this check. Without this option, an exception
112
- # _may_ or _may not_ be raised, and whether an exception or raised (or
110
+ # _may_ or _may not_ be raised, and whether an exception is raised (or
113
111
  # what class the exception has) may change in future ruby-marc versions
114
- # without warning.
112
+ # without warning.
115
113
  # [:invalid]
116
114
  # Just like String#encode, set to :replace and any bytes in source data
117
- # illegal for the source encoding will be replaced with the unicode
115
+ # illegal for the source encoding will be replaced with the unicode
118
116
  # replacement character (when in unicode encodings), or else '?'. Overrides
119
117
  # :validate_encoding. This can help you sanitize your input and
120
- # avoid ruby "invalid UTF-8 byte" exceptions later.
118
+ # avoid ruby "invalid UTF-8 byte" exceptions later.
121
119
  # [:replace]
122
120
  # Just like String#encode, combine with `:invalid=>:replace`, set
123
121
  # your own replacement string for invalid bytes. You may use the
124
- # empty string to simply eliminate invalid bytes.
122
+ # empty string to simply eliminate invalid bytes.
125
123
  #
126
124
  # === Warning on ruby File's own :internal_encoding, and unsafe transcoding from ruby
127
125
  #
128
- # Be careful with using an explicit File object with the File's own
129
- # :internal_encoding set -- it can cause ruby to transcode your data
130
- # _before_ MARC::Reader gets it, changing the bytecount and making the
126
+ # Be careful with using an explicit File object with the File's own
127
+ # :internal_encoding set -- it can cause ruby to transcode your data
128
+ # _before_ MARC::Reader gets it, changing the bytecount and making the
131
129
  # marc record unreadable in some cases. This
132
130
  # applies to Encoding.default_internal too!
133
131
  #
134
- # # May in some cases result in unreadable marc and an exception
132
+ # # May in some cases result in unreadable marc and an exception
135
133
  # MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866:utf-8") )
136
134
  #
137
135
  # # May in some cases result in unreadable marc and an exception
@@ -156,7 +154,7 @@ module MARC
156
154
  # https://jira.codehaus.org/browse/JRUBY-6637
157
155
  #
158
156
  # We recommend using the latest version of jruby, especially
159
- # at least jruby 1.7.6.
157
+ # at least jruby 1.7.6.
160
158
  class Reader
161
159
  include Enumerable
162
160
 
@@ -182,43 +180,42 @@ module MARC
182
180
  #
183
181
  # Also, if your data is encoded with a non ascii/utf-8 encoding
184
182
  # (for ex. when reading RUSMARC data) and you use ruby 1.9
185
- # you can specify source data encoding with an option.
183
+ # you can specify source data encoding with an option.
186
184
  #
187
185
  # reader = MARC::Reader.new('marc.dat', :external_encoding => 'cp866')
188
186
  #
189
187
  # or, you can pass IO, opened in the corresponding encoding
190
188
  #
191
189
  # reader = MARC::Reader.new(File.new('marc.dat', 'r:cp866'))
192
- def initialize(file, options = {})
190
+ def initialize(file, options = {})
193
191
  @encoding_options = {}
194
192
  # all can be nil
195
193
  [:internal_encoding, :external_encoding, :invalid, :replace, :validate_encoding].each do |key|
196
194
  @encoding_options[key] = options[key] if options.has_key?(key)
197
195
  end
198
-
199
- if file.is_a?(String)
196
+
197
+ if file.is_a?(String)
200
198
  @handle = File.new(file)
201
- elsif file.respond_to?("read", 5)
199
+ elsif file.respond_to?(:read, 5)
202
200
  @handle = file
203
201
  else
204
202
  raise ArgumentError, "must pass in path or file"
205
203
  end
206
-
207
- if (! @encoding_options[:external_encoding] ) && @handle.respond_to?(:external_encoding)
204
+
205
+ if (!@encoding_options[:external_encoding]) && @handle.respond_to?(:external_encoding)
208
206
  # use file encoding only if we didn't already have an explicit one,
209
- # explicit one takes precedence.
207
+ # explicit one takes precedence.
210
208
  #
211
209
  # Note, please don't use ruby's own internal_encoding transcode
212
210
  # with binary marc data, the transcode can mess up the byte count
213
- # and make it unreadable.
211
+ # and make it unreadable.
214
212
  @encoding_options[:external_encoding] ||= @handle.external_encoding
215
213
  end
216
214
 
217
215
  # Only pull in the MARC8 translation if we need it, since it's really big
218
- if @encoding_options[:external_encoding] == "MARC-8"
219
- require 'marc/marc8/to_unicode' unless defined? MARC::Marc8::ToUnicode
216
+ if @encoding_options[:external_encoding] == "MARC-8"
217
+ require "marc/marc8/to_unicode" unless defined? MARC::Marc8::ToUnicode
220
218
  end
221
-
222
219
  end
223
220
 
224
221
  # to support iteration:
@@ -226,13 +223,13 @@ module MARC
226
223
  # print record
227
224
  # end
228
225
  def each
229
- unless block_given?
230
- return self.enum_for(:each)
231
- else
232
- self.each_raw do |raw|
233
- record = self.decode(raw)
226
+ if block_given?
227
+ each_raw do |raw|
228
+ record = decode(raw)
234
229
  yield record
235
230
  end
231
+ else
232
+ enum_for(:each)
236
233
  end
237
234
  end
238
235
 
@@ -257,10 +254,8 @@ module MARC
257
254
  #
258
255
  # If no block is given, an enumerator is returned
259
256
  def each_raw
260
- unless block_given?
261
- return self.enum_for(:each_raw)
262
- else
263
- while rec_length_s = @handle.read(5)
257
+ if block_given?
258
+ while (rec_length_s = @handle.read(5))
264
259
  # make sure the record length looks like an integer
265
260
  rec_length_i = rec_length_s.to_i
266
261
  if rec_length_i == 0
@@ -269,9 +264,11 @@ module MARC
269
264
 
270
265
  # get the raw MARC21 for a record back from the file
271
266
  # using the record length
272
- raw = rec_length_s + @handle.read(rec_length_i-5)
267
+ raw = rec_length_s + @handle.read(rec_length_i - 5)
273
268
  yield raw
274
269
  end
270
+ else
271
+ enum_for(:each_raw)
275
272
  end
276
273
  end
277
274
 
@@ -280,7 +277,7 @@ module MARC
280
277
  # Wraps the class method MARC::Reader::decode, using the encoding options of
281
278
  # the MARC::Reader instance.
282
279
  def decode(marc)
283
- return MARC::Reader.decode(marc, @encoding_options)
280
+ MARC::Reader.decode(marc, @encoding_options)
284
281
  end
285
282
 
286
283
  # A static method for turning raw MARC data in transmission
@@ -288,34 +285,34 @@ module MARC
288
285
  # First argument is a String
289
286
  # options include:
290
287
  # [:external_encoding] encoding of MARC record data values
291
- # [:forgiving] needs more docs, true is some kind of forgiving
292
- # of certain kinds of bad MARC.
293
- def self.decode(marc, params={})
288
+ # [:forgiving] needs more docs, true is some kind of forgiving
289
+ # of certain kinds of bad MARC.
290
+ def self.decode(marc, params = {})
294
291
  if params.has_key?(:encoding)
295
- $stderr.puts "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
292
+ warn "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
296
293
  params[:external_encoding] = params.delete(:encoding)
297
294
  end
298
-
299
- if (! params.has_key? :external_encoding ) && marc.respond_to?(:encoding)
295
+
296
+ if (!params.has_key? :external_encoding) && marc.respond_to?(:encoding)
300
297
  # If no forced external_encoding given, respect the encoding
301
- # declared on the string passed in.
298
+ # declared on the string passed in.
302
299
  params[:external_encoding] = marc.encoding
303
300
  end
304
301
  # And now that we've recorded the current encoding, we force
305
302
  # to binary encoding, because we're going to be doing byte arithmetic,
306
- # and want to avoid byte-vs-char confusion.
303
+ # and want to avoid byte-vs-char confusion.
307
304
  marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
308
-
309
- record = Record.new()
310
- record.leader = marc[0..LEADER_LENGTH-1]
305
+
306
+ record = Record.new
307
+ record.leader = marc[0..LEADER_LENGTH - 1]
311
308
 
312
309
  # where the field data starts
313
310
  base_address = record.leader[12..16].to_i
314
311
 
315
312
  # get the byte offsets from the record directory
316
- directory = marc[LEADER_LENGTH..base_address-1]
313
+ directory = marc[LEADER_LENGTH..base_address - 1]
317
314
 
318
- raise MARC::Exception.new("invalid directory in record") if directory == nil
315
+ raise MARC::Exception.new("invalid directory in record") if directory.nil?
319
316
 
320
317
  # the number of fields in the record corresponds to
321
318
  # how many directory entries there are
@@ -324,20 +321,19 @@ module MARC
324
321
  # when operating in forgiving mode we just split on end of
325
322
  # field instead of using calculated byte offsets from the
326
323
  # directory
327
- if params[:forgiving]
324
+ if params[:forgiving]
328
325
  marc_field_data = marc[base_address..-1]
329
326
  # It won't let us do the split on bad utf8 data, but
330
327
  # we haven't yet set the 'proper' encoding or used
331
328
  # our correction/replace options. So call it binary for now.
332
329
  marc_field_data.force_encoding("binary") if marc_field_data.respond_to?(:force_encoding)
333
-
330
+
334
331
  all_fields = marc_field_data.split(END_OF_FIELD)
335
332
  else
336
- mba = marc.bytes.to_a
333
+ mba = marc.bytes.to_a
337
334
  end
338
335
 
339
- 0.upto(num_fields-1) do |field_num|
340
-
336
+ 0.upto(num_fields - 1) do |field_num|
341
337
  # pull the directory entry for a field out
342
338
  entry_start = field_num * DIRECTORY_ENTRY_LENGTH
343
339
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
@@ -350,12 +346,12 @@ module MARC
350
346
  # if we were told to be forgiving we just use the
351
347
  # next available chunk of field data that we
352
348
  # split apart based on the END_OF_FIELD
353
- field_data = ''
349
+ field_data = ""
354
350
  if params[:forgiving]
355
- field_data = all_fields.shift()
351
+ field_data = all_fields.shift
356
352
 
357
- # otherwise we actually use the byte offsets in
358
- # directory to figure out what field data to extract
353
+ # otherwise we actually use the byte offsets in
354
+ # directory to figure out what field data to extract
359
355
  else
360
356
  length = entry[3..6].to_i
361
357
  offset = entry[7..11].to_i
@@ -366,11 +362,11 @@ module MARC
366
362
 
367
363
  # remove end of field
368
364
  field_data.delete!(END_OF_FIELD)
369
-
365
+
370
366
  # add a control field or data field
371
367
  if MARC::ControlField.control_tag?(tag)
372
- field_data = MARC::Reader.set_encoding( field_data , params)
373
- record.append(MARC::ControlField.new(tag,field_data))
368
+ field_data = MARC::Reader.set_encoding(field_data, params)
369
+ record.append(MARC::ControlField.new(tag, field_data))
374
370
  else
375
371
  field = MARC::DataField.new(tag)
376
372
 
@@ -379,17 +375,17 @@ module MARC
379
375
 
380
376
  # must have at least 2 elements (indicators, and 1 subfield)
381
377
  # TODO some sort of logging?
382
- next if subfields.length() < 2
378
+ next if subfields.length < 2
383
379
 
384
380
  # get indicators
385
- indicators = MARC::Reader.set_encoding( subfields.shift(), params)
386
- field.indicator1 = indicators[0,1]
387
- field.indicator2 = indicators[1,1]
381
+ indicators = MARC::Reader.set_encoding(subfields.shift, params)
382
+ field.indicator1 = indicators[0, 1]
383
+ field.indicator2 = indicators[1, 1]
388
384
 
389
385
  # add each subfield to the field
390
- subfields.each() do |data|
391
- data = MARC::Reader.set_encoding( data, params )
392
- subfield = MARC::Subfield.new(data[0,1],data[1..-1])
386
+ subfields.each do |data|
387
+ data = MARC::Reader.set_encoding(data, params)
388
+ subfield = MARC::Subfield.new(data[0, 1], data[1..-1])
393
389
  field.append(subfield)
394
390
  end
395
391
 
@@ -398,10 +394,12 @@ module MARC
398
394
  end
399
395
  end
400
396
 
401
- return record
402
- end
397
+ raise MARC::RecordException, record unless record.valid?
403
398
 
404
- # input passed in probably has 'binary' encoding.
399
+ record
400
+ end
401
+
402
+ # input passed in probably has 'binary' encoding.
405
403
  # We'll set it to the proper encoding, and depending on settings, optionally
406
404
  # * check for valid encoding
407
405
  # * raise if not valid
@@ -411,16 +409,16 @@ module MARC
411
409
  # Special case for encoding "MARC-8" -- will be transcoded to
412
410
  # UTF-8 (then further transcoded to internal_encoding, if set).
413
411
  # For "MARC-8", validate_encoding is always true, there's no way to
414
- # ignore bad bytes.
412
+ # ignore bad bytes.
415
413
  #
416
414
  # Params options:
417
- #
418
- # * external_encoding: what encoding the input is expected to be in
415
+ #
416
+ # * external_encoding: what encoding the input is expected to be in
419
417
  # * validate_encoding: if true, will raise if an invalid encoding
420
418
  # * invalid: if set to :replace, will replace bad bytes with replacement
421
- # chars instead of raising.
419
+ # chars instead of raising.
422
420
  # * replace: Set replacement char for use with 'invalid', otherwise defaults
423
- # to unicode replacement char, or question mark.
421
+ # to unicode replacement char, or question mark.
424
422
  def self.set_encoding(str, params)
425
423
  if str.respond_to?(:force_encoding)
426
424
  if params[:external_encoding]
@@ -430,41 +428,38 @@ module MARC
430
428
  else
431
429
  str = str.force_encoding(params[:external_encoding])
432
430
  end
433
- end
434
-
431
+ end
432
+
435
433
  # If we're transcoding anyway, pass our invalid/replace options
436
434
  # on to String#encode, which will take care of them -- or raise
437
- # with illegal bytes without :replace=>:invalid.
435
+ # with illegal bytes without :invalid=>:replace.
438
436
  #
439
437
  # If we're NOT transcoding, we need to use our own pure-ruby
440
438
  # implementation to do invalid byte replacements. OR to raise
441
439
  # a predicatable exception iff :validate_encoding, otherwise
442
440
  # for performance we won't check, and you may or may not
443
441
  # get an exception from inside ruby-marc, and it may change
444
- # in future implementations.
442
+ # in future implementations.
445
443
  if params[:internal_encoding]
446
- if RUBY_VERSION >= '3.0'
447
- str = str.encode(params[:internal_encoding], **params)
444
+ str = if RUBY_VERSION >= "3.0"
445
+ str.encode(params[:internal_encoding], **params)
448
446
  else
449
- str = str.encode(params[:internal_encoding], params)
447
+ str.encode(params[:internal_encoding], params)
450
448
  end
451
- elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
449
+ elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
452
450
 
453
- if params[:validate_encoding] == true && ! str.valid_encoding?
454
- raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
451
+ if params[:validate_encoding] == true && !str.valid_encoding?
452
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
455
453
  end
456
454
  if params[:invalid] == :replace
457
455
  str = str.scrub(params[:replace])
458
456
  end
459
-
460
- end
461
- end
462
- return str
463
- end
464
- end
465
-
466
-
467
457
 
458
+ end
459
+ end
460
+ str
461
+ end
462
+ end
468
463
 
469
464
  # Like Reader ForgivingReader lets you read in a batch of MARC21 records
470
465
  # but it does not use record lengths and field byte offsets found in the
@@ -479,22 +474,19 @@ module MARC
479
474
  #
480
475
  # **NOTE**: ForgivingReader _may_ have unpredictable results when used
481
476
  # with marc records with char encoding other than system default (usually
482
- # UTF8), _especially_ if you have Encoding.default_internal set.
477
+ # UTF8), _especially_ if you have Encoding.default_internal set.
483
478
  #
484
479
  # Implemented as a sub-class of Reader over-riding #each, so we still
485
480
  # get DRY Reader's #initialize with proper char encoding options
486
- # and handling.
481
+ # and handling.
487
482
  class ForgivingReader < Reader
488
-
489
483
  def each
490
484
  @handle.each_line(END_OF_RECORD) do |raw|
491
- begin
492
- record = MARC::Reader.decode(raw, @encoding_options.merge(:forgiving => true))
493
- yield record
494
- rescue StandardError => e
495
- # caught exception just keep barrelling along
496
- # TODO add logging
497
- end
485
+ record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
486
+ yield record
487
+ rescue
488
+ # caught exception just keep barrelling along
489
+ # TODO add logging
498
490
  end
499
491
  end
500
492
  end