marc 1.1.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +116 -30
  7. data/Gemfile +5 -0
  8. data/README.md +239 -46
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -87
  22. data/lib/marc/reader.rb +116 -124
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -82
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +28 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -34
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +101 -94
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +129 -22
data/lib/marc/reader.rb CHANGED
@@ -1,10 +1,8 @@
1
- require 'scrub_rb'
2
-
3
1
  # Note: requiring 'marc/marc8/to_unicode' below, in #initialize,
4
2
  # only when necessary
5
3
 
6
4
  module MARC
7
- # A class for reading MARC binary (ISO 2709) files.
5
+ # A class for reading MARC binary (ISO 2709) files.
8
6
  #
9
7
  # == Character Encoding
10
8
  #
@@ -12,7 +10,7 @@ module MARC
12
10
  # If illegal bytes for that character encoding are encountered in certain
13
11
  # operations, ruby will raise an exception. If a String is incorrectly
14
12
  # tagged with the wrong character encoding, that makes it fairly likely
15
- # an illegal byte for the specified encoding will be encountered.
13
+ # an illegal byte for the specified encoding will be encountered.
16
14
  #
17
15
  # So when reading binary MARC data with the MARC::Reader, it's important
18
16
  # that you let it know the expected encoding:
@@ -21,7 +19,7 @@ module MARC
21
19
  #
22
20
  # If you leave off 'external_encoding', it will use the ruby environment
23
21
  # Encoding.default_external, which is usually UTF-8 but may depend on your
24
- # environment.
22
+ # environment.
25
23
  #
26
24
  # Even if you expect your data to be (eg) UTF-8, it may include bad/illegal
27
25
  # bytes. By default MARC::Reader will leave these in the produced Strings,
@@ -29,58 +27,58 @@ module MARC
29
27
  # to catch this early, and ask MARC::Reader to raise immediately on illegal
30
28
  # bytes:
31
29
  #
32
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
30
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
33
31
  # :validate_encoding => true)
34
32
  #
35
33
  # Alternately, you can have MARC::Reader replace illegal bytes
36
34
  # with the Unicode Replacement Character, or with a string
37
35
  # of your choice (including the empty string, meaning just omit the bad bytes)
38
36
  #
39
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
37
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
40
38
  # :invalid => :replace)
41
- # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
39
+ # MARC::Reader.new("path/to/file.mrc", :external_encoding => "UTF-8",
42
40
  # :invalid => :replace, :replace => "")
43
41
  #
44
42
  # If you supply an :external_encoding argument, MARC::Reader will
45
43
  # always assume that encoding -- if you leave it off, MARC::Reader
46
44
  # will use the encoding tagged on any input you pass in, such
47
- # as Strings or File handles.
45
+ # as Strings or File handles.
48
46
  #
49
47
  # # marc data will have same encoding as string.encoding:
50
48
  # MARC::Reader.decode( string )
51
49
  #
52
50
  # # Same, values will have encoding of string.encoding:
53
- # MARC::Reader.new(StringIO.new(string))
51
+ # MARC::Reader.new(StringIO.new(string))
54
52
  #
55
53
  # # data values will have cp866 encoding, per external_encoding of
56
54
  # # File object passed in
57
55
  # MARC::Reader.new(File.new("myfile.marc", "r:cp866"))
58
56
  #
59
57
  # # explicitly tell MARC::Reader the encoding
60
- # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
58
+ # MARC::Reader.new("myfile.marc", :external_encoding => "cp866")
61
59
  #
62
60
  # === MARC-8
63
61
  #
64
62
  # The legacy MARC-8 encoding needs to be handled differently, because
65
- # there is no built-in support in ruby for MARC-8.
63
+ # there is no built-in support in ruby for MARC-8.
66
64
  #
67
65
  # You _can_ specify "MARC-8" as an external encoding. It will trigger
68
- # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
66
+ # trans-code to UTF-8 (NFC-normalized) in the internal ruby strings.
69
67
  #
70
68
  # MARC::Reader.new("marc8.mrc", :external_encoding => "MARC-8")
71
69
  #
72
70
  # For external_encoding "MARC-8", :validate_encoding is always true,
73
71
  # there's no way to ignore bad bytes in MARC-8 when transcoding to
74
- # unicode. However, just as with other encodings, the
72
+ # unicode. However, just as with other encodings, the
75
73
  # `:invalid => :replace` and `:replace => "string"`
76
- # options can be used to replace bad bytes instead of raising.
74
+ # options can be used to replace bad bytes instead of raising.
77
75
  #
78
76
  # If you want your MARC-8 to be transcoded internally to something
79
77
  # other than UTF-8, you can use the :internal_encoding option
80
- # which works with any encoding in MARC::Reader.
78
+ # which works with any encoding in MARC::Reader.
81
79
  #
82
- # MARC::Reader.new("marc8.mrc",
83
- # :external_encoding => "MARC-8",
80
+ # MARC::Reader.new("marc8.mrc",
81
+ # :external_encoding => "MARC-8",
84
82
  # :internal_encoding => "UTF-16LE")
85
83
  #
86
84
  # If you want to read in MARC-8 without transcoding, leaving the
@@ -90,48 +88,48 @@ module MARC
90
88
  #
91
89
  # MARC::Reader.new("marc8.mrc", :external_encoding => "binary")
92
90
  #
93
- # Please note that MARC::Reader does _not_ currently have any facilities
94
- # for guessing encoding from MARC21 leader byte 9, that is ignored.
91
+ # Please note that MARC::Reader does _not_ currently have any facilities
92
+ # for guessing encoding from MARC21 leader byte 9, that is ignored.
95
93
  #
96
94
  # === Complete Encoding Options
97
95
  #
98
96
  # These options can all be used on MARC::Reader.new _or_ MARC::Reader.decode
99
97
  # to specify external encoding, ask for a transcode to a different
100
- # encoding on read, or validate or replace bad bytes in source.
98
+ # encoding on read, or validate or replace bad bytes in source.
101
99
  #
102
100
  # [:external_encoding]
103
101
  # What encoding to consider the MARC record's values to be in. This option
104
- # takes precedence over the File handle or String argument's encodings.
102
+ # takes precedence over the File handle or String argument's encodings.
105
103
  # [:internal_encoding]
106
104
  # Ask MARC::Reader to transcode to this encoding in memory after reading
107
- # the file in.
105
+ # the file in.
108
106
  # [:validate_encoding]
109
107
  # If you pass in `true`, MARC::Reader will promise to raise an Encoding::InvalidByteSequenceError
110
108
  # if there are illegal bytes in the source for the :external_encoding. There is
111
109
  # a performance penalty for this check. Without this option, an exception
112
- # _may_ or _may not_ be raised, and whether an exception is raised (or
110
+ # _may_ or _may not_ be raised, and whether an exception is raised (or
113
111
  # what class the exception has) may change in future ruby-marc versions
114
- # without warning.
112
+ # without warning.
115
113
  # [:invalid]
116
114
  # Just like String#encode, set to :replace and any bytes in source data
117
- # illegal for the source encoding will be replaced with the unicode
115
+ # illegal for the source encoding will be replaced with the unicode
118
116
  # replacement character (when in unicode encodings), or else '?'. Overrides
119
117
  # :validate_encoding. This can help you sanitize your input and
120
- # avoid ruby "invalid UTF-8 byte" exceptions later.
118
+ # avoid ruby "invalid UTF-8 byte" exceptions later.
121
119
  # [:replace]
122
120
  # Just like String#encode, combine with `:invalid=>:replace`, set
123
121
  # your own replacement string for invalid bytes. You may use the
124
- # empty string to simply eliminate invalid bytes.
122
+ # empty string to simply eliminate invalid bytes.
125
123
  #
126
124
  # === Warning on ruby File's own :internal_encoding, and unsafe transcoding from ruby
127
125
  #
128
- # Be careful with using an explicit File object with the File's own
129
- # :internal_encoding set -- it can cause ruby to transcode your data
130
- # _before_ MARC::Reader gets it, changing the bytecount and making the
126
+ # Be careful with using an explicit File object with the File's own
127
+ # :internal_encoding set -- it can cause ruby to transcode your data
128
+ # _before_ MARC::Reader gets it, changing the bytecount and making the
131
129
  # marc record unreadable in some cases. This
132
130
  # applies to Encoding.default_internal too!
133
131
  #
134
- # # May in some cases result in unreadable marc and an exception
132
+ # # May in some cases result in unreadable marc and an exception
135
133
  # MARC::Reader.new( File.new("marc_in_cp866.mrc", "r:cp866:utf-8") )
136
134
  #
137
135
  # # May in some cases result in unreadable marc and an exception
@@ -156,7 +154,7 @@ module MARC
156
154
  # https://jira.codehaus.org/browse/JRUBY-6637
157
155
  #
158
156
  # We recommend using the latest version of jruby, especially
159
- # at least jruby 1.7.6.
157
+ # at least jruby 1.7.6.
160
158
  class Reader
161
159
  include Enumerable
162
160
 
@@ -182,43 +180,42 @@ module MARC
182
180
  #
183
181
  # Also, if your data is encoded with a non ascii/utf-8 encoding
184
182
  # (for ex. when reading RUSMARC data) and you use ruby 1.9
185
- # you can specify source data encoding with an option.
183
+ # you can specify source data encoding with an option.
186
184
  #
187
185
  # reader = MARC::Reader.new('marc.dat', :external_encoding => 'cp866')
188
186
  #
189
187
  # or, you can pass IO, opened in the corresponding encoding
190
188
  #
191
189
  # reader = MARC::Reader.new(File.new('marc.dat', 'r:cp866'))
192
- def initialize(file, options = {})
190
+ def initialize(file, options = {})
193
191
  @encoding_options = {}
194
192
  # all can be nil
195
193
  [:internal_encoding, :external_encoding, :invalid, :replace, :validate_encoding].each do |key|
196
194
  @encoding_options[key] = options[key] if options.has_key?(key)
197
195
  end
198
-
199
- if file.is_a?(String)
196
+
197
+ if file.is_a?(String)
200
198
  @handle = File.new(file)
201
- elsif file.respond_to?("read", 5)
199
+ elsif file.respond_to?(:read, 5)
202
200
  @handle = file
203
201
  else
204
202
  raise ArgumentError, "must pass in path or file"
205
203
  end
206
-
207
- if (! @encoding_options[:external_encoding] ) && @handle.respond_to?(:external_encoding)
204
+
205
+ if (!@encoding_options[:external_encoding]) && @handle.respond_to?(:external_encoding)
208
206
  # use file encoding only if we didn't already have an explicit one,
209
- # explicit one takes precedence.
207
+ # explicit one takes precedence.
210
208
  #
211
209
  # Note, please don't use ruby's own internal_encoding transcode
212
210
  # with binary marc data, the transcode can mess up the byte count
213
- # and make it unreadable.
211
+ # and make it unreadable.
214
212
  @encoding_options[:external_encoding] ||= @handle.external_encoding
215
213
  end
216
214
 
217
215
  # Only pull in the MARC8 translation if we need it, since it's really big
218
- if @encoding_options[:external_encoding] == "MARC-8"
219
- require 'marc/marc8/to_unicode' unless defined? MARC::Marc8::ToUnicode
216
+ if @encoding_options[:external_encoding] == "MARC-8"
217
+ require "marc/marc8/to_unicode" unless defined? MARC::Marc8::ToUnicode
220
218
  end
221
-
222
219
  end
223
220
 
224
221
  # to support iteration:
@@ -226,13 +223,13 @@ module MARC
226
223
  # print record
227
224
  # end
228
225
  def each
229
- unless block_given?
230
- return self.enum_for(:each)
231
- else
232
- self.each_raw do |raw|
233
- record = self.decode(raw)
226
+ if block_given?
227
+ each_raw do |raw|
228
+ record = decode(raw)
234
229
  yield record
235
230
  end
231
+ else
232
+ enum_for(:each)
236
233
  end
237
234
  end
238
235
 
@@ -257,10 +254,8 @@ module MARC
257
254
  #
258
255
  # If no block is given, an enumerator is returned
259
256
  def each_raw
260
- unless block_given?
261
- return self.enum_for(:each_raw)
262
- else
263
- while rec_length_s = @handle.read(5)
257
+ if block_given?
258
+ while (rec_length_s = @handle.read(5))
264
259
  # make sure the record length looks like an integer
265
260
  rec_length_i = rec_length_s.to_i
266
261
  if rec_length_i == 0
@@ -269,9 +264,11 @@ module MARC
269
264
 
270
265
  # get the raw MARC21 for a record back from the file
271
266
  # using the record length
272
- raw = rec_length_s + @handle.read(rec_length_i-5)
267
+ raw = rec_length_s + @handle.read(rec_length_i - 5)
273
268
  yield raw
274
269
  end
270
+ else
271
+ enum_for(:each_raw)
275
272
  end
276
273
  end
277
274
 
@@ -280,7 +277,7 @@ module MARC
280
277
  # Wraps the class method MARC::Reader::decode, using the encoding options of
281
278
  # the MARC::Reader instance.
282
279
  def decode(marc)
283
- return MARC::Reader.decode(marc, @encoding_options)
280
+ MARC::Reader.decode(marc, @encoding_options)
284
281
  end
285
282
 
286
283
  # A static method for turning raw MARC data in transmission
@@ -288,34 +285,34 @@ module MARC
288
285
  # First argument is a String
289
286
  # options include:
290
287
  # [:external_encoding] encoding of MARC record data values
291
- # [:forgiving] needs more docs, true is some kind of forgiving
292
- # of certain kinds of bad MARC.
293
- def self.decode(marc, params={})
288
+ # [:forgiving] needs more docs, true is some kind of forgiving
289
+ # of certain kinds of bad MARC.
290
+ def self.decode(marc, params = {})
294
291
  if params.has_key?(:encoding)
295
- $stderr.puts "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
292
+ warn "DEPRECATION WARNING: MARC::Reader.decode :encoding option deprecated, please use :external_encoding"
296
293
  params[:external_encoding] = params.delete(:encoding)
297
294
  end
298
-
299
- if (! params.has_key? :external_encoding ) && marc.respond_to?(:encoding)
295
+
296
+ if (!params.has_key? :external_encoding) && marc.respond_to?(:encoding)
300
297
  # If no forced external_encoding given, respect the encoding
301
- # declared on the string passed in.
298
+ # declared on the string passed in.
302
299
  params[:external_encoding] = marc.encoding
303
300
  end
304
301
  # And now that we've recorded the current encoding, we force
305
302
  # to binary encoding, because we're going to be doing byte arithmetic,
306
- # and want to avoid byte-vs-char confusion.
303
+ # and want to avoid byte-vs-char confusion.
307
304
  marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
308
-
309
- record = Record.new()
310
- record.leader = marc[0..LEADER_LENGTH-1]
305
+
306
+ record = Record.new
307
+ record.leader = marc[0..LEADER_LENGTH - 1]
311
308
 
312
309
  # where the field data starts
313
310
  base_address = record.leader[12..16].to_i
314
311
 
315
312
  # get the byte offsets from the record directory
316
- directory = marc[LEADER_LENGTH..base_address-1]
313
+ directory = marc[LEADER_LENGTH..base_address - 1]
317
314
 
318
- raise MARC::Exception.new("invalid directory in record") if directory == nil
315
+ raise MARC::Exception.new("invalid directory in record") if directory.nil?
319
316
 
320
317
  # the number of fields in the record corresponds to
321
318
  # how many directory entries there are
@@ -324,20 +321,19 @@ module MARC
324
321
  # when operating in forgiving mode we just split on end of
325
322
  # field instead of using calculated byte offsets from the
326
323
  # directory
327
- if params[:forgiving]
324
+ if params[:forgiving]
328
325
  marc_field_data = marc[base_address..-1]
329
326
  # It won't let us do the split on bad utf8 data, but
330
327
  # we haven't yet set the 'proper' encoding or used
331
328
  # our correction/replace options. So call it binary for now.
332
329
  marc_field_data.force_encoding("binary") if marc_field_data.respond_to?(:force_encoding)
333
-
330
+
334
331
  all_fields = marc_field_data.split(END_OF_FIELD)
335
332
  else
336
- mba = marc.bytes.to_a
333
+ mba = marc.bytes.to_a
337
334
  end
338
335
 
339
- 0.upto(num_fields-1) do |field_num|
340
-
336
+ 0.upto(num_fields - 1) do |field_num|
341
337
  # pull the directory entry for a field out
342
338
  entry_start = field_num * DIRECTORY_ENTRY_LENGTH
343
339
  entry_end = entry_start + DIRECTORY_ENTRY_LENGTH
@@ -350,12 +346,12 @@ module MARC
350
346
  # if we were told to be forgiving we just use the
351
347
  # next available chuck of field data that we
352
348
  # split apart based on the END_OF_FIELD
353
- field_data = ''
349
+ field_data = ""
354
350
  if params[:forgiving]
355
- field_data = all_fields.shift()
351
+ field_data = all_fields.shift
356
352
 
357
- # otherwise we actually use the byte offsets in
358
- # directory to figure out what field data to extract
353
+ # otherwise we actually use the byte offsets in
354
+ # directory to figure out what field data to extract
359
355
  else
360
356
  length = entry[3..6].to_i
361
357
  offset = entry[7..11].to_i
@@ -366,11 +362,11 @@ module MARC
366
362
 
367
363
  # remove end of field
368
364
  field_data.delete!(END_OF_FIELD)
369
-
365
+
370
366
  # add a control field or data field
371
367
  if MARC::ControlField.control_tag?(tag)
372
- field_data = MARC::Reader.set_encoding( field_data , params)
373
- record.append(MARC::ControlField.new(tag,field_data))
368
+ field_data = MARC::Reader.set_encoding(field_data, params)
369
+ record.append(MARC::ControlField.new(tag, field_data))
374
370
  else
375
371
  field = MARC::DataField.new(tag)
376
372
 
@@ -379,17 +375,17 @@ module MARC
379
375
 
380
376
  # must have at least 2 elements (indicators, and 1 subfield)
381
377
  # TODO some sort of logging?
382
- next if subfields.length() < 2
378
+ next if subfields.length < 2
383
379
 
384
380
  # get indicators
385
- indicators = MARC::Reader.set_encoding( subfields.shift(), params)
386
- field.indicator1 = indicators[0,1]
387
- field.indicator2 = indicators[1,1]
381
+ indicators = MARC::Reader.set_encoding(subfields.shift, params)
382
+ field.indicator1 = indicators[0, 1]
383
+ field.indicator2 = indicators[1, 1]
388
384
 
389
385
  # add each subfield to the field
390
- subfields.each() do |data|
391
- data = MARC::Reader.set_encoding( data, params )
392
- subfield = MARC::Subfield.new(data[0,1],data[1..-1])
386
+ subfields.each do |data|
387
+ data = MARC::Reader.set_encoding(data, params)
388
+ subfield = MARC::Subfield.new(data[0, 1], data[1..-1])
393
389
  field.append(subfield)
394
390
  end
395
391
 
@@ -398,10 +394,12 @@ module MARC
398
394
  end
399
395
  end
400
396
 
401
- return record
402
- end
397
+ raise MARC::RecordException, record unless record.valid?
403
398
 
404
- # input passed in probably has 'binary' encoding.
399
+ record
400
+ end
401
+
402
+ # input passed in probably has 'binary' encoding.
405
403
  # We'll set it to the proper encoding, and depending on settings, optionally
406
404
  # * check for valid encoding
407
405
  # * raise if not valid
@@ -411,16 +409,16 @@ module MARC
411
409
  # Special case for encoding "MARC-8" -- will be transcoded to
412
410
  # UTF-8 (then further transcoded to internal_encoding, if set).
413
411
  # For "MARC-8", validate_encoding is always true, there's no way to
414
- # ignore bad bytes.
412
+ # ignore bad bytes.
415
413
  #
416
414
  # Params options:
417
- #
418
- # * external_encoding: what encoding the input is expected to be in
415
+ #
416
+ # * external_encoding: what encoding the input is expected to be in
419
417
  # * validate_encoding: if true, will raise if the input has bytes invalid for the encoding
420
418
  # * invalid: if set to :replace, will replace bad bytes with replacement
421
- # chars instead of raising.
419
+ # chars instead of raising.
422
420
  # * replace: Set replacement char for use with 'invalid', otherwise defaults
423
- # to unicode replacement char, or question mark.
421
+ # to unicode replacement char, or question mark.
424
422
  def self.set_encoding(str, params)
425
423
  if str.respond_to?(:force_encoding)
426
424
  if params[:external_encoding]
@@ -430,41 +428,38 @@ module MARC
430
428
  else
431
429
  str = str.force_encoding(params[:external_encoding])
432
430
  end
433
- end
434
-
431
+ end
432
+
435
433
  # If we're transcoding anyway, pass our invalid/replace options
436
434
  # on to String#encode, which will take care of them -- or raise
437
- # with illegal bytes without :invalid=>:replace.
435
+ # with illegal bytes without :invalid=>:replace.
438
436
  #
439
437
  # If we're NOT transcoding, we need to use our own pure-ruby
440
438
  # implementation to do invalid byte replacements. OR to raise
441
439
  # a predicatable exception iff :validate_encoding, otherwise
442
440
  # for performance we won't check, and you may or may not
443
441
  # get an exception from inside ruby-marc, and it may change
444
- # in future implementations.
442
+ # in future implementations.
445
443
  if params[:internal_encoding]
446
- if RUBY_VERSION >= '3.0'
447
- str = str.encode(params[:internal_encoding], **params)
444
+ str = if RUBY_VERSION >= "3.0"
445
+ str.encode(params[:internal_encoding], **params)
448
446
  else
449
- str = str.encode(params[:internal_encoding], params)
447
+ str.encode(params[:internal_encoding], params)
450
448
  end
451
- elsif (params[:invalid] || params[:replace] || (params[:validate_encoding] == true))
449
+ elsif params[:invalid] || params[:replace] || (params[:validate_encoding] == true)
452
450
 
453
- if params[:validate_encoding] == true && ! str.valid_encoding?
454
- raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
451
+ if params[:validate_encoding] == true && !str.valid_encoding?
452
+ raise Encoding::InvalidByteSequenceError.new("invalid byte in string for source encoding #{str.encoding.name}")
455
453
  end
456
454
  if params[:invalid] == :replace
457
455
  str = str.scrub(params[:replace])
458
456
  end
459
-
460
- end
461
- end
462
- return str
463
- end
464
- end
465
-
466
-
467
457
 
458
+ end
459
+ end
460
+ str
461
+ end
462
+ end
468
463
 
469
464
  # Like Reader ForgivingReader lets you read in a batch of MARC21 records
470
465
  # but it does not use record lengths and field byte offsets found in the
@@ -479,22 +474,19 @@ module MARC
479
474
  #
480
475
  # **NOTE**: ForgivingReader _may_ have unpredictable results when used
481
476
  # with marc records with char encoding other than system default (usually
482
- # UTF8), _especially_ if you have Encoding.default_internal set.
477
+ # UTF8), _especially_ if you have Encoding.default_internal set.
483
478
  #
484
479
  # Implemented a sub-class of Reader over-riding #each, so we still
485
480
  # get DRY Reader's #initialize with proper char encoding options
486
- # and handling.
481
+ # and handling.
487
482
  class ForgivingReader < Reader
488
-
489
483
  def each
490
484
  @handle.each_line(END_OF_RECORD) do |raw|
491
- begin
492
- record = MARC::Reader.decode(raw, @encoding_options.merge(:forgiving => true))
493
- yield record
494
- rescue StandardError => e
495
- # caught exception just keep barrelling along
496
- # TODO add logging
497
- end
485
+ record = MARC::Reader.decode(raw, @encoding_options.merge(forgiving: true))
486
+ yield record
487
+ rescue
488
+ # caught exception just keep barrelling along
489
+ # TODO add logging
498
490
  end
499
491
  end
500
492
  end