marc 1.0.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +106 -29
  7. data/Gemfile +15 -0
  8. data/README.md +240 -47
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -86
  22. data/lib/marc/reader.rb +119 -121
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -81
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +23 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -32
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +99 -87
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +94 -9
data/Rakefile CHANGED
@@ -1,22 +1,22 @@
1
- require 'rubygems'
1
+ require "rubygems"
2
2
 
3
- require 'rake'
4
- require 'rake/testtask'
5
- require 'rdoc/task'
6
- require 'bundler/gem_tasks'
3
+ require "rake"
4
+ require "rake/testtask"
5
+ require "rdoc/task"
6
+ require "bundler/gem_tasks"
7
+ require "standard/rake"
7
8
 
9
+ task default: [:test]
10
+ task format: "standard:fix"
8
11
 
9
- task :default => [:test]
10
-
11
- Rake::TestTask.new('test') do |t|
12
- t.libs << 'lib'
13
- t.pattern = 'test/**/tc_*.rb'
12
+ Rake::TestTask.new("test") do |t|
13
+ t.libs << "lib"
14
+ t.pattern = "test/**/tc_*.rb"
14
15
  t.verbose = true
15
16
  end
16
17
 
17
-
18
- Rake::RDocTask.new('doc') do |rd|
18
+ Rake::RDocTask.new("doc") do |rd|
19
19
  rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
20
- rd.main = 'MARC::Record'
21
- rd.rdoc_dir = 'doc'
20
+ rd.main = "MARC::Record"
21
+ rd.rdoc_dir = "doc"
22
22
  end
data/bin/marc ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'marc'
4
+
5
+ # the filename
6
+ filename = ARGV[0]
7
+
8
+ reader = MARC::ForgivingReader.new(filename)
9
+
10
+ reader.each { |record|
11
+ puts record
12
+ }
13
+
14
+
data/bin/marc2xml ADDED
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'marc'
4
+
5
+ # the filename
6
+ filename = ARGV[0]
7
+
8
+ reader = MARC::ForgivingReader.new(filename)
9
+ writer = MARC::XMLWriter.new($stdout)
10
+
11
+ reader.each { |record|
12
+ writer.write(record)
13
+ }
14
+
15
+ writer.close
16
+
17
+
data/examples/xml2marc.rb ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # usage: xml2marc.rb marc.xml > marc.dat
4
+
5
+ require "marc"
6
+
7
+ reader = MARC::XMLReader.new(ARGV.shift)
8
+ reader.each do |record|
9
+ $stdout.write(record.to_marc)
10
+ end
data/lib/marc/constants.rb CHANGED
@@ -1,5 +1,6 @@
1
- module MARC
1
+ # frozen_string_literal: true
2
2
 
3
+ module MARC
3
4
  # constants used in MARC21 reading/writing
4
5
  LEADER_LENGTH = 24
5
6
  DIRECTORY_ENTRY_LENGTH = 12
@@ -7,12 +8,11 @@ module MARC
7
8
  END_OF_FIELD = 0x1E.chr
8
9
  END_OF_RECORD = 0x1D.chr
9
10
 
10
- # constants used in XML reading/writing
11
+ # constants used in XML reading/writing
11
12
  MARC_NS = "http://www.loc.gov/MARC21/slim"
12
13
  MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
13
14
 
14
15
  # marc-hash
15
16
  MARCHASH_MAJOR_VERSION = 1
16
17
  MARCHASH_MINOR_VERSION = 0
17
-
18
18
  end
data/lib/marc/controlfield.rb CHANGED
@@ -1,23 +1,21 @@
1
- require 'set'
1
+ require "set"
2
2
 
3
3
  module MARC
4
-
5
- # MARC records contain control fields, each of which has a
4
+ # MARC records contain control fields, each of which has a
6
5
  # tag and value. Tags for control fields must be in the
7
6
  # 001-009 range or be specially added to the @@control_tags Set
8
7
 
9
8
  class ControlField
10
-
11
9
  # Initially, control tags are the numbers 1 through 9 or the string '000'
12
- @@control_tags = Set.new(%w{000 001 002 003 004 005 006 007 008 009})
13
-
10
+ @@control_tags = Set.new(%w[000 001 002 003 004 005 006 007 008 009])
11
+
14
12
  def self.control_tags
15
- return @@control_tags
13
+ @@control_tags
16
14
  end
17
15
 
18
16
  # A tag is a control tag if tag.to_s is a member of the @@control_tags set.
19
17
  def self.control_tag?(tag)
20
- return @@control_tags.include? tag.to_s
18
+ @@control_tags.include? tag.to_s
21
19
  end
22
20
 
23
21
  # the tag value (007, 008, etc)
@@ -26,46 +24,60 @@ module MARC
26
24
  # the value of the control field
27
25
  attr_accessor :value
28
26
 
29
- # The constructor which must be passed a tag value and
27
+ # The constructor which must be passed a tag value and
30
28
  # an optional value for the field.
31
29
 
32
- def initialize(tag,value='')
30
+ def initialize(tag, value = "")
33
31
  @tag = tag
34
32
  @value = value
35
- if not MARC::ControlField.control_tag?(@tag)
36
- raise MARC::Exception.new(), "tag must be in 001-009 or in the MARC::ControlField.control_tags set"
33
+ end
34
+
35
+ # Returns true if there are no error messages associated with the field
36
+ def valid?
37
+ errors.none?
38
+ end
39
+
40
+ # Returns an array of validation errors
41
+ def errors
42
+ messages = []
43
+
44
+ unless MARC::ControlField.control_tag?(@tag)
45
+ messages << "tag #{@tag.inspect} must be in 001-009 or in the MARC::ControlField.control_tags set"
37
46
  end
47
+
48
+ messages
38
49
  end
39
50
 
40
51
  # Two control fields are equal if their tags and values are equal.
41
52
 
42
53
  def ==(other)
54
+ if !other.is_a?(ControlField)
55
+ return false
56
+ end
43
57
  if @tag != other.tag
44
- return false
58
+ return false
45
59
  elsif @value != other.value
46
60
  return false
47
61
  end
48
- return true
62
+ true
49
63
  end
50
64
 
51
65
  # turning it into a marc-hash element
52
66
  def to_marchash
53
- return [@tag, @value]
67
+ [@tag, @value]
54
68
  end
55
-
69
+
56
70
  # Turn the control field into a hash for MARC-in-JSON
57
71
  def to_hash
58
- return {@tag=>@value}
72
+ {@tag => @value}
59
73
  end
60
-
74
+
61
75
  def to_s
62
- return "#{tag} #{value}"
76
+ "#{tag} #{value}"
63
77
  end
64
78
 
65
79
  def =~(regex)
66
- return self.to_s =~ regex
67
- end
68
-
80
+ to_s =~ regex
81
+ end
69
82
  end
70
-
71
83
  end
data/lib/marc/datafield.rb CHANGED
@@ -1,11 +1,11 @@
1
1
  module MARC
2
- # MARC records contain data fields, each of which has a tag,
2
+ # MARC records contain data fields, each of which has a tag,
3
3
  # indicators and subfields. Tags for data fields must are all
4
4
  # three-character tags that are not control fields (generally,
5
5
  # any numeric tag greater than 009).
6
6
  #
7
7
  # Accessor attributes: tag, indicator1, indicator2
8
- #
8
+ #
9
9
  # DataField mixes in Enumerable to enable access to it's constituent
10
10
  # Subfield objects. For instance, if you have a DataField representing
11
11
  # a 856 tag, and want to find all 'z' subfields:
@@ -14,7 +14,7 @@ module MARC
14
14
  #
15
15
  # Also, the accessor 'subfields' is an array of MARC::Subfield objects
16
16
  # which can be accessed or modified by the client directly if
17
- # neccesary.
17
+ # neccesary.
18
18
 
19
19
  class DataField
20
20
  include Enumerable
@@ -31,90 +31,100 @@ module MARC
31
31
  # A list of MARC::Subfield objects
32
32
  attr_accessor :subfields
33
33
 
34
-
35
34
  # Create a new field with tag, indicators and subfields.
36
- # Subfields are passed in as comma separated list of
37
- # MARC::Subfield objects,
38
- #
35
+ # Subfields are passed in as comma separated list of
36
+ # MARC::Subfield objects,
37
+ #
39
38
  # field = MARC::DataField.new('245','0','0',
40
39
  # MARC::Subfield.new('a', 'Consilience :'),
41
40
  # MARC::Subfield.new('b', 'the unity of knowledge '),
42
41
  # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
43
- #
42
+ #
44
43
  # or using a shorthand:
45
- #
44
+ #
46
45
  # field = MARC::DataField.new('245','0','0',
47
46
  # ['a', 'Consilience :'],['b','the unity of knowledge '],
48
47
  # ['c', 'by Edward O. Wilson.'] )
49
48
 
50
- def initialize(tag, i1=' ', i2=' ', *subfields)
51
- # if the tag is less than 3 characters long and
49
+ def initialize(tag, i1 = " ", i2 = " ", *subfields)
50
+ # if the tag is less than 3 characters long and
52
51
  # the string is all numeric then we pad with zeros
53
- if tag.length < 3 and /^[0-9]+$/ =~ tag
54
- @tag = "%03d" % tag
52
+ @tag = if (tag.length < 3) && (/^[0-9]+$/ =~ tag)
53
+ "%03d" % tag
55
54
  else
56
- @tag = tag
55
+ tag
57
56
  end
58
- # can't allow nil to be passed in or else it'll
57
+ # can't allow nil to be passed in or else it'll
59
58
  # screw us up later when we try to encode
60
- @indicator1 = i1 == nil ? ' ' : i1
61
- @indicator2 = i2 == nil ? ' ' : i2
62
-
63
- @subfields = []
59
+ @indicator1 = i1.nil? ? " " : i1
60
+ @indicator2 = i2.nil? ? " " : i2
64
61
 
65
- # must use MARC::ControlField for tags < 010 or
66
- # those in MARC::ControlField#extra_control_fields
67
-
68
- if MARC::ControlField.control_tag?(@tag)
69
- raise MARC::Exception.new(),
70
- "MARC::DataField objects can't have ControlField tag '" + @tag + "')"
71
- end
62
+ @subfields = []
72
63
 
73
64
  # allows MARC::Subfield objects to be passed directly
74
65
  # or a shorthand of ['a','Foo'], ['b','Bar']
75
- subfields.each do |subfield|
66
+ subfields.each do |subfield|
76
67
  case subfield
77
68
  when MARC::Subfield
78
69
  @subfields.push(subfield)
79
70
  when Array
80
71
  if subfield.length > 2
81
- raise MARC::Exception.new(),
82
- "arrays must only have 2 elements: " + subfield.to_s
72
+ raise MARC::Exception.new,
73
+ "arrays must only have 2 elements: " + subfield.to_s
83
74
  end
84
75
  @subfields.push(
85
- MARC::Subfield.new(subfield[0],subfield[1]))
86
- else
87
- raise MARC::Exception.new(),
76
+ MARC::Subfield.new(subfield[0], subfield[1])
77
+ )
78
+ else
79
+ raise MARC::Exception.new,
88
80
  "invalid subfield type #{subfield.class}"
89
81
  end
90
82
  end
91
83
  end
92
84
 
85
+ # Returns true if there are no error messages associated with the field
86
+ def valid?
87
+ errors.none?
88
+ end
89
+
90
+ # Returns an array of validation errors
91
+ def errors
92
+ messages = []
93
+
94
+ # must use MARC::ControlField for tags < 010 or
95
+ # those in MARC::ControlField#extra_control_fields
96
+
97
+ if MARC::ControlField.control_tag?(@tag)
98
+ messages << "MARC::DataField objects can't have ControlField tag '" + @tag + "'"
99
+ end
100
+
101
+ messages
102
+ end
93
103
 
94
104
  # Returns a string representation of the field such as:
95
105
  # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
96
106
 
97
107
  def to_s
98
108
  str = "#{tag} "
99
- str += "#{indicator1}#{indicator2} "
109
+ str += "#{indicator1}#{indicator2} "
100
110
  @subfields.each { |subfield| str += subfield.to_s }
101
- return str
111
+ str
102
112
  end
103
113
 
104
114
  # Turn into a marc-hash structure
105
115
  def to_marchash
106
- return [@tag, @indicator1, @indicator2, @subfields.map {|sf| [sf.code, sf.value]} ]
116
+ [@tag, @indicator1, @indicator2, @subfields.map { |sf| [sf.code, sf.value] }]
107
117
  end
108
-
118
+
109
119
  # Turn the variable field and subfields into a hash for MARC-in-JSON
110
-
120
+
111
121
  def to_hash
112
- field_hash = {@tag=>{'ind1'=>@indicator1,'ind2'=>@indicator2,'subfields'=>[]}}
113
- self.each do |subfield|
114
- field_hash[@tag]['subfields'] << {subfield.code=>subfield.value}
122
+ field_hash = {@tag => {"ind1" => @indicator1, "ind2" => @indicator2, "subfields" => []}}
123
+ each do |subfield|
124
+ field_hash[@tag]["subfields"] << {subfield.code => subfield.value}
115
125
  end
116
126
  field_hash
117
- end
127
+ end
118
128
 
119
129
  # Add a subfield (MARC::Subfield) to the field
120
130
  # field.append(MARC::Subfield.new('a','Dave Thomas'))
@@ -123,72 +133,69 @@ module MARC
123
133
  @subfields.push(subfield)
124
134
  end
125
135
 
126
-
127
-
128
136
  # You can iterate through the subfields in a Field:
129
137
  # field.each {|s| print s}
130
138
 
131
139
  def each
132
- for subfield in subfields
140
+ subfields.each do |subfield|
133
141
  yield subfield
134
142
  end
135
143
  end
136
144
 
137
- #def each_by_code(filter)
145
+ # def each_by_code(filter)
138
146
  # @subfields.each_by_code(filter)
139
- #end
147
+ # end
140
148
 
141
- # You can lookup subfields with this shorthand. Note it
149
+ # You can lookup subfields with this shorthand. Note it
142
150
  # will return a string and not a MARC::Subfield object.
143
151
  # subfield = field['a']
144
-
152
+
145
153
  def [](code)
146
- subfield = self.find {|s| s.code == code}
154
+ subfield = find { |s| s.code == code }
147
155
  return subfield.value if subfield
148
- return
156
+ nil
149
157
  end
150
-
151
158
 
152
- def codes(dedup=true)
159
+ def codes(dedup = true)
153
160
  codes = []
154
- @subfields.each {|s| codes << s.code }
161
+ @subfields.each { |s| codes << s.code }
155
162
  dedup ? codes.uniq : codes
156
163
  end
157
164
 
158
- # Two fields are equal if their tag, indicators and
165
+ # Two fields are equal if their tag, indicators and
159
166
  # subfields are all equal.
160
167
 
161
168
  def ==(other)
169
+ if !other.is_a?(DataField)
170
+ return false
171
+ end
162
172
  if @tag != other.tag
163
- return false
173
+ return false
164
174
  elsif @indicator1 != other.indicator1
165
- return false
175
+ return false
166
176
  elsif @indicator2 != other.indicator2
167
- return false
177
+ return false
168
178
  elsif @subfields != other.subfields
169
179
  return false
170
180
  end
171
- return true
181
+ true
172
182
  end
173
183
 
174
-
175
184
  # To support regex matching with fields
176
185
  #
177
186
  # if field =~ /Huckleberry/ ...
178
187
 
179
188
  def =~(regex)
180
- return self.to_s =~ regex
189
+ to_s =~ regex
181
190
  end
182
191
 
183
-
184
192
  # to get the field as a string, without the tag and indicators
185
193
  # useful in situations where you want a legible version of the field
186
194
  #
187
195
  # print record['245'].value
188
196
 
189
197
  def value
190
- return(@subfields.map {|s| s.value} .join '')
198
+ (@subfields.map { |s| s.value }.join "")
191
199
  end
192
-
193
200
  end
194
201
  end
data/lib/marc/dublincore.rb CHANGED
@@ -1,79 +1,97 @@
1
1
  module MARC
2
-
3
2
  # A class for mapping MARC records to Dublin Core
4
-
5
- class DublinCore
6
3
 
4
+ class DublinCore
7
5
  def self.map(record)
8
- dc_hash = Hash.new
9
- dc_hash['title'] = get_field_value(record['245']['a'])
6
+ dc_hash = {}
7
+ dc_hash["title"] = get_field_value(record["245"]["a"])
10
8
 
11
9
  # Creator
12
- [100, 110, 111, 700, 710, 711, 720].each do |field|
13
- dc_hash['creator'] ||= []
14
- dc_hash['creator'] << get_field_value(record[field.to_s])
10
+ ["100", "110", "111", "700", "710", "711", "720"].each do |field|
11
+ dc_hash["creator"] ||= []
12
+ dc_hash["creator"] << get_field_value(record[field])
15
13
  end
16
14
 
17
15
  # Subject
18
- [600, 610, 611, 630, 650, 653].each do |field|
19
- dc_hash['subject'] ||= []
20
- dc_hash['subject'] << get_field_value(record[field.to_s])
16
+ ["600", "610", "611", "630", "650", "653"].each do |field|
17
+ dc_hash["subject"] ||= []
18
+ dc_hash["subject"] << get_field_value(record[field])
21
19
  end
22
20
 
23
21
  # Description
24
- [500..599].each do |field|
25
- next if [506, 530, 540, 546].include?(field)
26
- dc_hash['description'] ||= []
27
- dc_hash['description'] << get_field_value(record[field.to_s])
22
+ ("500".."599").each do |field|
23
+ next if ["506", "530", "540", "546"].include?(field)
24
+ dc_hash["description"] ||= []
25
+ dc_hash["description"] << get_field_value(record[field])
28
26
  end
29
27
 
30
- dc_hash['publisher'] = get_field_value(record['260']['a']['b']) rescue nil
31
- dc_hash['date'] = get_field_value(record['260']['c']) rescue nil
32
- dc_hash['type'] = get_field_value(record['655'])
33
- dc_hash['format'] = get_field_value(record['856']['q']) rescue nil
34
- dc_hash['identifier'] = get_field_value(record['856']['u']) rescue nil
35
- dc_hash['source'] = get_field_value(record['786']['o']['t']) rescue nil
36
- dc_hash['language'] = get_field_value(record['546'])
28
+ dc_hash["publisher"] = begin
29
+ get_field_value(record["260"]["a"]["b"])
30
+ rescue
31
+ nil
32
+ end
33
+ dc_hash["date"] = begin
34
+ get_field_value(record["260"]["c"])
35
+ rescue
36
+ nil
37
+ end
38
+ dc_hash["type"] = get_field_value(record["655"])
39
+ dc_hash["format"] = begin
40
+ get_field_value(record["856"]["q"])
41
+ rescue
42
+ nil
43
+ end
44
+ dc_hash["identifier"] = begin
45
+ get_field_value(record["856"]["u"])
46
+ rescue
47
+ nil
48
+ end
49
+ dc_hash["source"] = begin
50
+ get_field_value(record["786"]["o"]["t"])
51
+ rescue
52
+ nil
53
+ end
54
+ dc_hash["language"] = get_field_value(record["546"])
37
55
 
38
- dc_hash['relation'] = []
39
- dc_hash['relation'] << get_field_value(record['530'])
40
- [760..787].each do |field|
41
- dc_hash['relation'] << get_field_value(record[field.to_s]['o']['t']) rescue nil
56
+ dc_hash["relation"] = []
57
+ dc_hash["relation"] << get_field_value(record["530"])
58
+ ("760".."787").each do |field|
59
+ dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
60
+ rescue
61
+ nil
42
62
  end
43
63
 
44
- [651, 752].each do |field|
45
- dc_hash['coverage'] ||= []
46
- dc_hash['coverage'] << get_field_value(record[field.to_s])
64
+ ["651", "752"].each do |field|
65
+ dc_hash["coverage"] ||= []
66
+ dc_hash["coverage"] << get_field_value(record[field])
47
67
  end
48
68
 
49
- [506, 540].each do |field|
50
- dc_hash['rights'] ||= []
51
- dc_hash['rights'] << get_field_value(record[field.to_s])
69
+ ["506", "540"].each do |field|
70
+ dc_hash["rights"] ||= []
71
+ dc_hash["rights"] << get_field_value(record[field])
52
72
  end
53
-
54
- dc_hash.keys.each do |key|
73
+
74
+ dc_hash.keys.each do |key|
55
75
  dc_hash[key].flatten! if dc_hash[key].respond_to?(:flatten!)
56
76
  dc_hash[key].compact! if dc_hash[key].respond_to?(:compact!)
57
77
  end
58
-
78
+
59
79
  dc_hash
60
80
  end
61
-
81
+
62
82
  def self.get_field_value(field)
63
83
  return if field.nil?
64
-
65
- if !field.kind_of?(String) && field.respond_to?(:each)
84
+
85
+ if !field.is_a?(String) && field.respond_to?(:each)
66
86
  values = []
67
87
  field.each do |element|
68
88
  values << get_field_value(element)
69
89
  end
70
90
  values
71
91
  else
72
- return field if field.kind_of?(String)
92
+ return field if field.is_a?(String)
73
93
  return field.value if field.respond_to?(:value)
74
94
  end
75
95
  end
76
-
77
96
  end
78
97
  end
79
-
data/lib/marc/exception.rb CHANGED
@@ -1,9 +1,17 @@
1
1
  module MARC
2
-
3
2
  # basic exception class for exceptions that
4
3
  # can occur during MARC processing.
5
4
 
6
5
  class Exception < RuntimeError
7
6
  end
8
7
 
8
+ class RecordException < MARC::Exception
9
+ attr_reader :record
10
+
11
+ def initialize(record)
12
+ @record = record
13
+ id = @record["001"] || "<record with no 001>"
14
+ super("Record #{id}: #{@record.errors.join("\n....")}")
15
+ end
16
+ end
9
17
  end
data/lib/marc/jsonl_reader.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module MARC
6
+ # Read marc-in-json documents from a `.jsonl` file -- also called
7
+ # "newline-delimited JSON", which is a file with one JSON document on each line.
8
+ class JSONLReader
9
+ include Enumerable
10
+
11
+ # @param [String, IO] file A filename, or open File/IO type object, from which to read
12
+ def initialize(file)
13
+ if file.is_a?(String)
14
+ raise ArgumentError.new("File '#{file}' can't be found") unless File.exist?(file)
15
+ raise ArgumentError.new("File '#{file}' can't be opened for reading") unless File.readable?(file)
16
+ @handle = File.new(file)
17
+ elsif file.respond_to?(:read, 5)
18
+ @handle = file
19
+ else
20
+ raise ArgumentError, "must pass in path or file"
21
+ end
22
+ end
23
+
24
+ # Turn marc-in-json lines into actual marc records and yield them
25
+ # @yieldreturn [MARC::Record] record created from each line of the file
26
+ def each
27
+ return enum_for(:each) unless block_given?
28
+ @handle.each do |line|
29
+ yield MARC::Record.new_from_hash(JSON.parse(line))
30
+ end
31
+ end
32
+ end
33
+ end