marc 1.1.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +116 -30
  7. data/Gemfile +5 -0
  8. data/README.md +239 -46
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -87
  22. data/lib/marc/reader.rb +116 -124
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -82
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +28 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -34
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +101 -94
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +129 -22
data/Rakefile CHANGED
@@ -1,22 +1,22 @@
1
- require 'rubygems'
1
+ require "rubygems"
2
2
 
3
- require 'rake'
4
- require 'rake/testtask'
5
- require 'rdoc/task'
6
- require 'bundler/gem_tasks'
3
+ require "rake"
4
+ require "rake/testtask"
5
+ require "rdoc/task"
6
+ require "bundler/gem_tasks"
7
+ require "standard/rake"
7
8
 
9
+ task default: [:test]
10
+ task format: "standard:fix"
8
11
 
9
- task :default => [:test]
10
-
11
- Rake::TestTask.new('test') do |t|
12
- t.libs << 'lib'
13
- t.pattern = 'test/**/tc_*.rb'
12
+ Rake::TestTask.new("test") do |t|
13
+ t.libs << "lib"
14
+ t.pattern = "test/**/tc_*.rb"
14
15
  t.verbose = true
15
16
  end
16
17
 
17
-
18
- Rake::RDocTask.new('doc') do |rd|
18
+ Rake::RDocTask.new("doc") do |rd|
19
19
  rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
20
- rd.main = 'MARC::Record'
21
- rd.rdoc_dir = 'doc'
20
+ rd.main = "MARC::Record"
21
+ rd.rdoc_dir = "doc"
22
22
  end
data/bin/marc ADDED
@@ -0,0 +1,14 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'marc'
4
+
5
+ # the filename
6
+ filename = ARGV[0]
7
+
8
+ reader = MARC::ForgivingReader.new(filename)
9
+
10
+ reader.each { |record|
11
+ puts record
12
+ }
13
+
14
+
data/bin/marc2xml ADDED
@@ -0,0 +1,17 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ require 'marc'
4
+
5
+ # the filename
6
+ filename = ARGV[0]
7
+
8
+ reader = MARC::ForgivingReader.new(filename)
9
+ writer = MARC::XMLWriter.new($stdout)
10
+
11
+ reader.each { |record|
12
+ writer.write(record)
13
+ }
14
+
15
+ writer.close
16
+
17
+
data/examples/xml2marc.rb ADDED
@@ -0,0 +1,10 @@
1
+ #!/usr/bin/env ruby
2
+
3
+ # usage: xml2marc.rb marc.xml > marc.dat
4
+
5
+ require "marc"
6
+
7
+ reader = MARC::XMLReader.new(ARGV.shift)
8
+ reader.each do |record|
9
+ $stdout.write(record.to_marc)
10
+ end
data/lib/marc/constants.rb CHANGED
@@ -1,5 +1,6 @@
1
- module MARC
1
+ # frozen_string_literal: true
2
2
 
3
+ module MARC
3
4
  # constants used in MARC21 reading/writing
4
5
  LEADER_LENGTH = 24
5
6
  DIRECTORY_ENTRY_LENGTH = 12
@@ -7,12 +8,11 @@ module MARC
7
8
  END_OF_FIELD = 0x1E.chr
8
9
  END_OF_RECORD = 0x1D.chr
9
10
 
10
- # constants used in XML reading/writing
11
+ # constants used in XML reading/writing
11
12
  MARC_NS = "http://www.loc.gov/MARC21/slim"
12
13
  MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
13
14
 
14
15
  # marc-hash
15
16
  MARCHASH_MAJOR_VERSION = 1
16
17
  MARCHASH_MINOR_VERSION = 0
17
-
18
18
  end
data/lib/marc/controlfield.rb CHANGED
@@ -1,23 +1,21 @@
1
- require 'set'
1
+ require "set"
2
2
 
3
3
  module MARC
4
-
5
- # MARC records contain control fields, each of which has a
4
+ # MARC records contain control fields, each of which has a
6
5
  # tag and value. Tags for control fields must be in the
7
6
  # 001-009 range or be specially added to the @@control_tags Set
8
7
 
9
8
  class ControlField
10
-
11
9
  # Initially, control tags are the numbers 1 through 9 or the string '000'
12
- @@control_tags = Set.new(%w{000 001 002 003 004 005 006 007 008 009})
13
-
10
+ @@control_tags = Set.new(%w[000 001 002 003 004 005 006 007 008 009])
11
+
14
12
  def self.control_tags
15
- return @@control_tags
13
+ @@control_tags
16
14
  end
17
15
 
18
16
  # A tag is a control tag if tag.to_s is a member of the @@control_tags set.
19
17
  def self.control_tag?(tag)
20
- return @@control_tags.include? tag.to_s
18
+ @@control_tags.include? tag.to_s
21
19
  end
22
20
 
23
21
  # the tag value (007, 008, etc)
@@ -26,46 +24,60 @@ module MARC
26
24
  # the value of the control field
27
25
  attr_accessor :value
28
26
 
29
- # The constructor which must be passed a tag value and
27
+ # The constructor which must be passed a tag value and
30
28
  # an optional value for the field.
31
29
 
32
- def initialize(tag,value='')
30
+ def initialize(tag, value = "")
33
31
  @tag = tag
34
32
  @value = value
35
- if not MARC::ControlField.control_tag?(@tag)
36
- raise MARC::Exception.new(), "tag must be in 001-009 or in the MARC::ControlField.control_tags set"
33
+ end
34
+
35
+ # Returns true if there are no error messages associated with the field
36
+ def valid?
37
+ errors.none?
38
+ end
39
+
40
+ # Returns an array of validation errors
41
+ def errors
42
+ messages = []
43
+
44
+ unless MARC::ControlField.control_tag?(@tag)
45
+ messages << "tag #{@tag.inspect} must be in 001-009 or in the MARC::ControlField.control_tags set"
37
46
  end
47
+
48
+ messages
38
49
  end
39
50
 
40
51
  # Two control fields are equal if their tags and values are equal.
41
52
 
42
53
  def ==(other)
54
+ if !other.is_a?(ControlField)
55
+ return false
56
+ end
43
57
  if @tag != other.tag
44
- return false
58
+ return false
45
59
  elsif @value != other.value
46
60
  return false
47
61
  end
48
- return true
62
+ true
49
63
  end
50
64
 
51
65
  # turning it into a marc-hash element
52
66
  def to_marchash
53
- return [@tag, @value]
67
+ [@tag, @value]
54
68
  end
55
-
69
+
56
70
  # Turn the control field into a hash for MARC-in-JSON
57
71
  def to_hash
58
- return {@tag=>@value}
72
+ {@tag => @value}
59
73
  end
60
-
74
+
61
75
  def to_s
62
- return "#{tag} #{value}"
76
+ "#{tag} #{value}"
63
77
  end
64
78
 
65
79
  def =~(regex)
66
- return self.to_s =~ regex
67
- end
68
-
80
+ to_s =~ regex
81
+ end
69
82
  end
70
-
71
83
  end
data/lib/marc/datafield.rb CHANGED
@@ -1,11 +1,11 @@
1
1
  module MARC
2
- # MARC records contain data fields, each of which has a tag,
2
+ # MARC records contain data fields, each of which has a tag,
3
3
  # indicators and subfields. Tags for data fields must are all
4
4
  # three-character tags that are not control fields (generally,
5
5
  # any numeric tag greater than 009).
6
6
  #
7
7
  # Accessor attributes: tag, indicator1, indicator2
8
- #
8
+ #
9
9
  # DataField mixes in Enumerable to enable access to it's constituent
10
10
  # Subfield objects. For instance, if you have a DataField representing
11
11
  # a 856 tag, and want to find all 'z' subfields:
@@ -14,7 +14,7 @@ module MARC
14
14
  #
15
15
  # Also, the accessor 'subfields' is an array of MARC::Subfield objects
16
16
  # which can be accessed or modified by the client directly if
17
- # neccesary.
17
+ # neccesary.
18
18
 
19
19
  class DataField
20
20
  include Enumerable
@@ -31,90 +31,100 @@ module MARC
31
31
  # A list of MARC::Subfield objects
32
32
  attr_accessor :subfields
33
33
 
34
-
35
34
  # Create a new field with tag, indicators and subfields.
36
- # Subfields are passed in as comma separated list of
37
- # MARC::Subfield objects,
38
- #
35
+ # Subfields are passed in as comma separated list of
36
+ # MARC::Subfield objects,
37
+ #
39
38
  # field = MARC::DataField.new('245','0','0',
40
39
  # MARC::Subfield.new('a', 'Consilience :'),
41
40
  # MARC::Subfield.new('b', 'the unity of knowledge '),
42
41
  # MARC::Subfield.new('c', 'by Edward O. Wilson.'))
43
- #
42
+ #
44
43
  # or using a shorthand:
45
- #
44
+ #
46
45
  # field = MARC::DataField.new('245','0','0',
47
46
  # ['a', 'Consilience :'],['b','the unity of knowledge '],
48
47
  # ['c', 'by Edward O. Wilson.'] )
49
48
 
50
- def initialize(tag, i1=' ', i2=' ', *subfields)
51
- # if the tag is less than 3 characters long and
49
+ def initialize(tag, i1 = " ", i2 = " ", *subfields)
50
+ # if the tag is less than 3 characters long and
52
51
  # the string is all numeric then we pad with zeros
53
- if tag.length < 3 and /^[0-9]+$/ =~ tag
54
- @tag = "%03d" % tag
52
+ @tag = if (tag.length < 3) && (/^[0-9]+$/ =~ tag)
53
+ "%03d" % tag
55
54
  else
56
- @tag = tag
55
+ tag
57
56
  end
58
- # can't allow nil to be passed in or else it'll
57
+ # can't allow nil to be passed in or else it'll
59
58
  # screw us up later when we try to encode
60
- @indicator1 = i1 == nil ? ' ' : i1
61
- @indicator2 = i2 == nil ? ' ' : i2
62
-
63
- @subfields = []
59
+ @indicator1 = i1.nil? ? " " : i1
60
+ @indicator2 = i2.nil? ? " " : i2
64
61
 
65
- # must use MARC::ControlField for tags < 010 or
66
- # those in MARC::ControlField#extra_control_fields
67
-
68
- if MARC::ControlField.control_tag?(@tag)
69
- raise MARC::Exception.new(),
70
- "MARC::DataField objects can't have ControlField tag '" + @tag + "')"
71
- end
62
+ @subfields = []
72
63
 
73
64
  # allows MARC::Subfield objects to be passed directly
74
65
  # or a shorthand of ['a','Foo'], ['b','Bar']
75
- subfields.each do |subfield|
66
+ subfields.each do |subfield|
76
67
  case subfield
77
68
  when MARC::Subfield
78
69
  @subfields.push(subfield)
79
70
  when Array
80
71
  if subfield.length > 2
81
- raise MARC::Exception.new(),
82
- "arrays must only have 2 elements: " + subfield.to_s
72
+ raise MARC::Exception.new,
73
+ "arrays must only have 2 elements: " + subfield.to_s
83
74
  end
84
75
  @subfields.push(
85
- MARC::Subfield.new(subfield[0],subfield[1]))
86
- else
87
- raise MARC::Exception.new(),
76
+ MARC::Subfield.new(subfield[0], subfield[1])
77
+ )
78
+ else
79
+ raise MARC::Exception.new,
88
80
  "invalid subfield type #{subfield.class}"
89
81
  end
90
82
  end
91
83
  end
92
84
 
85
+ # Returns true if there are no error messages associated with the field
86
+ def valid?
87
+ errors.none?
88
+ end
89
+
90
+ # Returns an array of validation errors
91
+ def errors
92
+ messages = []
93
+
94
+ # must use MARC::ControlField for tags < 010 or
95
+ # those in MARC::ControlField#extra_control_fields
96
+
97
+ if MARC::ControlField.control_tag?(@tag)
98
+ messages << "MARC::DataField objects can't have ControlField tag '" + @tag + "'"
99
+ end
100
+
101
+ messages
102
+ end
93
103
 
94
104
  # Returns a string representation of the field such as:
95
105
  # 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
96
106
 
97
107
  def to_s
98
108
  str = "#{tag} "
99
- str += "#{indicator1}#{indicator2} "
109
+ str += "#{indicator1}#{indicator2} "
100
110
  @subfields.each { |subfield| str += subfield.to_s }
101
- return str
111
+ str
102
112
  end
103
113
 
104
114
  # Turn into a marc-hash structure
105
115
  def to_marchash
106
- return [@tag, @indicator1, @indicator2, @subfields.map {|sf| [sf.code, sf.value]} ]
116
+ [@tag, @indicator1, @indicator2, @subfields.map { |sf| [sf.code, sf.value] }]
107
117
  end
108
-
118
+
109
119
  # Turn the variable field and subfields into a hash for MARC-in-JSON
110
-
120
+
111
121
  def to_hash
112
- field_hash = {@tag=>{'ind1'=>@indicator1,'ind2'=>@indicator2,'subfields'=>[]}}
113
- self.each do |subfield|
114
- field_hash[@tag]['subfields'] << {subfield.code=>subfield.value}
122
+ field_hash = {@tag => {"ind1" => @indicator1, "ind2" => @indicator2, "subfields" => []}}
123
+ each do |subfield|
124
+ field_hash[@tag]["subfields"] << {subfield.code => subfield.value}
115
125
  end
116
126
  field_hash
117
- end
127
+ end
118
128
 
119
129
  # Add a subfield (MARC::Subfield) to the field
120
130
  # field.append(MARC::Subfield.new('a','Dave Thomas'))
@@ -123,72 +133,69 @@ module MARC
123
133
  @subfields.push(subfield)
124
134
  end
125
135
 
126
-
127
-
128
136
  # You can iterate through the subfields in a Field:
129
137
  # field.each {|s| print s}
130
138
 
131
139
  def each
132
- for subfield in subfields
140
+ subfields.each do |subfield|
133
141
  yield subfield
134
142
  end
135
143
  end
136
144
 
137
- #def each_by_code(filter)
145
+ # def each_by_code(filter)
138
146
  # @subfields.each_by_code(filter)
139
- #end
147
+ # end
140
148
 
141
- # You can lookup subfields with this shorthand. Note it
149
+ # You can lookup subfields with this shorthand. Note it
142
150
  # will return a string and not a MARC::Subfield object.
143
151
  # subfield = field['a']
144
-
152
+
145
153
  def [](code)
146
- subfield = self.find {|s| s.code == code}
154
+ subfield = find { |s| s.code == code }
147
155
  return subfield.value if subfield
148
- return
156
+ nil
149
157
  end
150
-
151
158
 
152
- def codes(dedup=true)
159
+ def codes(dedup = true)
153
160
  codes = []
154
- @subfields.each {|s| codes << s.code }
161
+ @subfields.each { |s| codes << s.code }
155
162
  dedup ? codes.uniq : codes
156
163
  end
157
164
 
158
- # Two fields are equal if their tag, indicators and
165
+ # Two fields are equal if their tag, indicators and
159
166
  # subfields are all equal.
160
167
 
161
168
  def ==(other)
169
+ if !other.is_a?(DataField)
170
+ return false
171
+ end
162
172
  if @tag != other.tag
163
- return false
173
+ return false
164
174
  elsif @indicator1 != other.indicator1
165
- return false
175
+ return false
166
176
  elsif @indicator2 != other.indicator2
167
- return false
177
+ return false
168
178
  elsif @subfields != other.subfields
169
179
  return false
170
180
  end
171
- return true
181
+ true
172
182
  end
173
183
 
174
-
175
184
  # To support regex matching with fields
176
185
  #
177
186
  # if field =~ /Huckleberry/ ...
178
187
 
179
188
  def =~(regex)
180
- return self.to_s =~ regex
189
+ to_s =~ regex
181
190
  end
182
191
 
183
-
184
192
  # to get the field as a string, without the tag and indicators
185
193
  # useful in situations where you want a legible version of the field
186
194
  #
187
195
  # print record['245'].value
188
196
 
189
197
  def value
190
- return(@subfields.map {|s| s.value} .join '')
198
+ (@subfields.map { |s| s.value }.join "")
191
199
  end
192
-
193
200
  end
194
201
  end
data/lib/marc/dublincore.rb CHANGED
@@ -1,79 +1,97 @@
1
1
  module MARC
2
-
3
2
  # A class for mapping MARC records to Dublin Core
4
-
5
- class DublinCore
6
3
 
4
+ class DublinCore
7
5
  def self.map(record)
8
- dc_hash = Hash.new
9
- dc_hash['title'] = get_field_value(record['245']['a'])
6
+ dc_hash = {}
7
+ dc_hash["title"] = get_field_value(record["245"]["a"])
10
8
 
11
9
  # Creator
12
- [100, 110, 111, 700, 710, 711, 720].each do |field|
13
- dc_hash['creator'] ||= []
14
- dc_hash['creator'] << get_field_value(record[field.to_s])
10
+ ["100", "110", "111", "700", "710", "711", "720"].each do |field|
11
+ dc_hash["creator"] ||= []
12
+ dc_hash["creator"] << get_field_value(record[field])
15
13
  end
16
14
 
17
15
  # Subject
18
- [600, 610, 611, 630, 650, 653].each do |field|
19
- dc_hash['subject'] ||= []
20
- dc_hash['subject'] << get_field_value(record[field.to_s])
16
+ ["600", "610", "611", "630", "650", "653"].each do |field|
17
+ dc_hash["subject"] ||= []
18
+ dc_hash["subject"] << get_field_value(record[field])
21
19
  end
22
20
 
23
21
  # Description
24
- [500..599].each do |field|
25
- next if [506, 530, 540, 546].include?(field)
26
- dc_hash['description'] ||= []
27
- dc_hash['description'] << get_field_value(record[field.to_s])
22
+ ("500".."599").each do |field|
23
+ next if ["506", "530", "540", "546"].include?(field)
24
+ dc_hash["description"] ||= []
25
+ dc_hash["description"] << get_field_value(record[field])
28
26
  end
29
27
 
30
- dc_hash['publisher'] = get_field_value(record['260']['a']['b']) rescue nil
31
- dc_hash['date'] = get_field_value(record['260']['c']) rescue nil
32
- dc_hash['type'] = get_field_value(record['655'])
33
- dc_hash['format'] = get_field_value(record['856']['q']) rescue nil
34
- dc_hash['identifier'] = get_field_value(record['856']['u']) rescue nil
35
- dc_hash['source'] = get_field_value(record['786']['o']['t']) rescue nil
36
- dc_hash['language'] = get_field_value(record['546'])
28
+ dc_hash["publisher"] = begin
29
+ get_field_value(record["260"]["a"]["b"])
30
+ rescue
31
+ nil
32
+ end
33
+ dc_hash["date"] = begin
34
+ get_field_value(record["260"]["c"])
35
+ rescue
36
+ nil
37
+ end
38
+ dc_hash["type"] = get_field_value(record["655"])
39
+ dc_hash["format"] = begin
40
+ get_field_value(record["856"]["q"])
41
+ rescue
42
+ nil
43
+ end
44
+ dc_hash["identifier"] = begin
45
+ get_field_value(record["856"]["u"])
46
+ rescue
47
+ nil
48
+ end
49
+ dc_hash["source"] = begin
50
+ get_field_value(record["786"]["o"]["t"])
51
+ rescue
52
+ nil
53
+ end
54
+ dc_hash["language"] = get_field_value(record["546"])
37
55
 
38
- dc_hash['relation'] = []
39
- dc_hash['relation'] << get_field_value(record['530'])
40
- [760..787].each do |field|
41
- dc_hash['relation'] << get_field_value(record[field.to_s]['o']['t']) rescue nil
56
+ dc_hash["relation"] = []
57
+ dc_hash["relation"] << get_field_value(record["530"])
58
+ ("760".."787").each do |field|
59
+ dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
60
+ rescue
61
+ nil
42
62
  end
43
63
 
44
- [651, 752].each do |field|
45
- dc_hash['coverage'] ||= []
46
- dc_hash['coverage'] << get_field_value(record[field.to_s])
64
+ ["651", "752"].each do |field|
65
+ dc_hash["coverage"] ||= []
66
+ dc_hash["coverage"] << get_field_value(record[field])
47
67
  end
48
68
 
49
- [506, 540].each do |field|
50
- dc_hash['rights'] ||= []
51
- dc_hash['rights'] << get_field_value(record[field.to_s])
69
+ ["506", "540"].each do |field|
70
+ dc_hash["rights"] ||= []
71
+ dc_hash["rights"] << get_field_value(record[field])
52
72
  end
53
-
54
- dc_hash.keys.each do |key|
73
+
74
+ dc_hash.keys.each do |key|
55
75
  dc_hash[key].flatten! if dc_hash[key].respond_to?(:flatten!)
56
76
  dc_hash[key].compact! if dc_hash[key].respond_to?(:compact!)
57
77
  end
58
-
78
+
59
79
  dc_hash
60
80
  end
61
-
81
+
62
82
  def self.get_field_value(field)
63
83
  return if field.nil?
64
-
65
- if !field.kind_of?(String) && field.respond_to?(:each)
84
+
85
+ if !field.is_a?(String) && field.respond_to?(:each)
66
86
  values = []
67
87
  field.each do |element|
68
88
  values << get_field_value(element)
69
89
  end
70
90
  values
71
91
  else
72
- return field if field.kind_of?(String)
92
+ return field if field.is_a?(String)
73
93
  return field.value if field.respond_to?(:value)
74
94
  end
75
95
  end
76
-
77
96
  end
78
97
  end
79
-
data/lib/marc/exception.rb CHANGED
@@ -1,9 +1,17 @@
1
1
  module MARC
2
-
3
2
  # basic exception class for exceptions that
4
3
  # can occur during MARC processing.
5
4
 
6
5
  class Exception < RuntimeError
7
6
  end
8
7
 
8
+ class RecordException < MARC::Exception
9
+ attr_reader :record
10
+
11
+ def initialize(record)
12
+ @record = record
13
+ id = @record["001"] || "<record with no 001>"
14
+ super("Record #{id}: #{@record.errors.join("\n....")}")
15
+ end
16
+ end
9
17
  end
data/lib/marc/jsonl_reader.rb ADDED
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "json"
4
+
5
+ module MARC
6
+ # Read marc-in-json documents from a `.jsonl` file -- also called
7
+ # "newline-delimited JSON", which is a file with one JSON document on each line.
8
+ class JSONLReader
9
+ include Enumerable
10
+
11
+ # @param [String, IO] file A filename, or open File/IO type object, from which to read
12
+ def initialize(file)
13
+ if file.is_a?(String)
14
+ raise ArgumentError.new("File '#{file}' can't be found") unless File.exist?(file)
15
+ raise ArgumentError.new("File '#{file}' can't be opened for reading") unless File.readable?(file)
16
+ @handle = File.new(file)
17
+ elsif file.respond_to?(:read, 5)
18
+ @handle = file
19
+ else
20
+ raise ArgumentError, "must pass in path or file"
21
+ end
22
+ end
23
+
24
+ # Turn marc-in-json lines into actual marc records and yield them
25
+ # @yieldreturn [MARC::Record] record created from each line of the file
26
+ def each
27
+ return enum_for(:each) unless block_given?
28
+ @handle.each do |line|
29
+ yield MARC::Record.new_from_hash(JSON.parse(line))
30
+ end
31
+ end
32
+ end
33
+ end