marc 1.0.4 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +106 -29
- data/Gemfile +15 -0
- data/README.md +240 -47
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -86
- data/lib/marc/reader.rb +119 -121
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -81
- data/lib/marc.rb +20 -18
- data/marc.gemspec +23 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -32
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +99 -87
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +94 -9
data/Rakefile
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
require
|
1
|
+
require "rubygems"
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
3
|
+
require "rake"
|
4
|
+
require "rake/testtask"
|
5
|
+
require "rdoc/task"
|
6
|
+
require "bundler/gem_tasks"
|
7
|
+
require "standard/rake"
|
7
8
|
|
9
|
+
task default: [:test]
|
10
|
+
task format: "standard:fix"
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
t.libs << 'lib'
|
13
|
-
t.pattern = 'test/**/tc_*.rb'
|
12
|
+
Rake::TestTask.new("test") do |t|
|
13
|
+
t.libs << "lib"
|
14
|
+
t.pattern = "test/**/tc_*.rb"
|
14
15
|
t.verbose = true
|
15
16
|
end
|
16
17
|
|
17
|
-
|
18
|
-
Rake::RDocTask.new('doc') do |rd|
|
18
|
+
Rake::RDocTask.new("doc") do |rd|
|
19
19
|
rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
|
20
|
-
rd.main =
|
21
|
-
rd.rdoc_dir =
|
20
|
+
rd.main = "MARC::Record"
|
21
|
+
rd.rdoc_dir = "doc"
|
22
22
|
end
|
data/bin/marc
ADDED
data/bin/marc2xml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'marc'
|
4
|
+
|
5
|
+
# the filename
|
6
|
+
filename = ARGV[0]
|
7
|
+
|
8
|
+
reader = MARC::ForgivingReader.new(filename)
|
9
|
+
writer = MARC::XMLWriter.new($stdout)
|
10
|
+
|
11
|
+
reader.each { |record|
|
12
|
+
writer.write(record)
|
13
|
+
}
|
14
|
+
|
15
|
+
writer.close
|
16
|
+
|
17
|
+
|
data/lib/marc/constants.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
module MARC
|
3
4
|
# constants used in MARC21 reading/writing
|
4
5
|
LEADER_LENGTH = 24
|
5
6
|
DIRECTORY_ENTRY_LENGTH = 12
|
@@ -7,12 +8,11 @@ module MARC
|
|
7
8
|
END_OF_FIELD = 0x1E.chr
|
8
9
|
END_OF_RECORD = 0x1D.chr
|
9
10
|
|
10
|
-
# constants used in XML reading/writing
|
11
|
+
# constants used in XML reading/writing
|
11
12
|
MARC_NS = "http://www.loc.gov/MARC21/slim"
|
12
13
|
MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
|
13
14
|
|
14
15
|
# marc-hash
|
15
16
|
MARCHASH_MAJOR_VERSION = 1
|
16
17
|
MARCHASH_MINOR_VERSION = 0
|
17
|
-
|
18
18
|
end
|
data/lib/marc/controlfield.rb
CHANGED
@@ -1,23 +1,21 @@
|
|
1
|
-
require
|
1
|
+
require "set"
|
2
2
|
|
3
3
|
module MARC
|
4
|
-
|
5
|
-
# MARC records contain control fields, each of which has a
|
4
|
+
# MARC records contain control fields, each of which has a
|
6
5
|
# tag and value. Tags for control fields must be in the
|
7
6
|
# 001-009 range or be specially added to the @@control_tags Set
|
8
7
|
|
9
8
|
class ControlField
|
10
|
-
|
11
9
|
# Initially, control tags are the three-character strings '000' through '009'
|
12
|
-
@@control_tags = Set.new(%w
|
13
|
-
|
10
|
+
@@control_tags = Set.new(%w[000 001 002 003 004 005 006 007 008 009])
|
11
|
+
|
14
12
|
def self.control_tags
|
15
|
-
|
13
|
+
@@control_tags
|
16
14
|
end
|
17
15
|
|
18
16
|
# A tag is a control tag if tag.to_s is a member of the @@control_tags set.
|
19
17
|
def self.control_tag?(tag)
|
20
|
-
|
18
|
+
@@control_tags.include? tag.to_s
|
21
19
|
end
|
22
20
|
|
23
21
|
# the tag value (007, 008, etc)
|
@@ -26,46 +24,60 @@ module MARC
|
|
26
24
|
# the value of the control field
|
27
25
|
attr_accessor :value
|
28
26
|
|
29
|
-
# The constructor which must be passed a tag value and
|
27
|
+
# The constructor which must be passed a tag value and
|
30
28
|
# an optional value for the field.
|
31
29
|
|
32
|
-
def initialize(tag,value=
|
30
|
+
def initialize(tag, value = "")
|
33
31
|
@tag = tag
|
34
32
|
@value = value
|
35
|
-
|
36
|
-
|
33
|
+
end
|
34
|
+
|
35
|
+
# Returns true if there are no error messages associated with the field
|
36
|
+
def valid?
|
37
|
+
errors.none?
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns an array of validation errors
|
41
|
+
def errors
|
42
|
+
messages = []
|
43
|
+
|
44
|
+
unless MARC::ControlField.control_tag?(@tag)
|
45
|
+
messages << "tag #{@tag.inspect} must be in 001-009 or in the MARC::ControlField.control_tags set"
|
37
46
|
end
|
47
|
+
|
48
|
+
messages
|
38
49
|
end
|
39
50
|
|
40
51
|
# Two control fields are equal if their tags and values are equal.
|
41
52
|
|
42
53
|
def ==(other)
|
54
|
+
if !other.is_a?(ControlField)
|
55
|
+
return false
|
56
|
+
end
|
43
57
|
if @tag != other.tag
|
44
|
-
return false
|
58
|
+
return false
|
45
59
|
elsif @value != other.value
|
46
60
|
return false
|
47
61
|
end
|
48
|
-
|
62
|
+
true
|
49
63
|
end
|
50
64
|
|
51
65
|
# turning it into a marc-hash element
|
52
66
|
def to_marchash
|
53
|
-
|
67
|
+
[@tag, @value]
|
54
68
|
end
|
55
|
-
|
69
|
+
|
56
70
|
# Turn the control field into a hash for MARC-in-JSON
|
57
71
|
def to_hash
|
58
|
-
|
72
|
+
{@tag => @value}
|
59
73
|
end
|
60
|
-
|
74
|
+
|
61
75
|
def to_s
|
62
|
-
|
76
|
+
"#{tag} #{value}"
|
63
77
|
end
|
64
78
|
|
65
79
|
def =~(regex)
|
66
|
-
|
67
|
-
end
|
68
|
-
|
80
|
+
to_s =~ regex
|
81
|
+
end
|
69
82
|
end
|
70
|
-
|
71
83
|
end
|
data/lib/marc/datafield.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
module MARC
|
2
|
-
# MARC records contain data fields, each of which has a tag,
|
2
|
+
# MARC records contain data fields, each of which has a tag,
|
3
3
|
# indicators and subfields. Tags for data fields are all
|
4
4
|
# three-character tags that are not control fields (generally,
|
5
5
|
# any numeric tag greater than 009).
|
6
6
|
#
|
7
7
|
# Accessor attributes: tag, indicator1, indicator2
|
8
|
-
#
|
8
|
+
#
|
9
9
|
# DataField mixes in Enumerable to enable access to its constituent
|
10
10
|
# Subfield objects. For instance, if you have a DataField representing
|
11
11
|
# a 856 tag, and want to find all 'z' subfields:
|
@@ -14,7 +14,7 @@ module MARC
|
|
14
14
|
#
|
15
15
|
# Also, the accessor 'subfields' is an array of MARC::Subfield objects
|
16
16
|
# which can be accessed or modified by the client directly if
|
17
|
-
# necessary.
|
17
|
+
# necessary.
|
18
18
|
|
19
19
|
class DataField
|
20
20
|
include Enumerable
|
@@ -31,90 +31,100 @@ module MARC
|
|
31
31
|
# A list of MARC::Subfield objects
|
32
32
|
attr_accessor :subfields
|
33
33
|
|
34
|
-
|
35
34
|
# Create a new field with tag, indicators and subfields.
|
36
|
-
# Subfields are passed in as comma separated list of
|
37
|
-
# MARC::Subfield objects,
|
38
|
-
#
|
35
|
+
# Subfields are passed in as comma separated list of
|
36
|
+
# MARC::Subfield objects,
|
37
|
+
#
|
39
38
|
# field = MARC::DataField.new('245','0','0',
|
40
39
|
# MARC::Subfield.new('a', 'Consilience :'),
|
41
40
|
# MARC::Subfield.new('b', 'the unity of knowledge '),
|
42
41
|
# MARC::Subfield.new('c', 'by Edward O. Wilson.'))
|
43
|
-
#
|
42
|
+
#
|
44
43
|
# or using a shorthand:
|
45
|
-
#
|
44
|
+
#
|
46
45
|
# field = MARC::DataField.new('245','0','0',
|
47
46
|
# ['a', 'Consilience :'],['b','the unity of knowledge '],
|
48
47
|
# ['c', 'by Edward O. Wilson.'] )
|
49
48
|
|
50
|
-
def initialize(tag, i1=
|
51
|
-
# if the tag is less than 3 characters long and
|
49
|
+
def initialize(tag, i1 = " ", i2 = " ", *subfields)
|
50
|
+
# if the tag is less than 3 characters long and
|
52
51
|
# the string is all numeric then we pad with zeros
|
53
|
-
if tag.length < 3
|
54
|
-
|
52
|
+
@tag = if (tag.length < 3) && (/^[0-9]+$/ =~ tag)
|
53
|
+
"%03d" % tag
|
55
54
|
else
|
56
|
-
|
55
|
+
tag
|
57
56
|
end
|
58
|
-
# can't allow nil to be passed in or else it'll
|
57
|
+
# can't allow nil to be passed in or else it'll
|
59
58
|
# screw us up later when we try to encode
|
60
|
-
@indicator1 = i1
|
61
|
-
@indicator2 = i2
|
62
|
-
|
63
|
-
@subfields = []
|
59
|
+
@indicator1 = i1.nil? ? " " : i1
|
60
|
+
@indicator2 = i2.nil? ? " " : i2
|
64
61
|
|
65
|
-
|
66
|
-
# those in MARC::ControlField#extra_control_fields
|
67
|
-
|
68
|
-
if MARC::ControlField.control_tag?(@tag)
|
69
|
-
raise MARC::Exception.new(),
|
70
|
-
"MARC::DataField objects can't have ControlField tag '" + @tag + "')"
|
71
|
-
end
|
62
|
+
@subfields = []
|
72
63
|
|
73
64
|
# allows MARC::Subfield objects to be passed directly
|
74
65
|
# or a shorthand of ['a','Foo'], ['b','Bar']
|
75
|
-
subfields.each do |subfield|
|
66
|
+
subfields.each do |subfield|
|
76
67
|
case subfield
|
77
68
|
when MARC::Subfield
|
78
69
|
@subfields.push(subfield)
|
79
70
|
when Array
|
80
71
|
if subfield.length > 2
|
81
|
-
raise MARC::Exception.new
|
82
|
-
"arrays must only have 2 elements: " + subfield.to_s
|
72
|
+
raise MARC::Exception.new,
|
73
|
+
"arrays must only have 2 elements: " + subfield.to_s
|
83
74
|
end
|
84
75
|
@subfields.push(
|
85
|
-
MARC::Subfield.new(subfield[0],subfield[1])
|
86
|
-
|
87
|
-
|
76
|
+
MARC::Subfield.new(subfield[0], subfield[1])
|
77
|
+
)
|
78
|
+
else
|
79
|
+
raise MARC::Exception.new,
|
88
80
|
"invalid subfield type #{subfield.class}"
|
89
81
|
end
|
90
82
|
end
|
91
83
|
end
|
92
84
|
|
85
|
+
# Returns true if there are no error messages associated with the field
|
86
|
+
def valid?
|
87
|
+
errors.none?
|
88
|
+
end
|
89
|
+
|
90
|
+
# Returns an array of validation errors
|
91
|
+
def errors
|
92
|
+
messages = []
|
93
|
+
|
94
|
+
# must use MARC::ControlField for tags < 010 or
|
95
|
+
# those in the MARC::ControlField.control_tags set
|
96
|
+
|
97
|
+
if MARC::ControlField.control_tag?(@tag)
|
98
|
+
messages << "MARC::DataField objects can't have ControlField tag '" + @tag + "'"
|
99
|
+
end
|
100
|
+
|
101
|
+
messages
|
102
|
+
end
|
93
103
|
|
94
104
|
# Returns a string representation of the field such as:
|
95
105
|
# 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
|
96
106
|
|
97
107
|
def to_s
|
98
108
|
str = "#{tag} "
|
99
|
-
str += "#{indicator1}#{indicator2} "
|
109
|
+
str += "#{indicator1}#{indicator2} "
|
100
110
|
@subfields.each { |subfield| str += subfield.to_s }
|
101
|
-
|
111
|
+
str
|
102
112
|
end
|
103
113
|
|
104
114
|
# Turn into a marc-hash structure
|
105
115
|
def to_marchash
|
106
|
-
|
116
|
+
[@tag, @indicator1, @indicator2, @subfields.map { |sf| [sf.code, sf.value] }]
|
107
117
|
end
|
108
|
-
|
118
|
+
|
109
119
|
# Turn the variable field and subfields into a hash for MARC-in-JSON
|
110
|
-
|
120
|
+
|
111
121
|
def to_hash
|
112
|
-
field_hash = {@tag=>{
|
113
|
-
|
114
|
-
field_hash[@tag][
|
122
|
+
field_hash = {@tag => {"ind1" => @indicator1, "ind2" => @indicator2, "subfields" => []}}
|
123
|
+
each do |subfield|
|
124
|
+
field_hash[@tag]["subfields"] << {subfield.code => subfield.value}
|
115
125
|
end
|
116
126
|
field_hash
|
117
|
-
end
|
127
|
+
end
|
118
128
|
|
119
129
|
# Add a subfield (MARC::Subfield) to the field
|
120
130
|
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
@@ -123,72 +133,69 @@ module MARC
|
|
123
133
|
@subfields.push(subfield)
|
124
134
|
end
|
125
135
|
|
126
|
-
|
127
|
-
|
128
136
|
# You can iterate through the subfields in a Field:
|
129
137
|
# field.each {|s| print s}
|
130
138
|
|
131
139
|
def each
|
132
|
-
|
140
|
+
subfields.each do |subfield|
|
133
141
|
yield subfield
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
137
|
-
#def each_by_code(filter)
|
145
|
+
# def each_by_code(filter)
|
138
146
|
# @subfields.each_by_code(filter)
|
139
|
-
#end
|
147
|
+
# end
|
140
148
|
|
141
|
-
# You can lookup subfields with this shorthand. Note it
|
149
|
+
# You can look up subfields with this shorthand. Note it
|
142
150
|
# will return a string and not a MARC::Subfield object.
|
143
151
|
# subfield = field['a']
|
144
|
-
|
152
|
+
|
145
153
|
def [](code)
|
146
|
-
subfield =
|
154
|
+
subfield = find { |s| s.code == code }
|
147
155
|
return subfield.value if subfield
|
148
|
-
|
156
|
+
nil
|
149
157
|
end
|
150
|
-
|
151
158
|
|
152
|
-
def codes(dedup=true)
|
159
|
+
def codes(dedup = true)
|
153
160
|
codes = []
|
154
|
-
@subfields.each {|s| codes << s.code }
|
161
|
+
@subfields.each { |s| codes << s.code }
|
155
162
|
dedup ? codes.uniq : codes
|
156
163
|
end
|
157
164
|
|
158
|
-
# Two fields are equal if their tag, indicators and
|
165
|
+
# Two fields are equal if their tag, indicators and
|
159
166
|
# subfields are all equal.
|
160
167
|
|
161
168
|
def ==(other)
|
169
|
+
if !other.is_a?(DataField)
|
170
|
+
return false
|
171
|
+
end
|
162
172
|
if @tag != other.tag
|
163
|
-
return false
|
173
|
+
return false
|
164
174
|
elsif @indicator1 != other.indicator1
|
165
|
-
return false
|
175
|
+
return false
|
166
176
|
elsif @indicator2 != other.indicator2
|
167
|
-
return false
|
177
|
+
return false
|
168
178
|
elsif @subfields != other.subfields
|
169
179
|
return false
|
170
180
|
end
|
171
|
-
|
181
|
+
true
|
172
182
|
end
|
173
183
|
|
174
|
-
|
175
184
|
# To support regex matching with fields
|
176
185
|
#
|
177
186
|
# if field =~ /Huckleberry/ ...
|
178
187
|
|
179
188
|
def =~(regex)
|
180
|
-
|
189
|
+
to_s =~ regex
|
181
190
|
end
|
182
191
|
|
183
|
-
|
184
192
|
# to get the field as a string, without the tag and indicators
|
185
193
|
# useful in situations where you want a legible version of the field
|
186
194
|
#
|
187
195
|
# print record['245'].value
|
188
196
|
|
189
197
|
def value
|
190
|
-
|
198
|
+
(@subfields.map { |s| s.value }.join "")
|
191
199
|
end
|
192
|
-
|
193
200
|
end
|
194
201
|
end
|
data/lib/marc/dublincore.rb
CHANGED
@@ -1,79 +1,97 @@
|
|
1
1
|
module MARC
|
2
|
-
|
3
2
|
# A class for mapping MARC records to Dublin Core
|
4
|
-
|
5
|
-
class DublinCore
|
6
3
|
|
4
|
+
class DublinCore
|
7
5
|
def self.map(record)
|
8
|
-
dc_hash =
|
9
|
-
dc_hash[
|
6
|
+
dc_hash = {}
|
7
|
+
dc_hash["title"] = get_field_value(record["245"]["a"])
|
10
8
|
|
11
9
|
# Creator
|
12
|
-
[100, 110, 111, 700, 710, 711, 720].each do |field|
|
13
|
-
dc_hash[
|
14
|
-
dc_hash[
|
10
|
+
["100", "110", "111", "700", "710", "711", "720"].each do |field|
|
11
|
+
dc_hash["creator"] ||= []
|
12
|
+
dc_hash["creator"] << get_field_value(record[field])
|
15
13
|
end
|
16
14
|
|
17
15
|
# Subject
|
18
|
-
[600, 610, 611, 630, 650, 653].each do |field|
|
19
|
-
dc_hash[
|
20
|
-
dc_hash[
|
16
|
+
["600", "610", "611", "630", "650", "653"].each do |field|
|
17
|
+
dc_hash["subject"] ||= []
|
18
|
+
dc_hash["subject"] << get_field_value(record[field])
|
21
19
|
end
|
22
20
|
|
23
21
|
# Description
|
24
|
-
|
25
|
-
next if [506, 530, 540, 546].include?(field)
|
26
|
-
dc_hash[
|
27
|
-
dc_hash[
|
22
|
+
("500".."599").each do |field|
|
23
|
+
next if ["506", "530", "540", "546"].include?(field)
|
24
|
+
dc_hash["description"] ||= []
|
25
|
+
dc_hash["description"] << get_field_value(record[field])
|
28
26
|
end
|
29
27
|
|
30
|
-
dc_hash[
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
dc_hash[
|
36
|
-
|
28
|
+
dc_hash["publisher"] = begin
|
29
|
+
get_field_value(record["260"]["a"]["b"])
|
30
|
+
rescue
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
dc_hash["date"] = begin
|
34
|
+
get_field_value(record["260"]["c"])
|
35
|
+
rescue
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
dc_hash["type"] = get_field_value(record["655"])
|
39
|
+
dc_hash["format"] = begin
|
40
|
+
get_field_value(record["856"]["q"])
|
41
|
+
rescue
|
42
|
+
nil
|
43
|
+
end
|
44
|
+
dc_hash["identifier"] = begin
|
45
|
+
get_field_value(record["856"]["u"])
|
46
|
+
rescue
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
dc_hash["source"] = begin
|
50
|
+
get_field_value(record["786"]["o"]["t"])
|
51
|
+
rescue
|
52
|
+
nil
|
53
|
+
end
|
54
|
+
dc_hash["language"] = get_field_value(record["546"])
|
37
55
|
|
38
|
-
dc_hash[
|
39
|
-
dc_hash[
|
40
|
-
|
41
|
-
dc_hash[
|
56
|
+
dc_hash["relation"] = []
|
57
|
+
dc_hash["relation"] << get_field_value(record["530"])
|
58
|
+
("760".."787").each do |field|
|
59
|
+
dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
|
60
|
+
rescue
|
61
|
+
nil
|
42
62
|
end
|
43
63
|
|
44
|
-
[651, 752].each do |field|
|
45
|
-
dc_hash[
|
46
|
-
dc_hash[
|
64
|
+
["651", "752"].each do |field|
|
65
|
+
dc_hash["coverage"] ||= []
|
66
|
+
dc_hash["coverage"] << get_field_value(record[field])
|
47
67
|
end
|
48
68
|
|
49
|
-
[506, 540].each do |field|
|
50
|
-
dc_hash[
|
51
|
-
dc_hash[
|
69
|
+
["506", "540"].each do |field|
|
70
|
+
dc_hash["rights"] ||= []
|
71
|
+
dc_hash["rights"] << get_field_value(record[field])
|
52
72
|
end
|
53
|
-
|
54
|
-
dc_hash.keys.each do |key|
|
73
|
+
|
74
|
+
dc_hash.keys.each do |key|
|
55
75
|
dc_hash[key].flatten! if dc_hash[key].respond_to?(:flatten!)
|
56
76
|
dc_hash[key].compact! if dc_hash[key].respond_to?(:compact!)
|
57
77
|
end
|
58
|
-
|
78
|
+
|
59
79
|
dc_hash
|
60
80
|
end
|
61
|
-
|
81
|
+
|
62
82
|
def self.get_field_value(field)
|
63
83
|
return if field.nil?
|
64
|
-
|
65
|
-
if !field.
|
84
|
+
|
85
|
+
if !field.is_a?(String) && field.respond_to?(:each)
|
66
86
|
values = []
|
67
87
|
field.each do |element|
|
68
88
|
values << get_field_value(element)
|
69
89
|
end
|
70
90
|
values
|
71
91
|
else
|
72
|
-
return field if field.
|
92
|
+
return field if field.is_a?(String)
|
73
93
|
return field.value if field.respond_to?(:value)
|
74
94
|
end
|
75
95
|
end
|
76
|
-
|
77
96
|
end
|
78
97
|
end
|
79
|
-
|
data/lib/marc/exception.rb
CHANGED
@@ -1,9 +1,17 @@
|
|
1
1
|
module MARC
|
2
|
-
|
3
2
|
# basic exception class for exceptions that
|
4
3
|
# can occur during MARC processing.
|
5
4
|
|
6
5
|
class Exception < RuntimeError
|
7
6
|
end
|
8
7
|
|
8
|
+
class RecordException < MARC::Exception
|
9
|
+
attr_reader :record
|
10
|
+
|
11
|
+
def initialize(record)
|
12
|
+
@record = record
|
13
|
+
id = @record["001"] || "<record with no 001>"
|
14
|
+
super("Record #{id}: #{@record.errors.join("\n....")}")
|
15
|
+
end
|
16
|
+
end
|
9
17
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module MARC
|
6
|
+
# Read marc-in-json documents from a `.jsonl` file -- also called
|
7
|
+
# "newline-delimited JSON", which is a file with one JSON document on each line.
|
8
|
+
class JSONLReader
|
9
|
+
include Enumerable
|
10
|
+
|
11
|
+
# @param [String, IO] file A filename, or open File/IO type object, from which to read
|
12
|
+
def initialize(file)
|
13
|
+
if file.is_a?(String)
|
14
|
+
raise ArgumentError.new("File '#{file}' can't be found") unless File.exist?(file)
|
15
|
+
raise ArgumentError.new("File '#{file}' can't be opened for reading") unless File.readable?(file)
|
16
|
+
@handle = File.new(file)
|
17
|
+
elsif file.respond_to?(:read, 5)
|
18
|
+
@handle = file
|
19
|
+
else
|
20
|
+
raise ArgumentError, "must pass in path or file"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# Turn marc-in-json lines into actual marc records and yield them
|
25
|
+
# @yieldparam [MARC::Record] record the record created from each line of the file
|
26
|
+
def each
|
27
|
+
return enum_for(:each) unless block_given?
|
28
|
+
@handle.each do |line|
|
29
|
+
yield MARC::Record.new_from_hash(JSON.parse(line))
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|