marc 1.0.4 → 1.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
- data/.github/workflows/ruby.yml +24 -0
- data/.gitignore +17 -0
- data/.standard.yml +1 -0
- data/{Changes → CHANGELOG.md} +106 -29
- data/Gemfile +15 -0
- data/README.md +240 -47
- data/Rakefile +14 -14
- data/bin/marc +14 -0
- data/bin/marc2xml +17 -0
- data/examples/xml2marc.rb +10 -0
- data/lib/marc/constants.rb +3 -3
- data/lib/marc/controlfield.rb +35 -23
- data/lib/marc/datafield.rb +70 -63
- data/lib/marc/dublincore.rb +59 -41
- data/lib/marc/exception.rb +9 -1
- data/lib/marc/jsonl_reader.rb +33 -0
- data/lib/marc/jsonl_writer.rb +44 -0
- data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
- data/lib/marc/marc8/to_unicode.rb +80 -86
- data/lib/marc/reader.rb +119 -121
- data/lib/marc/record.rb +72 -62
- data/lib/marc/subfield.rb +12 -10
- data/lib/marc/unsafe_xmlwriter.rb +93 -0
- data/lib/marc/version.rb +1 -1
- data/lib/marc/writer.rb +27 -30
- data/lib/marc/xml_parsers.rb +222 -197
- data/lib/marc/xmlreader.rb +131 -114
- data/lib/marc/xmlwriter.rb +93 -81
- data/lib/marc.rb +20 -18
- data/marc.gemspec +23 -0
- data/test/marc8/tc_marc8_mapping.rb +3 -3
- data/test/marc8/tc_to_unicode.rb +28 -32
- data/test/messed_up_leader.xml +9 -0
- data/test/tc_controlfield.rb +37 -34
- data/test/tc_datafield.rb +65 -60
- data/test/tc_dublincore.rb +9 -11
- data/test/tc_hash.rb +10 -13
- data/test/tc_jsonl.rb +19 -0
- data/test/tc_marchash.rb +17 -21
- data/test/tc_parsers.rb +108 -144
- data/test/tc_reader.rb +35 -36
- data/test/tc_reader_char_encodings.rb +149 -169
- data/test/tc_record.rb +143 -148
- data/test/tc_subfield.rb +14 -13
- data/test/tc_unsafe_xml.rb +95 -0
- data/test/tc_writer.rb +101 -108
- data/test/tc_xml.rb +99 -87
- data/test/tc_xml_error_handling.rb +7 -8
- data/test/ts_marc.rb +8 -8
- metadata +94 -9
data/Rakefile
CHANGED
@@ -1,22 +1,22 @@
|
|
1
|
-
require
|
1
|
+
require "rubygems"
|
2
2
|
|
3
|
-
require
|
4
|
-
require
|
5
|
-
require
|
6
|
-
require
|
3
|
+
require "rake"
|
4
|
+
require "rake/testtask"
|
5
|
+
require "rdoc/task"
|
6
|
+
require "bundler/gem_tasks"
|
7
|
+
require "standard/rake"
|
7
8
|
|
9
|
+
task default: [:test]
|
10
|
+
task format: "standard:fix"
|
8
11
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
t.libs << 'lib'
|
13
|
-
t.pattern = 'test/**/tc_*.rb'
|
12
|
+
Rake::TestTask.new("test") do |t|
|
13
|
+
t.libs << "lib"
|
14
|
+
t.pattern = "test/**/tc_*.rb"
|
14
15
|
t.verbose = true
|
15
16
|
end
|
16
17
|
|
17
|
-
|
18
|
-
Rake::RDocTask.new('doc') do |rd|
|
18
|
+
Rake::RDocTask.new("doc") do |rd|
|
19
19
|
rd.rdoc_files.include("README", "Changes", "LICENSE", "lib/**/*.rb")
|
20
|
-
rd.main =
|
21
|
-
rd.rdoc_dir =
|
20
|
+
rd.main = "MARC::Record"
|
21
|
+
rd.rdoc_dir = "doc"
|
22
22
|
end
|
data/bin/marc
ADDED
data/bin/marc2xml
ADDED
@@ -0,0 +1,17 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
require 'marc'
|
4
|
+
|
5
|
+
# the filename
|
6
|
+
filename = ARGV[0]
|
7
|
+
|
8
|
+
reader = MARC::ForgivingReader.new(filename)
|
9
|
+
writer = MARC::XMLWriter.new($stdout)
|
10
|
+
|
11
|
+
reader.each { |record|
|
12
|
+
writer.write(record)
|
13
|
+
}
|
14
|
+
|
15
|
+
writer.close
|
16
|
+
|
17
|
+
|
data/lib/marc/constants.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
|
-
|
1
|
+
# frozen_string_literal: true
|
2
2
|
|
3
|
+
module MARC
|
3
4
|
# constants used in MARC21 reading/writing
|
4
5
|
LEADER_LENGTH = 24
|
5
6
|
DIRECTORY_ENTRY_LENGTH = 12
|
@@ -7,12 +8,11 @@ module MARC
|
|
7
8
|
END_OF_FIELD = 0x1E.chr
|
8
9
|
END_OF_RECORD = 0x1D.chr
|
9
10
|
|
10
|
-
# constants used in XML reading/writing
|
11
|
+
# constants used in XML reading/writing
|
11
12
|
MARC_NS = "http://www.loc.gov/MARC21/slim"
|
12
13
|
MARC_XSD = "http://www.loc.gov/standards/marcxml/schema/MARC21slim.xsd"
|
13
14
|
|
14
15
|
# marc-hash
|
15
16
|
MARCHASH_MAJOR_VERSION = 1
|
16
17
|
MARCHASH_MINOR_VERSION = 0
|
17
|
-
|
18
18
|
end
|
data/lib/marc/controlfield.rb
CHANGED
@@ -1,23 +1,21 @@
|
|
1
|
-
require
|
1
|
+
require "set"
|
2
2
|
|
3
3
|
module MARC
|
4
|
-
|
5
|
-
# MARC records contain control fields, each of which has a
|
4
|
+
# MARC records contain control fields, each of which has a
|
6
5
|
# tag and value. Tags for control fields must be in the
|
7
6
|
# 001-009 range or be specially added to the @@control_tags Set
|
8
7
|
|
9
8
|
class ControlField
|
10
|
-
|
11
9
|
# Initially, control tags are the numbers 1 through 9 or the string '000'
|
12
|
-
@@control_tags = Set.new(%w
|
13
|
-
|
10
|
+
@@control_tags = Set.new(%w[000 001 002 003 004 005 006 007 008 009])
|
11
|
+
|
14
12
|
def self.control_tags
|
15
|
-
|
13
|
+
@@control_tags
|
16
14
|
end
|
17
15
|
|
18
16
|
# A tag is a control tag if tag.to_s is a member of the @@control_tags set.
|
19
17
|
def self.control_tag?(tag)
|
20
|
-
|
18
|
+
@@control_tags.include? tag.to_s
|
21
19
|
end
|
22
20
|
|
23
21
|
# the tag value (007, 008, etc)
|
@@ -26,46 +24,60 @@ module MARC
|
|
26
24
|
# the value of the control field
|
27
25
|
attr_accessor :value
|
28
26
|
|
29
|
-
# The constructor which must be passed a tag value and
|
27
|
+
# The constructor which must be passed a tag value and
|
30
28
|
# an optional value for the field.
|
31
29
|
|
32
|
-
def initialize(tag,value=
|
30
|
+
def initialize(tag, value = "")
|
33
31
|
@tag = tag
|
34
32
|
@value = value
|
35
|
-
|
36
|
-
|
33
|
+
end
|
34
|
+
|
35
|
+
# Returns true if there are no error messages associated with the field
|
36
|
+
def valid?
|
37
|
+
errors.none?
|
38
|
+
end
|
39
|
+
|
40
|
+
# Returns an array of validation errors
|
41
|
+
def errors
|
42
|
+
messages = []
|
43
|
+
|
44
|
+
unless MARC::ControlField.control_tag?(@tag)
|
45
|
+
messages << "tag #{@tag.inspect} must be in 001-009 or in the MARC::ControlField.control_tags set"
|
37
46
|
end
|
47
|
+
|
48
|
+
messages
|
38
49
|
end
|
39
50
|
|
40
51
|
# Two control fields are equal if their tags and values are equal.
|
41
52
|
|
42
53
|
def ==(other)
|
54
|
+
if !other.is_a?(ControlField)
|
55
|
+
return false
|
56
|
+
end
|
43
57
|
if @tag != other.tag
|
44
|
-
return false
|
58
|
+
return false
|
45
59
|
elsif @value != other.value
|
46
60
|
return false
|
47
61
|
end
|
48
|
-
|
62
|
+
true
|
49
63
|
end
|
50
64
|
|
51
65
|
# turning it into a marc-hash element
|
52
66
|
def to_marchash
|
53
|
-
|
67
|
+
[@tag, @value]
|
54
68
|
end
|
55
|
-
|
69
|
+
|
56
70
|
# Turn the control field into a hash for MARC-in-JSON
|
57
71
|
def to_hash
|
58
|
-
|
72
|
+
{@tag => @value}
|
59
73
|
end
|
60
|
-
|
74
|
+
|
61
75
|
def to_s
|
62
|
-
|
76
|
+
"#{tag} #{value}"
|
63
77
|
end
|
64
78
|
|
65
79
|
def =~(regex)
|
66
|
-
|
67
|
-
end
|
68
|
-
|
80
|
+
to_s =~ regex
|
81
|
+
end
|
69
82
|
end
|
70
|
-
|
71
83
|
end
|
data/lib/marc/datafield.rb
CHANGED
@@ -1,11 +1,11 @@
|
|
1
1
|
module MARC
|
2
|
-
# MARC records contain data fields, each of which has a tag,
|
2
|
+
# MARC records contain data fields, each of which has a tag,
|
3
3
|
# indicators and subfields. Tags for data fields are all
|
4
4
|
# three-character tags that are not control fields (generally,
|
5
5
|
# any numeric tag greater than 009).
|
6
6
|
#
|
7
7
|
# Accessor attributes: tag, indicator1, indicator2
|
8
|
-
#
|
8
|
+
#
|
9
9
|
# DataField mixes in Enumerable to enable access to its constituent
|
10
10
|
# Subfield objects. For instance, if you have a DataField representing
|
11
11
|
# a 856 tag, and want to find all 'z' subfields:
|
@@ -14,7 +14,7 @@ module MARC
|
|
14
14
|
#
|
15
15
|
# Also, the accessor 'subfields' is an array of MARC::Subfield objects
|
16
16
|
# which can be accessed or modified by the client directly if
|
17
|
-
# neccesary.
|
17
|
+
# necessary.
|
18
18
|
|
19
19
|
class DataField
|
20
20
|
include Enumerable
|
@@ -31,90 +31,100 @@ module MARC
|
|
31
31
|
# A list of MARC::Subfield objects
|
32
32
|
attr_accessor :subfields
|
33
33
|
|
34
|
-
|
35
34
|
# Create a new field with tag, indicators and subfields.
|
36
|
-
# Subfields are passed in as comma separated list of
|
37
|
-
# MARC::Subfield objects,
|
38
|
-
#
|
35
|
+
# Subfields are passed in as comma separated list of
|
36
|
+
# MARC::Subfield objects,
|
37
|
+
#
|
39
38
|
# field = MARC::DataField.new('245','0','0',
|
40
39
|
# MARC::Subfield.new('a', 'Consilience :'),
|
41
40
|
# MARC::Subfield.new('b', 'the unity of knowledge '),
|
42
41
|
# MARC::Subfield.new('c', 'by Edward O. Wilson.'))
|
43
|
-
#
|
42
|
+
#
|
44
43
|
# or using a shorthand:
|
45
|
-
#
|
44
|
+
#
|
46
45
|
# field = MARC::DataField.new('245','0','0',
|
47
46
|
# ['a', 'Consilience :'],['b','the unity of knowledge '],
|
48
47
|
# ['c', 'by Edward O. Wilson.'] )
|
49
48
|
|
50
|
-
def initialize(tag, i1=
|
51
|
-
# if the tag is less than 3 characters long and
|
49
|
+
def initialize(tag, i1 = " ", i2 = " ", *subfields)
|
50
|
+
# if the tag is less than 3 characters long and
|
52
51
|
# the string is all numeric then we pad with zeros
|
53
|
-
if tag.length < 3
|
54
|
-
|
52
|
+
@tag = if (tag.length < 3) && (/^[0-9]+$/ =~ tag)
|
53
|
+
"%03d" % tag
|
55
54
|
else
|
56
|
-
|
55
|
+
tag
|
57
56
|
end
|
58
|
-
# can't allow nil to be passed in or else it'll
|
57
|
+
# can't allow nil to be passed in or else it'll
|
59
58
|
# screw us up later when we try to encode
|
60
|
-
@indicator1 = i1
|
61
|
-
@indicator2 = i2
|
62
|
-
|
63
|
-
@subfields = []
|
59
|
+
@indicator1 = i1.nil? ? " " : i1
|
60
|
+
@indicator2 = i2.nil? ? " " : i2
|
64
61
|
|
65
|
-
|
66
|
-
# those in MARC::ControlField#extra_control_fields
|
67
|
-
|
68
|
-
if MARC::ControlField.control_tag?(@tag)
|
69
|
-
raise MARC::Exception.new(),
|
70
|
-
"MARC::DataField objects can't have ControlField tag '" + @tag + "')"
|
71
|
-
end
|
62
|
+
@subfields = []
|
72
63
|
|
73
64
|
# allows MARC::Subfield objects to be passed directly
|
74
65
|
# or a shorthand of ['a','Foo'], ['b','Bar']
|
75
|
-
subfields.each do |subfield|
|
66
|
+
subfields.each do |subfield|
|
76
67
|
case subfield
|
77
68
|
when MARC::Subfield
|
78
69
|
@subfields.push(subfield)
|
79
70
|
when Array
|
80
71
|
if subfield.length > 2
|
81
|
-
raise MARC::Exception.new
|
82
|
-
"arrays must only have 2 elements: " + subfield.to_s
|
72
|
+
raise MARC::Exception.new,
|
73
|
+
"arrays must only have 2 elements: " + subfield.to_s
|
83
74
|
end
|
84
75
|
@subfields.push(
|
85
|
-
MARC::Subfield.new(subfield[0],subfield[1])
|
86
|
-
|
87
|
-
|
76
|
+
MARC::Subfield.new(subfield[0], subfield[1])
|
77
|
+
)
|
78
|
+
else
|
79
|
+
raise MARC::Exception.new,
|
88
80
|
"invalid subfield type #{subfield.class}"
|
89
81
|
end
|
90
82
|
end
|
91
83
|
end
|
92
84
|
|
85
|
+
# Returns true if there are no error messages associated with the field
|
86
|
+
def valid?
|
87
|
+
errors.none?
|
88
|
+
end
|
89
|
+
|
90
|
+
# Returns an array of validation errors
|
91
|
+
def errors
|
92
|
+
messages = []
|
93
|
+
|
94
|
+
# must use MARC::ControlField for tags < 010 or
|
95
|
+
# those in MARC::ControlField#extra_control_fields
|
96
|
+
|
97
|
+
if MARC::ControlField.control_tag?(@tag)
|
98
|
+
messages << "MARC::DataField objects can't have ControlField tag '" + @tag + "'"
|
99
|
+
end
|
100
|
+
|
101
|
+
messages
|
102
|
+
end
|
93
103
|
|
94
104
|
# Returns a string representation of the field such as:
|
95
105
|
# 245 00 $aConsilience :$bthe unity of knowledge $cby Edward O. Wilson.
|
96
106
|
|
97
107
|
def to_s
|
98
108
|
str = "#{tag} "
|
99
|
-
str += "#{indicator1}#{indicator2} "
|
109
|
+
str += "#{indicator1}#{indicator2} "
|
100
110
|
@subfields.each { |subfield| str += subfield.to_s }
|
101
|
-
|
111
|
+
str
|
102
112
|
end
|
103
113
|
|
104
114
|
# Turn into a marc-hash structure
|
105
115
|
def to_marchash
|
106
|
-
|
116
|
+
[@tag, @indicator1, @indicator2, @subfields.map { |sf| [sf.code, sf.value] }]
|
107
117
|
end
|
108
|
-
|
118
|
+
|
109
119
|
# Turn the variable field and subfields into a hash for MARC-in-JSON
|
110
|
-
|
120
|
+
|
111
121
|
def to_hash
|
112
|
-
field_hash = {@tag=>{
|
113
|
-
|
114
|
-
field_hash[@tag][
|
122
|
+
field_hash = {@tag => {"ind1" => @indicator1, "ind2" => @indicator2, "subfields" => []}}
|
123
|
+
each do |subfield|
|
124
|
+
field_hash[@tag]["subfields"] << {subfield.code => subfield.value}
|
115
125
|
end
|
116
126
|
field_hash
|
117
|
-
end
|
127
|
+
end
|
118
128
|
|
119
129
|
# Add a subfield (MARC::Subfield) to the field
|
120
130
|
# field.append(MARC::Subfield.new('a','Dave Thomas'))
|
@@ -123,72 +133,69 @@ module MARC
|
|
123
133
|
@subfields.push(subfield)
|
124
134
|
end
|
125
135
|
|
126
|
-
|
127
|
-
|
128
136
|
# You can iterate through the subfields in a Field:
|
129
137
|
# field.each {|s| print s}
|
130
138
|
|
131
139
|
def each
|
132
|
-
|
140
|
+
subfields.each do |subfield|
|
133
141
|
yield subfield
|
134
142
|
end
|
135
143
|
end
|
136
144
|
|
137
|
-
#def each_by_code(filter)
|
145
|
+
# def each_by_code(filter)
|
138
146
|
# @subfields.each_by_code(filter)
|
139
|
-
#end
|
147
|
+
# end
|
140
148
|
|
141
|
-
# You can lookup subfields with this shorthand. Note it
|
149
|
+
# You can lookup subfields with this shorthand. Note it
|
142
150
|
# will return a string and not a MARC::Subfield object.
|
143
151
|
# subfield = field['a']
|
144
|
-
|
152
|
+
|
145
153
|
def [](code)
|
146
|
-
subfield =
|
154
|
+
subfield = find { |s| s.code == code }
|
147
155
|
return subfield.value if subfield
|
148
|
-
|
156
|
+
nil
|
149
157
|
end
|
150
|
-
|
151
158
|
|
152
|
-
def codes(dedup=true)
|
159
|
+
def codes(dedup = true)
|
153
160
|
codes = []
|
154
|
-
@subfields.each {|s| codes << s.code }
|
161
|
+
@subfields.each { |s| codes << s.code }
|
155
162
|
dedup ? codes.uniq : codes
|
156
163
|
end
|
157
164
|
|
158
|
-
# Two fields are equal if their tag, indicators and
|
165
|
+
# Two fields are equal if their tag, indicators and
|
159
166
|
# subfields are all equal.
|
160
167
|
|
161
168
|
def ==(other)
|
169
|
+
if !other.is_a?(DataField)
|
170
|
+
return false
|
171
|
+
end
|
162
172
|
if @tag != other.tag
|
163
|
-
return false
|
173
|
+
return false
|
164
174
|
elsif @indicator1 != other.indicator1
|
165
|
-
return false
|
175
|
+
return false
|
166
176
|
elsif @indicator2 != other.indicator2
|
167
|
-
return false
|
177
|
+
return false
|
168
178
|
elsif @subfields != other.subfields
|
169
179
|
return false
|
170
180
|
end
|
171
|
-
|
181
|
+
true
|
172
182
|
end
|
173
183
|
|
174
|
-
|
175
184
|
# To support regex matching with fields
|
176
185
|
#
|
177
186
|
# if field =~ /Huckleberry/ ...
|
178
187
|
|
179
188
|
def =~(regex)
|
180
|
-
|
189
|
+
to_s =~ regex
|
181
190
|
end
|
182
191
|
|
183
|
-
|
184
192
|
# to get the field as a string, without the tag and indicators
|
185
193
|
# useful in situations where you want a legible version of the field
|
186
194
|
#
|
187
195
|
# print record['245'].value
|
188
196
|
|
189
197
|
def value
|
190
|
-
|
198
|
+
(@subfields.map { |s| s.value }.join "")
|
191
199
|
end
|
192
|
-
|
193
200
|
end
|
194
201
|
end
|
data/lib/marc/dublincore.rb
CHANGED
@@ -1,79 +1,97 @@
|
|
1
1
|
module MARC
|
2
|
-
|
3
2
|
# A class for mapping MARC records to Dublin Core
|
4
|
-
|
5
|
-
class DublinCore
|
6
3
|
|
4
|
+
class DublinCore
|
7
5
|
def self.map(record)
|
8
|
-
dc_hash =
|
9
|
-
dc_hash[
|
6
|
+
dc_hash = {}
|
7
|
+
dc_hash["title"] = get_field_value(record["245"]["a"])
|
10
8
|
|
11
9
|
# Creator
|
12
|
-
[100, 110, 111, 700, 710, 711, 720].each do |field|
|
13
|
-
dc_hash[
|
14
|
-
dc_hash[
|
10
|
+
["100", "110", "111", "700", "710", "711", "720"].each do |field|
|
11
|
+
dc_hash["creator"] ||= []
|
12
|
+
dc_hash["creator"] << get_field_value(record[field])
|
15
13
|
end
|
16
14
|
|
17
15
|
# Subject
|
18
|
-
[600, 610, 611, 630, 650, 653].each do |field|
|
19
|
-
dc_hash[
|
20
|
-
dc_hash[
|
16
|
+
["600", "610", "611", "630", "650", "653"].each do |field|
|
17
|
+
dc_hash["subject"] ||= []
|
18
|
+
dc_hash["subject"] << get_field_value(record[field])
|
21
19
|
end
|
22
20
|
|
23
21
|
# Description
|
24
|
-
|
25
|
-
next if [506, 530, 540, 546].include?(field)
|
26
|
-
dc_hash[
|
27
|
-
dc_hash[
|
22
|
+
("500".."599").each do |field|
|
23
|
+
next if ["506", "530", "540", "546"].include?(field)
|
24
|
+
dc_hash["description"] ||= []
|
25
|
+
dc_hash["description"] << get_field_value(record[field])
|
28
26
|
end
|
29
27
|
|
30
|
-
dc_hash[
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
dc_hash[
|
36
|
-
|
28
|
+
dc_hash["publisher"] = begin
|
29
|
+
get_field_value(record["260"]["a"]["b"])
|
30
|
+
rescue
|
31
|
+
nil
|
32
|
+
end
|
33
|
+
dc_hash["date"] = begin
|
34
|
+
get_field_value(record["260"]["c"])
|
35
|
+
rescue
|
36
|
+
nil
|
37
|
+
end
|
38
|
+
dc_hash["type"] = get_field_value(record["655"])
|
39
|
+
dc_hash["format"] = begin
|
40
|
+
get_field_value(record["856"]["q"])
|
41
|
+
rescue
|
42
|
+
nil
|
43
|
+
end
|
44
|
+
dc_hash["identifier"] = begin
|
45
|
+
get_field_value(record["856"]["u"])
|
46
|
+
rescue
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
dc_hash["source"] = begin
|
50
|
+
get_field_value(record["786"]["o"]["t"])
|
51
|
+
rescue
|
52
|
+
nil
|
53
|
+
end
|
54
|
+
dc_hash["language"] = get_field_value(record["546"])
|
37
55
|
|
38
|
-
dc_hash[
|
39
|
-
dc_hash[
|
40
|
-
|
41
|
-
dc_hash[
|
56
|
+
dc_hash["relation"] = []
|
57
|
+
dc_hash["relation"] << get_field_value(record["530"])
|
58
|
+
("760".."787").each do |field|
|
59
|
+
dc_hash["relation"] << get_field_value(record[field]["o"]["t"])
|
60
|
+
rescue
|
61
|
+
nil
|
42
62
|
end
|
43
63
|
|
44
|
-
[651, 752].each do |field|
|
45
|
-
dc_hash[
|
46
|
-
dc_hash[
|
64
|
+
["651", "752"].each do |field|
|
65
|
+
dc_hash["coverage"] ||= []
|
66
|
+
dc_hash["coverage"] << get_field_value(record[field])
|
47
67
|
end
|
48
68
|
|
49
|
-
[506, 540].each do |field|
|
50
|
-
dc_hash[
|
51
|
-
dc_hash[
|
69
|
+
["506", "540"].each do |field|
|
70
|
+
dc_hash["rights"] ||= []
|
71
|
+
dc_hash["rights"] << get_field_value(record[field])
|
52
72
|
end
|
53
|
-
|
54
|
-
dc_hash.keys.each do |key|
|
73
|
+
|
74
|
+
dc_hash.keys.each do |key|
|
55
75
|
dc_hash[key].flatten! if dc_hash[key].respond_to?(:flatten!)
|
56
76
|
dc_hash[key].compact! if dc_hash[key].respond_to?(:compact!)
|
57
77
|
end
|
58
|
-
|
78
|
+
|
59
79
|
dc_hash
|
60
80
|
end
|
61
|
-
|
81
|
+
|
62
82
|
def self.get_field_value(field)
|
63
83
|
return if field.nil?
|
64
|
-
|
65
|
-
if !field.
|
84
|
+
|
85
|
+
if !field.is_a?(String) && field.respond_to?(:each)
|
66
86
|
values = []
|
67
87
|
field.each do |element|
|
68
88
|
values << get_field_value(element)
|
69
89
|
end
|
70
90
|
values
|
71
91
|
else
|
72
|
-
return field if field.
|
92
|
+
return field if field.is_a?(String)
|
73
93
|
return field.value if field.respond_to?(:value)
|
74
94
|
end
|
75
95
|
end
|
76
|
-
|
77
96
|
end
|
78
97
|
end
|
79
|
-
|
data/lib/marc/exception.rb
CHANGED
@@ -1,9 +1,17 @@
|
|
1
1
|
module MARC
|
2
|
-
|
3
2
|
# basic exception class for exceptions that
|
4
3
|
# can occur during MARC processing.
|
5
4
|
|
6
5
|
class Exception < RuntimeError
|
7
6
|
end
|
8
7
|
|
8
|
+
class RecordException < MARC::Exception
|
9
|
+
attr_reader :record
|
10
|
+
|
11
|
+
def initialize(record)
|
12
|
+
@record = record
|
13
|
+
id = @record["001"] || "<record with no 001>"
|
14
|
+
super("Record #{id}: #{@record.errors.join("\n....")}")
|
15
|
+
end
|
16
|
+
end
|
9
17
|
end
|
@@ -0,0 +1,33 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "json"
|
4
|
+
|
5
|
+
module MARC
|
6
|
+
# Read marc-in-json documents from a `.jsonl` file -- also called
|
7
|
+
# "newline-delimited JSON", which is a file with one JSON document on each line.
|
8
|
+
class JSONLReader
|
9
|
+
include Enumerable
|
10
|
+
|
11
|
+
# @param [String, IO] file A filename, or open File/IO type object, from which to read
|
12
|
+
def initialize(file)
|
13
|
+
if file.is_a?(String)
|
14
|
+
raise ArgumentError.new("File '#{file}' can't be found") unless File.exist?(file)
|
15
|
+
raise ArgumentError.new("File '#{file}' can't be opened for reading") unless File.readable?(file)
|
16
|
+
@handle = File.new(file)
|
17
|
+
elsif file.respond_to?(:read, 5)
|
18
|
+
@handle = file
|
19
|
+
else
|
20
|
+
raise ArgumentError, "must pass in path or file"
|
21
|
+
end
|
22
|
+
end
|
23
|
+
|
24
|
+
# Turn marc-in-json lines into actual marc records and yield them
|
25
|
+
# @yieldreturn [MARC::Record] record created from each line of the file
|
26
|
+
def each
|
27
|
+
return enum_for(:each) unless block_given?
|
28
|
+
@handle.each do |line|
|
29
|
+
yield MARC::Record.new_from_hash(JSON.parse(line))
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
33
|
+
end
|