marc 1.1.1 → 1.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (52) hide show
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +102 -30
  7. data/Gemfile +15 -0
  8. data/README.md +239 -46
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -86
  22. data/lib/marc/reader.rb +117 -123
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -82
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +23 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -32
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +101 -94
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +80 -9
data/test/tc_record.rb CHANGED
@@ -1,157 +1,152 @@
1
- require 'test/unit'
2
- require 'marc'
3
- require 'xmlsimple'
4
-
5
- def xml_cmp a, b
6
- eq_all_but_zero = Object.new.instance_eval do
7
- def ==(other)
8
- Integer(other) == 0 ? false : true
9
- end
10
- self
11
- end
12
- a = XmlSimple.xml_in(a.to_s, 'normalisespace' => eq_all_but_zero)
13
- b = XmlSimple.xml_in(b.to_s, 'normalisespace' => eq_all_but_zero)
14
- a == b
15
- end
16
-
17
- class TestRecord < Test::Unit::TestCase
18
-
19
- def test_constructor
20
- r = MARC::Record.new()
21
- assert_equal(r.class, MARC::Record)
22
- end
23
-
24
- def test_xml
25
- r = get_record()
26
- doc = r.to_xml
27
- assert_kind_of REXML::Element, doc
28
- assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
29
- end
30
-
31
- def test_append_field
32
- r = get_record()
33
- assert_equal(r.fields.length(), 2)
34
- end
35
-
36
- def test_iterator
37
- r = get_record()
38
- count = 0
39
- r.each {|f| count += 1}
40
- assert_equal(count,2)
41
- end
42
-
43
- def test_decode
44
- raw = IO.read('test/one.dat')
45
- r = MARC::Record::new_from_marc(raw)
46
- assert_equal(r.class, MARC::Record)
47
- assert_equal(r.leader, '00755cam 22002414a 4500')
48
- assert_equal(r.fields.length(), 18)
49
- assert_equal(r.find {|f| f.tag == '245'}.to_s,
50
- '245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ')
51
- end
52
-
53
- def test_decode_forgiving
54
- raw = IO.read('test/one.dat')
55
- r = MARC::Record::new_from_marc(raw, :forgiving => true)
56
- assert_equal(r.class, MARC::Record)
57
- assert_equal(r.leader, '00755cam 22002414a 4500')
58
- assert_equal(r.fields.length(), 18)
59
- assert_equal(r.find {|f| f.tag == '245'}.to_s,
60
- '245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ')
61
- end
62
-
63
- def test_encode
64
- r1 = MARC::Record.new()
65
- r1.append(MARC::DataField.new('100', '2', '0', ['a', 'Thomas, Dave']))
66
- r1.append(MARC::DataField.new('245', '0', '0', ['a', 'Pragmatic Programmer']))
67
- raw = r1.to_marc()
68
- r2 = MARC::Record::new_from_marc(raw)
69
- assert_equal(r1, r2)
70
- end
71
-
72
- def test_lookup_shorthand
73
- r = get_record
74
- assert_equal(r['100']['a'], 'Thomas, Dave')
75
- end
76
-
77
- def get_record
78
- r = MARC::Record.new()
79
- r.append(MARC::DataField.new('100', '2', '0', ['a', 'Thomas, Dave']))
80
- r.append(MARC::DataField.new('245', '0', '4', ['The Pragmatic Programmer']))
81
- return r
82
- end
83
-
84
- def test_field_index
85
- raw = IO.read('test/random_tag_order.dat')
86
- r = MARC::Record.new_from_marc(raw)
87
- assert_kind_of(Array, r.fields)
88
- assert_kind_of(Array, r.tags)
89
- assert_equal(['001','005','007','008','010','028','035','040','050','245','260','300','500','505','511','650','700','906','953','991'], r.tags.sort)
90
- assert_kind_of(Array, r.fields('035'))
91
- raw2 = IO.read('test/random_tag_order2.dat')
92
- r2 = MARC::Record.new_from_marc(raw2)
93
- assert_equal(6, r2.fields('500').length)
94
- # Test passing an array to Record#fields
95
- assert_equal(3, r.fields(['500','505', '510', '511']).length)
96
- # Test passing a Range to Record#fields
97
- assert_equal(9, r.fields(('001'..'099')).length)
98
- end
99
-
100
- def test_field_index_order
101
- raw = IO.read('test/random_tag_order.dat')
102
- r = MARC::Record.new_from_marc(raw)
103
- notes = ['500','505','511']
104
- r.fields(('500'..'599')).each do |f|
105
- assert_equal(notes.pop, f.tag)
106
- end
107
-
108
-
109
- raw2 = IO.read('test/random_tag_order2.dat')
110
- r2 = MARC::Record.new_from_marc(raw2)
111
-
112
- fields = ['050','042','010','028','024','035','041','028','040','035','008','007','005','001']
113
- r2.each_by_tag(('001'..'099')) do |f|
114
- assert_equal(fields.pop, f.tag)
115
- end
116
-
117
- five_hundreds = r2.fields('500')
118
- assert_equal(five_hundreds.first['a'], '"Contemporary blues" interpretations of previously released songs; written by Bob Dylan.')
119
- assert_equal(five_hundreds.last['a'], 'Composer and program notes in container.')
1
+ require "test/unit"
2
+ require "marc"
3
+ require "xmlsimple"
4
+
5
+ def xml_cmp a, b
6
+ eq_all_but_zero = Object.new.instance_eval do
7
+ def ==(other)
8
+ !(Integer(other) == 0)
120
9
  end
121
10
 
11
+ self
12
+ end
13
+ a = XmlSimple.xml_in(a.to_s, "normalisespace" => eq_all_but_zero)
14
+ b = XmlSimple.xml_in(b.to_s, "normalisespace" => eq_all_but_zero)
15
+ a == b
16
+ end
122
17
 
123
- # Some tests for the internal FieldMap hash, normally
124
- # an implementation detail, but things get tricky and we need
125
- # tests to make sure we're good. Some of these you might
126
- # change if you change FieldMap caching implementation or contract/API.
127
- def test_direct_change_dirties_fieldmap
128
- # if we ask for #fields directly, and mutate it
129
- # with it's own methods, does any cache update?
130
- r = MARC::Record.new
131
- assert r.fields('500').empty?
132
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
133
- assert ! r.fields('500').empty?, "New 505 directly added to #fields is picked up"
134
-
135
- # Do it again, make sure #[] works too
136
- r = MARC::Record.new
137
- assert r['500'].nil?
138
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
139
- assert r['500'], "New 505 directly added to #fields is picked up"
18
+ class TestRecord < Test::Unit::TestCase
19
+ def test_constructor
20
+ r = MARC::Record.new
21
+ assert_equal(r.class, MARC::Record)
22
+ end
23
+
24
+ def test_xml
25
+ r = get_record
26
+ doc = r.to_xml
27
+ assert_kind_of REXML::Element, doc
28
+ assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
29
+ end
30
+
31
+ def test_append_field
32
+ r = get_record
33
+ assert_equal(r.fields.length, 2)
34
+ end
35
+
36
+ def test_iterator
37
+ r = get_record
38
+ count = 0
39
+ r.each { |f| count += 1 }
40
+ assert_equal(count, 2)
41
+ end
42
+
43
+ def test_decode
44
+ raw = IO.read("test/one.dat")
45
+ r = MARC::Record.new_from_marc(raw)
46
+ assert_equal(r.class, MARC::Record)
47
+ assert_equal(r.leader, "00755cam 22002414a 4500")
48
+ assert_equal(r.fields.length, 18)
49
+ assert_equal(r.find { |f| f.tag == "245" }.to_s,
50
+ "245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ")
51
+ end
52
+
53
+ def test_decode_forgiving
54
+ raw = IO.read("test/one.dat")
55
+ r = MARC::Record.new_from_marc(raw, forgiving: true)
56
+ assert_equal(r.class, MARC::Record)
57
+ assert_equal(r.leader, "00755cam 22002414a 4500")
58
+ assert_equal(r.fields.length, 18)
59
+ assert_equal(r.find { |f| f.tag == "245" }.to_s,
60
+ "245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ")
61
+ end
62
+
63
+ def test_encode
64
+ r1 = MARC::Record.new
65
+ r1.append(MARC::DataField.new("100", "2", "0", ["a", "Thomas, Dave"]))
66
+ r1.append(MARC::DataField.new("245", "0", "0", ["a", "Pragmatic Programmer"]))
67
+ raw = r1.to_marc
68
+ r2 = MARC::Record.new_from_marc(raw)
69
+ assert_equal(r1, r2)
70
+ end
71
+
72
+ def test_lookup_shorthand
73
+ r = get_record
74
+ assert_equal(r["100"]["a"], "Thomas, Dave")
75
+ end
76
+
77
+ def get_record
78
+ r = MARC::Record.new
79
+ r.append(MARC::DataField.new("100", "2", "0", ["a", "Thomas, Dave"]))
80
+ r.append(MARC::DataField.new("245", "0", "4", ["The Pragmatic Programmer"]))
81
+ r
82
+ end
83
+
84
+ def test_field_index
85
+ raw = IO.read("test/random_tag_order.dat")
86
+ r = MARC::Record.new_from_marc(raw)
87
+ assert_kind_of(Array, r.fields)
88
+ assert_kind_of(Array, r.tags)
89
+ assert_equal(["001", "005", "007", "008", "010", "028", "035", "040", "050", "245", "260", "300", "500", "505", "511", "650", "700", "906", "953", "991"], r.tags.sort)
90
+ assert_kind_of(Array, r.fields("035"))
91
+ raw2 = IO.read("test/random_tag_order2.dat")
92
+ r2 = MARC::Record.new_from_marc(raw2)
93
+ assert_equal(6, r2.fields("500").length)
94
+ # Test passing an array to Record#fields
95
+ assert_equal(3, r.fields(["500", "505", "510", "511"]).length)
96
+ # Test passing a Range to Record#fields
97
+ assert_equal(9, r.fields(("001".."099")).length)
98
+ end
99
+
100
+ def test_field_index_order
101
+ raw = IO.read("test/random_tag_order.dat")
102
+ r = MARC::Record.new_from_marc(raw)
103
+ notes = ["500", "505", "511"]
104
+ r.fields(("500".."599")).each do |f|
105
+ assert_equal(notes.pop, f.tag)
140
106
  end
141
107
 
142
- def test_frozen_fieldmap
143
- r = MARC::Record.new
144
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
145
-
146
- r.fields.freeze
147
-
148
- r.fields.inspect
149
- r.fields
150
- assert ! r.fields('500').empty?
151
-
152
- assert r.fields.instance_variable_get("@clean"), "FieldMap still marked clean"
108
+ raw2 = IO.read("test/random_tag_order2.dat")
109
+ r2 = MARC::Record.new_from_marc(raw2)
153
110
 
111
+ fields = ["050", "042", "010", "028", "024", "035", "041", "028", "040", "035", "008", "007", "005", "001"]
112
+ r2.each_by_tag(("001".."099")) do |f|
113
+ assert_equal(fields.pop, f.tag)
154
114
  end
155
115
 
156
-
116
+ five_hundreds = r2.fields("500")
117
+ assert_equal(five_hundreds.first["a"], '"Contemporary blues" interpretations of previously released songs; written by Bob Dylan.')
118
+ assert_equal(five_hundreds.last["a"], "Composer and program notes in container.")
119
+ end
120
+
121
+ # Some tests for the internal FieldMap hash, normally
122
+ # an implementation detail, but things get tricky and we need
123
+ # tests to make sure we're good. Some of these you might
124
+ # change if you change FieldMap caching implementation or contract/API.
125
+ def test_direct_change_dirties_fieldmap
126
+ # if we ask for #fields directly, and mutate it
127
+ # with its own methods, does any cache update?
128
+ r = MARC::Record.new
129
+ assert r.fields("500").empty?
130
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
131
+ assert !r.fields("500").empty?, "New 505 directly added to #fields is picked up"
132
+
133
+ # Do it again, make sure #[] works too
134
+ r = MARC::Record.new
135
+ assert r["500"].nil?
136
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
137
+ assert r["500"], "New 505 directly added to #fields is picked up"
138
+ end
139
+
140
+ def test_frozen_fieldmap
141
+ r = MARC::Record.new
142
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
143
+
144
+ r.fields.freeze
145
+
146
+ r.fields.inspect
147
+ r.fields
148
+ assert !r.fields("500").empty?
149
+
150
+ assert r.fields.instance_variable_get(:@clean), "FieldMap still marked clean"
151
+ end
157
152
  end
data/test/tc_subfield.rb CHANGED
@@ -1,18 +1,19 @@
1
- require 'test/unit'
2
- require 'marc/subfield'
1
+ require "test/unit"
2
+ require "marc/subfield"
3
3
 
4
4
  class SubfieldTest < Test::Unit::TestCase
5
+ def test_ok
6
+ s = MARC::Subfield.new("a", "foo")
7
+ assert_equal(s.code, "a")
8
+ assert_equal(s.value, "foo")
9
+ end
5
10
 
6
- def test_ok
7
- s = MARC::Subfield.new('a', 'foo')
8
- assert_equal(s.code, 'a')
9
- assert_equal(s.value, 'foo')
10
- end
11
-
12
- def test_equals
13
- s1 =MARC::Subfield.new('a', 'foo')
14
- s2 =MARC::Subfield.new('a', 'foo')
15
- assert_equal(s1,s2)
16
- end
11
+ def test_equals
12
+ s1 = MARC::Subfield.new("a", "foo")
13
+ s2 = MARC::Subfield.new("a", "foo")
14
+ assert_equal(s1, s2)
17
15
 
16
+ assert_not_equal(s1, "a")
17
+ assert_not_equal(s1, "foo")
18
+ end
18
19
  end
@@ -0,0 +1,95 @@
1
+ require "test/unit"
2
+ require "marc"
3
+ require "stringio"
4
+
5
+ class UnsafeXMLTest < Test::Unit::TestCase
6
+ def basic_rec
7
+ rec = MARC::Record.new
8
+ rec.leader = "00925njm 22002777a 4500"
9
+ rec.append MARC::ControlField.new("007", "sdubumennmplu")
10
+ rec.append MARC::DataField.new("245", "0", "4",
11
+ ["a", "The Great Ray Charles"], ["h", "[sound recording]."])
12
+ rec.append MARC::DataField.new("998", " ", " ",
13
+ ["^", "Valid local subfield"])
14
+ rec
15
+ end
16
+
17
+ def text_xml_entities
18
+ r1 = MARC::Record.new
19
+ r1 << MARC::DataField.new("245", "0", "0", ["a", "foo & bar & baz"])
20
+ xml = MARC::UnsafeXMLWriter.encode(r1)
21
+ assert_match(/foo &amp; bar &amp; baz/, xml)
22
+ reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
23
+ r2 = reader.entries[0]
24
+ assert_equal "foo & bar & baz", r2["245"]["a"]
25
+ end
26
+
27
+ def test_read_write
28
+ record1 = MARC::Record.new
29
+ record1.leader = "00925njm 22002777a 4500"
30
+ record1.append MARC::ControlField.new("007", "sdubumennmplu")
31
+ record1.append MARC::DataField.new("245", "0", "4",
32
+ ["a", "The Great Ray Charles"], ["h", "[sound recording]."])
33
+ record1.append MARC::DataField.new("998", " ", " ",
34
+ ["^", "Valid local subfield"])
35
+
36
+ writer = MARC::UnsafeXMLWriter.new("test/test.xml", stylesheet: "style.xsl")
37
+ writer.write(record1)
38
+ writer.close
39
+
40
+ xml = File.read("test/test.xml")
41
+ assert_match(/<controlfield tag=["']007["']>sdubumennmplu<\/controlfield>/, xml)
42
+ assert_match(/<\?xml-stylesheet type=["']text\/xsl" href="style.xsl["']\?>/, xml)
43
+
44
+ reader = MARC::XMLReader.new("test/test.xml")
45
+ record2 = reader.first
46
+ assert_equal(record1, record2)
47
+ ensure
48
+ File.unlink("test/test.xml")
49
+ end
50
+
51
+ def test_truncated_leader_roundtripping
52
+ record1 = MARC::Record.new
53
+ record1.leader = "00925njm 22002777a"
54
+
55
+ writer = MARC::UnsafeXMLWriter.new("test/test.xml", stylesheet: "style.xsl")
56
+ writer.write(record1)
57
+ writer.close
58
+
59
+ reader = MARC::XMLReader.new("test/test.xml")
60
+ record2 = reader.first
61
+
62
+ assert_equal("00925njm 22002777a 4500", record2.leader)
63
+ ensure
64
+ File.unlink("test/test.xml")
65
+ end
66
+
67
+ def test_single_record_document
68
+ xml = MARC::UnsafeXMLWriter.single_record_document(basic_rec)
69
+ rec = MARC::XMLReader.new(StringIO.new(xml)).first
70
+ assert_equal(basic_rec, rec)
71
+ end
72
+
73
+ def test_encode_same_as_rexml
74
+ rex_xml = MARC::XMLWriter.encode(basic_rec).to_s
75
+ unsafe_xml = MARC::UnsafeXMLWriter.encode(basic_rec)
76
+ rex = MARC::XMLReader.new(StringIO.new(rex_xml)).first
77
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
78
+ assert_equal(rex, unsafe)
79
+ end
80
+
81
+ def test_to_xml_string
82
+ rex_xml = basic_rec.to_xml_string
83
+ unsafe_xml = basic_rec.to_xml_string(fast_but_unsafe: true, include_namespace: false)
84
+ rex = MARC::XMLReader.new(StringIO.new(rex_xml)).first
85
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
86
+ assert_equal(rex, unsafe)
87
+ end
88
+
89
+ def test_to_xml_string_with_namespaces
90
+ unsafe_xml = basic_rec.to_xml_string(fast_but_unsafe: true, include_namespace: true)
91
+ rex = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
92
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
93
+ assert_equal(rex, unsafe)
94
+ end
95
+ end