marc 1.0.4 → 1.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (52)
  1. checksums.yaml +4 -4
  2. data/.github/ISSUE_TEMPLATE/bug_report.md +30 -0
  3. data/.github/workflows/ruby.yml +24 -0
  4. data/.gitignore +17 -0
  5. data/.standard.yml +1 -0
  6. data/{Changes → CHANGELOG.md} +106 -29
  7. data/Gemfile +15 -0
  8. data/README.md +240 -47
  9. data/Rakefile +14 -14
  10. data/bin/marc +14 -0
  11. data/bin/marc2xml +17 -0
  12. data/examples/xml2marc.rb +10 -0
  13. data/lib/marc/constants.rb +3 -3
  14. data/lib/marc/controlfield.rb +35 -23
  15. data/lib/marc/datafield.rb +70 -63
  16. data/lib/marc/dublincore.rb +59 -41
  17. data/lib/marc/exception.rb +9 -1
  18. data/lib/marc/jsonl_reader.rb +33 -0
  19. data/lib/marc/jsonl_writer.rb +44 -0
  20. data/lib/marc/marc8/map_to_unicode.rb +16417 -16420
  21. data/lib/marc/marc8/to_unicode.rb +80 -86
  22. data/lib/marc/reader.rb +119 -121
  23. data/lib/marc/record.rb +72 -62
  24. data/lib/marc/subfield.rb +12 -10
  25. data/lib/marc/unsafe_xmlwriter.rb +93 -0
  26. data/lib/marc/version.rb +1 -1
  27. data/lib/marc/writer.rb +27 -30
  28. data/lib/marc/xml_parsers.rb +222 -197
  29. data/lib/marc/xmlreader.rb +131 -114
  30. data/lib/marc/xmlwriter.rb +93 -81
  31. data/lib/marc.rb +20 -18
  32. data/marc.gemspec +23 -0
  33. data/test/marc8/tc_marc8_mapping.rb +3 -3
  34. data/test/marc8/tc_to_unicode.rb +28 -32
  35. data/test/messed_up_leader.xml +9 -0
  36. data/test/tc_controlfield.rb +37 -34
  37. data/test/tc_datafield.rb +65 -60
  38. data/test/tc_dublincore.rb +9 -11
  39. data/test/tc_hash.rb +10 -13
  40. data/test/tc_jsonl.rb +19 -0
  41. data/test/tc_marchash.rb +17 -21
  42. data/test/tc_parsers.rb +108 -144
  43. data/test/tc_reader.rb +35 -36
  44. data/test/tc_reader_char_encodings.rb +149 -169
  45. data/test/tc_record.rb +143 -148
  46. data/test/tc_subfield.rb +14 -13
  47. data/test/tc_unsafe_xml.rb +95 -0
  48. data/test/tc_writer.rb +101 -108
  49. data/test/tc_xml.rb +99 -87
  50. data/test/tc_xml_error_handling.rb +7 -8
  51. data/test/ts_marc.rb +8 -8
  52. metadata +94 -9
data/test/tc_record.rb CHANGED
@@ -1,157 +1,152 @@
1
- require 'test/unit'
2
- require 'marc'
3
- require 'xmlsimple'
4
-
5
- def xml_cmp a, b
6
- eq_all_but_zero = Object.new.instance_eval do
7
- def ==(other)
8
- Integer(other) == 0 ? false : true
9
- end
10
- self
11
- end
12
- a = XmlSimple.xml_in(a.to_s, 'normalisespace' => eq_all_but_zero)
13
- b = XmlSimple.xml_in(b.to_s, 'normalisespace' => eq_all_but_zero)
14
- a == b
15
- end
16
-
17
- class TestRecord < Test::Unit::TestCase
18
-
19
- def test_constructor
20
- r = MARC::Record.new()
21
- assert_equal(r.class, MARC::Record)
22
- end
23
-
24
- def test_xml
25
- r = get_record()
26
- doc = r.to_xml
27
- assert_kind_of REXML::Element, doc
28
- assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
29
- end
30
-
31
- def test_append_field
32
- r = get_record()
33
- assert_equal(r.fields.length(), 2)
34
- end
35
-
36
- def test_iterator
37
- r = get_record()
38
- count = 0
39
- r.each {|f| count += 1}
40
- assert_equal(count,2)
41
- end
42
-
43
- def test_decode
44
- raw = IO.read('test/one.dat')
45
- r = MARC::Record::new_from_marc(raw)
46
- assert_equal(r.class, MARC::Record)
47
- assert_equal(r.leader, '00755cam 22002414a 4500')
48
- assert_equal(r.fields.length(), 18)
49
- assert_equal(r.find {|f| f.tag == '245'}.to_s,
50
- '245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ')
51
- end
52
-
53
- def test_decode_forgiving
54
- raw = IO.read('test/one.dat')
55
- r = MARC::Record::new_from_marc(raw, :forgiving => true)
56
- assert_equal(r.class, MARC::Record)
57
- assert_equal(r.leader, '00755cam 22002414a 4500')
58
- assert_equal(r.fields.length(), 18)
59
- assert_equal(r.find {|f| f.tag == '245'}.to_s,
60
- '245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ')
61
- end
62
-
63
- def test_encode
64
- r1 = MARC::Record.new()
65
- r1.append(MARC::DataField.new('100', '2', '0', ['a', 'Thomas, Dave']))
66
- r1.append(MARC::DataField.new('245', '0', '0', ['a', 'Pragmatic Programmer']))
67
- raw = r1.to_marc()
68
- r2 = MARC::Record::new_from_marc(raw)
69
- assert_equal(r1, r2)
70
- end
71
-
72
- def test_lookup_shorthand
73
- r = get_record
74
- assert_equal(r['100']['a'], 'Thomas, Dave')
75
- end
76
-
77
- def get_record
78
- r = MARC::Record.new()
79
- r.append(MARC::DataField.new('100', '2', '0', ['a', 'Thomas, Dave']))
80
- r.append(MARC::DataField.new('245', '0', '4', ['The Pragmatic Programmer']))
81
- return r
82
- end
83
-
84
- def test_field_index
85
- raw = IO.read('test/random_tag_order.dat')
86
- r = MARC::Record.new_from_marc(raw)
87
- assert_kind_of(Array, r.fields)
88
- assert_kind_of(Array, r.tags)
89
- assert_equal(['001','005','007','008','010','028','035','040','050','245','260','300','500','505','511','650','700','906','953','991'], r.tags.sort)
90
- assert_kind_of(Array, r.fields('035'))
91
- raw2 = IO.read('test/random_tag_order2.dat')
92
- r2 = MARC::Record.new_from_marc(raw2)
93
- assert_equal(6, r2.fields('500').length)
94
- # Test passing an array to Record#fields
95
- assert_equal(3, r.fields(['500','505', '510', '511']).length)
96
- # Test passing a Range to Record#fields
97
- assert_equal(9, r.fields(('001'..'099')).length)
98
- end
99
-
100
- def test_field_index_order
101
- raw = IO.read('test/random_tag_order.dat')
102
- r = MARC::Record.new_from_marc(raw)
103
- notes = ['500','505','511']
104
- r.fields(('500'..'599')).each do |f|
105
- assert_equal(notes.pop, f.tag)
106
- end
107
-
108
-
109
- raw2 = IO.read('test/random_tag_order2.dat')
110
- r2 = MARC::Record.new_from_marc(raw2)
111
-
112
- fields = ['050','042','010','028','024','035','041','028','040','035','008','007','005','001']
113
- r2.each_by_tag(('001'..'099')) do |f|
114
- assert_equal(fields.pop, f.tag)
115
- end
116
-
117
- five_hundreds = r2.fields('500')
118
- assert_equal(five_hundreds.first['a'], '"Contemporary blues" interpretations of previously released songs; written by Bob Dylan.')
119
- assert_equal(five_hundreds.last['a'], 'Composer and program notes in container.')
1
+ require "test/unit"
2
+ require "marc"
3
+ require "xmlsimple"
4
+
5
+ def xml_cmp a, b
6
+ eq_all_but_zero = Object.new.instance_eval do
7
+ def ==(other)
8
+ !(Integer(other) == 0)
120
9
  end
121
10
 
11
+ self
12
+ end
13
+ a = XmlSimple.xml_in(a.to_s, "normalisespace" => eq_all_but_zero)
14
+ b = XmlSimple.xml_in(b.to_s, "normalisespace" => eq_all_but_zero)
15
+ a == b
16
+ end
122
17
 
123
- # Some tests for the internal FieldMap hash, normally
124
- # an implementation detail, but things get tricky and we need
125
- # tests to make sure we're good. Some of these you might
126
- # change if you change FieldMap caching implementation or contract/API.
127
- def test_direct_change_dirties_fieldmap
128
- # if we ask for #fields directly, and mutate it
129
- # with it's own methods, does any cache update?
130
- r = MARC::Record.new
131
- assert r.fields('500').empty?
132
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
133
- assert ! r.fields('500').empty?, "New 505 directly added to #fields is picked up"
134
-
135
- # Do it again, make sure #[] works too
136
- r = MARC::Record.new
137
- assert r['500'].nil?
138
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
139
- assert r['500'], "New 505 directly added to #fields is picked up"
18
+ class TestRecord < Test::Unit::TestCase
19
+ def test_constructor
20
+ r = MARC::Record.new
21
+ assert_equal(r.class, MARC::Record)
22
+ end
23
+
24
+ def test_xml
25
+ r = get_record
26
+ doc = r.to_xml
27
+ assert_kind_of REXML::Element, doc
28
+ assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
29
+ end
30
+
31
+ def test_append_field
32
+ r = get_record
33
+ assert_equal(r.fields.length, 2)
34
+ end
35
+
36
+ def test_iterator
37
+ r = get_record
38
+ count = 0
39
+ r.each { |f| count += 1 }
40
+ assert_equal(count, 2)
41
+ end
42
+
43
+ def test_decode
44
+ raw = IO.read("test/one.dat")
45
+ r = MARC::Record.new_from_marc(raw)
46
+ assert_equal(r.class, MARC::Record)
47
+ assert_equal(r.leader, "00755cam 22002414a 4500")
48
+ assert_equal(r.fields.length, 18)
49
+ assert_equal(r.find { |f| f.tag == "245" }.to_s,
50
+ "245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ")
51
+ end
52
+
53
+ def test_decode_forgiving
54
+ raw = IO.read("test/one.dat")
55
+ r = MARC::Record.new_from_marc(raw, forgiving: true)
56
+ assert_equal(r.class, MARC::Record)
57
+ assert_equal(r.leader, "00755cam 22002414a 4500")
58
+ assert_equal(r.fields.length, 18)
59
+ assert_equal(r.find { |f| f.tag == "245" }.to_s,
60
+ "245 10 $a ActivePerl with ASP and ADO / $c Tobias Martinsson. ")
61
+ end
62
+
63
+ def test_encode
64
+ r1 = MARC::Record.new
65
+ r1.append(MARC::DataField.new("100", "2", "0", ["a", "Thomas, Dave"]))
66
+ r1.append(MARC::DataField.new("245", "0", "0", ["a", "Pragmatic Programmer"]))
67
+ raw = r1.to_marc
68
+ r2 = MARC::Record.new_from_marc(raw)
69
+ assert_equal(r1, r2)
70
+ end
71
+
72
+ def test_lookup_shorthand
73
+ r = get_record
74
+ assert_equal(r["100"]["a"], "Thomas, Dave")
75
+ end
76
+
77
+ def get_record
78
+ r = MARC::Record.new
79
+ r.append(MARC::DataField.new("100", "2", "0", ["a", "Thomas, Dave"]))
80
+ r.append(MARC::DataField.new("245", "0", "4", ["The Pragmatic Programmer"]))
81
+ r
82
+ end
83
+
84
+ def test_field_index
85
+ raw = IO.read("test/random_tag_order.dat")
86
+ r = MARC::Record.new_from_marc(raw)
87
+ assert_kind_of(Array, r.fields)
88
+ assert_kind_of(Array, r.tags)
89
+ assert_equal(["001", "005", "007", "008", "010", "028", "035", "040", "050", "245", "260", "300", "500", "505", "511", "650", "700", "906", "953", "991"], r.tags.sort)
90
+ assert_kind_of(Array, r.fields("035"))
91
+ raw2 = IO.read("test/random_tag_order2.dat")
92
+ r2 = MARC::Record.new_from_marc(raw2)
93
+ assert_equal(6, r2.fields("500").length)
94
+ # Test passing an array to Record#fields
95
+ assert_equal(3, r.fields(["500", "505", "510", "511"]).length)
96
+ # Test passing a Range to Record#fields
97
+ assert_equal(9, r.fields(("001".."099")).length)
98
+ end
99
+
100
+ def test_field_index_order
101
+ raw = IO.read("test/random_tag_order.dat")
102
+ r = MARC::Record.new_from_marc(raw)
103
+ notes = ["500", "505", "511"]
104
+ r.fields(("500".."599")).each do |f|
105
+ assert_equal(notes.pop, f.tag)
140
106
  end
141
107
 
142
- def test_frozen_fieldmap
143
- r = MARC::Record.new
144
- r.fields.push MARC::DataField.new('500', ' ', ' ', ['a', 'notes'])
145
-
146
- r.fields.freeze
147
-
148
- r.fields.inspect
149
- r.fields
150
- assert ! r.fields('500').empty?
151
-
152
- assert r.fields.instance_variable_get("@clean"), "FieldMap still marked clean"
108
+ raw2 = IO.read("test/random_tag_order2.dat")
109
+ r2 = MARC::Record.new_from_marc(raw2)
153
110
 
111
+ fields = ["050", "042", "010", "028", "024", "035", "041", "028", "040", "035", "008", "007", "005", "001"]
112
+ r2.each_by_tag(("001".."099")) do |f|
113
+ assert_equal(fields.pop, f.tag)
154
114
  end
155
115
 
156
-
116
+ five_hundreds = r2.fields("500")
117
+ assert_equal(five_hundreds.first["a"], '"Contemporary blues" interpretations of previously released songs; written by Bob Dylan.')
118
+ assert_equal(five_hundreds.last["a"], "Composer and program notes in container.")
119
+ end
120
+
121
+ # Some tests for the internal FieldMap hash, normally
122
+ # an implementation detail, but things get tricky and we need
123
+ # tests to make sure we're good. Some of these you might
124
+ # change if you change FieldMap caching implementation or contract/API.
125
+ def test_direct_change_dirties_fieldmap
126
+ # if we ask for #fields directly, and mutate it
127
+ # with it's own methods, does any cache update?
128
+ r = MARC::Record.new
129
+ assert r.fields("500").empty?
130
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
131
+ assert !r.fields("500").empty?, "New 505 directly added to #fields is picked up"
132
+
133
+ # Do it again, make sure #[] works too
134
+ r = MARC::Record.new
135
+ assert r["500"].nil?
136
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
137
+ assert r["500"], "New 505 directly added to #fields is picked up"
138
+ end
139
+
140
+ def test_frozen_fieldmap
141
+ r = MARC::Record.new
142
+ r.fields.push MARC::DataField.new("500", " ", " ", ["a", "notes"])
143
+
144
+ r.fields.freeze
145
+
146
+ r.fields.inspect
147
+ r.fields
148
+ assert !r.fields("500").empty?
149
+
150
+ assert r.fields.instance_variable_get(:@clean), "FieldMap still marked clean"
151
+ end
157
152
  end
data/test/tc_subfield.rb CHANGED
@@ -1,18 +1,19 @@
1
- require 'test/unit'
2
- require 'marc/subfield'
1
+ require "test/unit"
2
+ require "marc/subfield"
3
3
 
4
4
  class SubfieldTest < Test::Unit::TestCase
5
+ def test_ok
6
+ s = MARC::Subfield.new("a", "foo")
7
+ assert_equal(s.code, "a")
8
+ assert_equal(s.value, "foo")
9
+ end
5
10
 
6
- def test_ok
7
- s = MARC::Subfield.new('a', 'foo')
8
- assert_equal(s.code, 'a')
9
- assert_equal(s.value, 'foo')
10
- end
11
-
12
- def test_equals
13
- s1 =MARC::Subfield.new('a', 'foo')
14
- s2 =MARC::Subfield.new('a', 'foo')
15
- assert_equal(s1,s2)
16
- end
11
+ def test_equals
12
+ s1 = MARC::Subfield.new("a", "foo")
13
+ s2 = MARC::Subfield.new("a", "foo")
14
+ assert_equal(s1, s2)
17
15
 
16
+ assert_not_equal(s1, "a")
17
+ assert_not_equal(s1, "foo")
18
+ end
18
19
  end
data/test/tc_unsafe_xml.rb ADDED
@@ -0,0 +1,95 @@
1
+ require "test/unit"
2
+ require "marc"
3
+ require "stringio"
4
+
5
+ class UnsafeXMLTest < Test::Unit::TestCase
6
+ def basic_rec
7
+ rec = MARC::Record.new
8
+ rec.leader = "00925njm 22002777a 4500"
9
+ rec.append MARC::ControlField.new("007", "sdubumennmplu")
10
+ rec.append MARC::DataField.new("245", "0", "4",
11
+ ["a", "The Great Ray Charles"], ["h", "[sound recording]."])
12
+ rec.append MARC::DataField.new("998", " ", " ",
13
+ ["^", "Valid local subfield"])
14
+ rec
15
+ end
16
+
17
+ def text_xml_entities
18
+ r1 = MARC::Record.new
19
+ r1 << MARC::DataField.new("245", "0", "0", ["a", "foo & bar & baz"])
20
+ xml = MARC::UnsafeXMLWriter.encode(r1)
21
+ assert_match(/foo &amp; bar &amp; baz/, xml)
22
+ reader = MARC::XMLReader.new(StringIO.new(xml), parser: parser)
23
+ r2 = reader.entries[0]
24
+ assert_equal "foo & bar & baz", r2["245"]["a"]
25
+ end
26
+
27
+ def test_read_write
28
+ record1 = MARC::Record.new
29
+ record1.leader = "00925njm 22002777a 4500"
30
+ record1.append MARC::ControlField.new("007", "sdubumennmplu")
31
+ record1.append MARC::DataField.new("245", "0", "4",
32
+ ["a", "The Great Ray Charles"], ["h", "[sound recording]."])
33
+ record1.append MARC::DataField.new("998", " ", " ",
34
+ ["^", "Valid local subfield"])
35
+
36
+ writer = MARC::UnsafeXMLWriter.new("test/test.xml", stylesheet: "style.xsl")
37
+ writer.write(record1)
38
+ writer.close
39
+
40
+ xml = File.read("test/test.xml")
41
+ assert_match(/<controlfield tag=["']007["']>sdubumennmplu<\/controlfield>/, xml)
42
+ assert_match(/<\?xml-stylesheet type=["']text\/xsl" href="style.xsl["']\?>/, xml)
43
+
44
+ reader = MARC::XMLReader.new("test/test.xml")
45
+ record2 = reader.first
46
+ assert_equal(record1, record2)
47
+ ensure
48
+ File.unlink("test/test.xml")
49
+ end
50
+
51
+ def test_truncated_leader_roundtripping
52
+ record1 = MARC::Record.new
53
+ record1.leader = "00925njm 22002777a"
54
+
55
+ writer = MARC::UnsafeXMLWriter.new("test/test.xml", stylesheet: "style.xsl")
56
+ writer.write(record1)
57
+ writer.close
58
+
59
+ reader = MARC::XMLReader.new("test/test.xml")
60
+ record2 = reader.first
61
+
62
+ assert_equal("00925njm 22002777a 4500", record2.leader)
63
+ ensure
64
+ File.unlink("test/test.xml")
65
+ end
66
+
67
+ def test_single_record_document
68
+ xml = MARC::UnsafeXMLWriter.single_record_document(basic_rec)
69
+ rec = MARC::XMLReader.new(StringIO.new(xml)).first
70
+ assert_equal(basic_rec, rec)
71
+ end
72
+
73
+ def test_encode_same_as_rexml
74
+ rex_xml = MARC::XMLWriter.encode(basic_rec).to_s
75
+ unsafe_xml = MARC::UnsafeXMLWriter.encode(basic_rec)
76
+ rex = MARC::XMLReader.new(StringIO.new(rex_xml)).first
77
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
78
+ assert_equal(rex, unsafe)
79
+ end
80
+
81
+ def test_to_xml_string
82
+ rex_xml = basic_rec.to_xml_string
83
+ unsafe_xml = basic_rec.to_xml_string(fast_but_unsafe: true, include_namespace: false)
84
+ rex = MARC::XMLReader.new(StringIO.new(rex_xml)).first
85
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
86
+ assert_equal(rex, unsafe)
87
+ end
88
+
89
+ def test_to_xml_string_with_namespaces
90
+ unsafe_xml = basic_rec.to_xml_string(fast_but_unsafe: true, include_namespace: true)
91
+ rex = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
92
+ unsafe = MARC::XMLReader.new(StringIO.new(unsafe_xml)).first
93
+ assert_equal(rex, unsafe)
94
+ end
95
+ end