marc 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +2 -0
- data/lib/marc/controlfield.rb +4 -9
- data/lib/marc/reader.rb +18 -14
- data/lib/marc/version.rb +1 -1
- data/lib/marc/xml_parsers.rb +33 -17
- data/test/hebrew880s.marc +1 -0
- data/test/tc_datafield.rb +8 -0
- data/test/tc_reader.rb +11 -0
- data/test/tc_record.rb +14 -6
- data/test/tc_weird_jruby_bytes.rb +62 -0
- data/test/tc_xml.rb +19 -0
- data/test/ts_marc.rb +0 -0
- metadata +15 -13
data/README.md
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
[Build Status](http://travis-ci.org/ruby-marc/ruby-marc)
|
2
|
+
|
1
3
|
marc is a ruby library for reading and writing MAchine Readable Cataloging
|
2
4
|
(MARC). More information about MARC can be found at <http://www.loc.gov/marc>.
|
3
5
|
|
data/lib/marc/controlfield.rb
CHANGED
@@ -9,21 +9,16 @@ module MARC
|
|
9
9
|
class ControlField
|
10
10
|
|
11
11
|
# Initially, control tags are the strings '000' through '009'
|
12
|
-
@@control_tags = Set.new(
|
13
|
-
@@control_tags << '000'
|
12
|
+
@@control_tags = Set.new(%w{000 001 002 003 004 005 006 007 008 009})
|
14
13
|
|
15
14
|
def self.control_tags
|
16
15
|
return @@control_tags
|
17
16
|
end
|
18
|
-
|
19
|
-
# A tag is a control tag if
|
20
|
-
# as either a string (e.g., 'FMT') or in its .to_i representation
|
21
|
-
# (e.g., '008'.to_i == 3 is in @@control_tags by default)
|
22
|
-
|
17
|
+
|
18
|
+
# A tag is a control tag if tag.to_s is a member of the @@control_tags set.
|
23
19
|
def self.control_tag?(tag)
|
24
|
-
return
|
20
|
+
return @@control_tags.include? tag.to_s
|
25
21
|
end
|
26
|
-
|
27
22
|
|
28
23
|
# the tag value (007, 008, etc)
|
29
24
|
attr_accessor :tag
|
data/lib/marc/reader.rb
CHANGED
@@ -177,22 +177,26 @@ module MARC
|
|
177
177
|
# print record
|
178
178
|
# end
|
179
179
|
def each
|
180
|
-
|
181
|
-
|
182
|
-
|
183
|
-
|
184
|
-
|
185
|
-
|
186
|
-
|
180
|
+
unless block_given?
|
181
|
+
return self.enum_for(:each)
|
182
|
+
else
|
183
|
+
# while there is data left in the file
|
184
|
+
while rec_length_s = @handle.read(5)
|
185
|
+
# make sure the record length looks like an integer
|
186
|
+
rec_length_i = rec_length_s.to_i
|
187
|
+
if rec_length_i == 0
|
188
|
+
raise MARC::Exception.new("invalid record length: #{rec_length_s}")
|
189
|
+
end
|
187
190
|
|
188
|
-
|
189
|
-
|
190
|
-
|
191
|
+
# get the raw MARC21 for a record back from the file
|
192
|
+
# using the record length
|
193
|
+
raw = rec_length_s + @handle.read(rec_length_i-5)
|
191
194
|
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
195
|
+
# create a record from the data and return it
|
196
|
+
#record = MARC::Record.new_from_marc(raw)
|
197
|
+
record = MARC::Reader.decode(raw, @encoding_options)
|
198
|
+
yield record
|
199
|
+
end
|
196
200
|
end
|
197
201
|
end
|
198
202
|
|
data/lib/marc/version.rb
CHANGED
data/lib/marc/xml_parsers.rb
CHANGED
@@ -105,8 +105,12 @@ module MARC
|
|
105
105
|
|
106
106
|
# Loop through the MARC records in the XML document
|
107
107
|
def each(&block)
|
108
|
-
|
109
|
-
|
108
|
+
unless block_given?
|
109
|
+
return self.enum_for(:each)
|
110
|
+
else
|
111
|
+
@block = block
|
112
|
+
@parser.parse(@handle)
|
113
|
+
end
|
110
114
|
end
|
111
115
|
|
112
116
|
|
@@ -165,13 +169,17 @@ module MARC
|
|
165
169
|
|
166
170
|
# Loop through the MARC records in the XML document
|
167
171
|
def each
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
|
172
|
+
unless block_given?
|
173
|
+
return self.enum_for(:each)
|
174
|
+
else
|
175
|
+
while @parser.has_next?
|
176
|
+
event = @parser.pull
|
177
|
+
# if it's the start of a record element
|
178
|
+
if event.start_element? and strip_ns(event[0]) == 'record'
|
179
|
+
yield build_record
|
180
|
+
end
|
181
|
+
end
|
182
|
+
end
|
175
183
|
end
|
176
184
|
|
177
185
|
private
|
@@ -310,11 +318,15 @@ module MARC
|
|
310
318
|
end
|
311
319
|
|
312
320
|
def each
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
321
|
+
unless block_given?
|
322
|
+
return self.enum_for(:each)
|
323
|
+
else
|
324
|
+
while (@parser.read) do
|
325
|
+
if @parser.local_name == 'record' && @parser.namespace_uri == @ns
|
326
|
+
yield build_record
|
327
|
+
end
|
328
|
+
end # while
|
329
|
+
end
|
318
330
|
end # each
|
319
331
|
|
320
332
|
def build_record
|
@@ -370,9 +382,13 @@ end
|
|
370
382
|
end
|
371
383
|
|
372
384
|
# Loop through the MARC records in the XML document
|
373
|
-
def each(&block)
|
374
|
-
|
375
|
-
|
385
|
+
def each(&block)
|
386
|
+
unless block_given?
|
387
|
+
return self.enum_for(:each)
|
388
|
+
else
|
389
|
+
@block = block
|
390
|
+
parser_dispatch
|
391
|
+
end
|
376
392
|
end
|
377
393
|
|
378
394
|
def parser_dispatch
|
@@ -0,0 +1 @@
|
|
1
|
+
01998cam a2200469 a 4500001000800000005001700008008004100025020001800066020001500084024001800099024001200117035001200129035001700141035002100158040003200179042000800211049000900219050002100228066000700249100002900256245010000285246008400385260015000469300002100619504006400640541010900704600004100813600004700854650002500901650002300926650002900949650002500978752002201003880003701025880013001062880017301192910002601365936002701391938003801418991006001456994001201516408398520120302131100.0110313s2011 is b 001 0 heb c a9789651321337 a96513213348 a00032002131958 a3221319 a4083985 aocn710973037 a(OCoLC)710973037 aWEINBcWEINBdHLSdIXAdCUY apcc aJHEE 4aDS149b.R38 2011 c(21 6880-01aRatsabi, Shalom.106880-02aAnarkhizm be-"Tsiyon" :bben Marṭin Buber le-Aharon Daṿid Gordon /cShalom Ratsabi.1 iTitle on t.p. verso:aAnarchy in "Zion" :bbetween Martin Buber and A.D. Gordon 6880-03a[Tel Aviv] :bʻAm ʻoved :bha-Makhon le-ḥeḳer ha-Tsiyonut ve-Yiśra'el ʻa. sh. Ḥayim Ṿaitsman, Universiṭat Tel Aviv,cc2011. a339 p. ;c23 cm. aIncludes bibliographical references (p. 320-330) and index. 3Eisenhower copy:cPurchased with support from the National Endowment for the Humanities;dFY2012.5MdBJ.10aBuber, Martin,d1878-1965xReligion.10aGordon, Aaron David,d1856-1922xReligion. 0aZionism and Judaism. 0aReligious Zionism. 0aZionismxHistoriography. 0aZionismxPhilosophy. aIsraeldTel Aviv.1 6100-01/(2/raרצבי, שלום.106245-02/(2/raאנרכיזם ב״ציון״ :bבין מרטין בובר לאהרן דוד גורדון /cשלום רצבי. 6260-03/(2/ra[תל אביב] :bעם עובד :bהמכון לחקר הציונות וישראל עʺש חיים ויצמן, אוניברסיטת תל אביב,cc2011. a4083985bHorizon bib# aPR 747581025 741225747 aA.I. WeinbergbWEINnwb2011369996 aDS149.R38 2011flcbelccc. 1q0i6085034lemainmemsel aC0bJHE
|
data/test/tc_datafield.rb
CHANGED
@@ -13,6 +13,14 @@ class TestField < Test::Unit::TestCase
|
|
13
13
|
assert_not_equal(f1, f3)
|
14
14
|
end
|
15
15
|
|
16
|
+
def test_alphabetic_tag
|
17
|
+
alph = MARC::DataField.new('ALF')
|
18
|
+
assert_equal 'ALF', alph.tag
|
19
|
+
|
20
|
+
alphnum = MARC::DataField.new('0D9')
|
21
|
+
assert_equal '0D9', alphnum.tag
|
22
|
+
end
|
23
|
+
|
16
24
|
def test_indicators
|
17
25
|
f1 = MARC::DataField.new('100', '0', '1')
|
18
26
|
assert_equal('0', f1.indicator1)
|
data/test/tc_reader.rb
CHANGED
@@ -72,5 +72,16 @@ class ReaderTest < Test::Unit::TestCase
|
|
72
72
|
records = reader.find_all { |r| r =~ /Foo/ }
|
73
73
|
assert_equal(0, records.length)
|
74
74
|
end
|
75
|
+
|
76
|
+
def test_binary_enumerator
|
77
|
+
reader = MARC::Reader.new('test/batch.dat')
|
78
|
+
iter = reader.each
|
79
|
+
r = iter.next
|
80
|
+
assert_instance_of(MARC::Record, r)
|
81
|
+
9.times {iter.next} # total of ten records
|
82
|
+
assert_raises(StopIteration) { iter.next }
|
83
|
+
end
|
84
|
+
|
85
|
+
|
75
86
|
|
76
87
|
end
|
data/test/tc_record.rb
CHANGED
@@ -1,5 +1,18 @@
|
|
1
1
|
require 'test/unit'
|
2
2
|
require 'marc'
|
3
|
+
require 'xmlsimple'
|
4
|
+
|
5
|
+
def xml_cmp a, b
|
6
|
+
eq_all_but_zero = Object.new.instance_eval do
|
7
|
+
def ==(other)
|
8
|
+
Integer(other) == 0 ? false : true
|
9
|
+
end
|
10
|
+
self
|
11
|
+
end
|
12
|
+
a = XmlSimple.xml_in(a.to_s, 'normalisespace' => eq_all_but_zero)
|
13
|
+
b = XmlSimple.xml_in(b.to_s, 'normalisespace' => eq_all_but_zero)
|
14
|
+
a == b
|
15
|
+
end
|
3
16
|
|
4
17
|
class TestRecord < Test::Unit::TestCase
|
5
18
|
|
@@ -12,12 +25,7 @@ class TestRecord < Test::Unit::TestCase
|
|
12
25
|
r = get_record()
|
13
26
|
doc = r.to_xml
|
14
27
|
assert_kind_of REXML::Element, doc
|
15
|
-
|
16
|
-
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
17
|
-
else
|
18
|
-
# REXML inexplicably sorts the attributes alphabetically in Ruby 1.9
|
19
|
-
assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield ind1='2' ind2='0' tag='100'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield ind1='0' ind2='4' tag='245'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
|
20
|
-
end
|
28
|
+
assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
|
21
29
|
end
|
22
30
|
|
23
31
|
def test_append_field
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require 'test/unit'
|
2
|
+
|
3
|
+
|
4
|
+
# jruby 1.7.4 (1.9.3p392) 2013-05-16 2390d3b on Java HotSpot(TM) 64-Bit Server VM 1.6.0_51-b11-457-11M4509 [darwin-x86_64]
|
5
|
+
class TestField < Test::Unit::TestCase
|
6
|
+
|
7
|
+
def test_confused_bytecount
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
|
12
|
+
|
13
|
+
string_with_ctrl = "hello\x1fhello".force_encoding("UTF-8")
|
14
|
+
# control chars like \x1F ARE legal UTF-8, this is correct:
|
15
|
+
assert string_with_ctrl.valid_encoding?
|
16
|
+
|
17
|
+
# It's even considered ascii_only? -- this is correct, both MRI and jruby
|
18
|
+
assert string_with_ctrl.ascii_only?
|
19
|
+
|
20
|
+
|
21
|
+
# For reasons I can't explain, I can only reproduce the
|
22
|
+
# problem right now by doing a split, on the control char
|
23
|
+
# (this does represent my actual use case)
|
24
|
+
# Whether the split operand is tagged ASCII or UTF-8 does not matter,
|
25
|
+
# case is identical either way.
|
26
|
+
elements = string_with_ctrl.split("\x1F".force_encoding("UTF-8"))
|
27
|
+
|
28
|
+
# For some reason weirdness only happens on the second one in the split
|
29
|
+
# in this case.
|
30
|
+
second = elements[1]
|
31
|
+
|
32
|
+
|
33
|
+
# For a string composed of all one-byte wide ascii, as this one is...
|
34
|
+
assert_equal "hello", second
|
35
|
+
assert second.ascii_only?
|
36
|
+
|
37
|
+
# string[0] and string.byteslice(0) should be identical. They are
|
38
|
+
# different when the string contains multi-byte chars.
|
39
|
+
# using #[], we're okay
|
40
|
+
assert_equal "h", second[0]
|
41
|
+
|
42
|
+
# But on jruby, this following actually raises an exception!
|
43
|
+
assert_equal "h", second.byteslice(0)
|
44
|
+
# That one up there actually just raised!!!
|
45
|
+
# Java::JavaLang::ArrayIndexOutOfBoundsException: 12
|
46
|
+
# org.jruby.util.ByteList.equal(ByteList.java:960)
|
47
|
+
|
48
|
+
# In other cases I saw in my real app, it didn't raise, but
|
49
|
+
# did return the WRONG bytes. Ie, not a 'h' above as expected, or
|
50
|
+
# not:
|
51
|
+
|
52
|
+
|
53
|
+
assert_equal second[0], second.byteslice(0)
|
54
|
+
# but in jruby we never even get here, we raise.
|
55
|
+
|
56
|
+
# In MRI, we pass ALL these tests with no exceptions.
|
57
|
+
# (ruby 1.9.3p448 (2013-06-27 revision 41675) [x86_64-darwin12.4.0])
|
58
|
+
end
|
59
|
+
|
60
|
+
|
61
|
+
|
62
|
+
end
|
data/test/tc_xml.rb
CHANGED
@@ -157,6 +157,25 @@ class XMLTest < Test::Unit::TestCase
|
|
157
157
|
|
158
158
|
File.unlink('test/test.xml')
|
159
159
|
end
|
160
|
+
|
161
|
+
def test_xml_enumerator
|
162
|
+
@parsers.each do | parser |
|
163
|
+
puts "\nRunning test_xml_enumerator with: #{parser}.\n"
|
164
|
+
xml_enumerator_test(parser)
|
165
|
+
end
|
166
|
+
end
|
167
|
+
|
168
|
+
|
169
|
+
def xml_enumerator_test(parser)
|
170
|
+
# confusingly, test/batch.xml only has two records, not 10 like batch.dat
|
171
|
+
reader = MARC::XMLReader.new('test/batch.xml', :parser=>parser)
|
172
|
+
iter = reader.each
|
173
|
+
r = iter.next
|
174
|
+
assert_instance_of(MARC::Record, r)
|
175
|
+
iter.next # total of two records
|
176
|
+
assert_raises(StopIteration) { iter.next }
|
177
|
+
end
|
178
|
+
|
160
179
|
|
161
180
|
end
|
162
181
|
|
data/test/ts_marc.rb
CHANGED
File without changes
|
metadata
CHANGED
@@ -1,8 +1,8 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: marc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
5
|
-
|
4
|
+
version: 0.6.0
|
5
|
+
prerelease:
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
8
8
|
- Kevin Clarke
|
@@ -13,15 +13,14 @@ authors:
|
|
13
13
|
autorequire: marc
|
14
14
|
bindir: bin
|
15
15
|
cert_chain: []
|
16
|
-
date: 2013-
|
16
|
+
date: 2013-08-19 00:00:00.000000000 Z
|
17
17
|
dependencies: []
|
18
|
-
description:
|
18
|
+
description:
|
19
19
|
email: ehs@pobox.com
|
20
20
|
executables: []
|
21
21
|
extensions: []
|
22
22
|
extra_rdoc_files: []
|
23
23
|
files:
|
24
|
-
- lib/marc.rb
|
25
24
|
- lib/marc/constants.rb
|
26
25
|
- lib/marc/controlfield.rb
|
27
26
|
- lib/marc/datafield.rb
|
@@ -35,10 +34,12 @@ files:
|
|
35
34
|
- lib/marc/xml_parsers.rb
|
36
35
|
- lib/marc/xmlreader.rb
|
37
36
|
- lib/marc/xmlwriter.rb
|
37
|
+
- lib/marc.rb
|
38
38
|
- test/batch.dat
|
39
39
|
- test/batch.xml
|
40
40
|
- test/cp866_multirecord.marc
|
41
41
|
- test/cp866_unimarc.marc
|
42
|
+
- test/hebrew880s.marc
|
42
43
|
- test/marc8_accented_chars.marc
|
43
44
|
- test/no-leading-zero.xml
|
44
45
|
- test/non-numeric.dat
|
@@ -57,6 +58,7 @@ files:
|
|
57
58
|
- test/tc_reader_char_encodings.rb
|
58
59
|
- test/tc_record.rb
|
59
60
|
- test/tc_subfield.rb
|
61
|
+
- test/tc_weird_jruby_bytes.rb
|
60
62
|
- test/tc_writer.rb
|
61
63
|
- test/tc_xml.rb
|
62
64
|
- test/ts_marc.rb
|
@@ -69,26 +71,26 @@ files:
|
|
69
71
|
- LICENSE
|
70
72
|
homepage: https://github.com/ruby-marc/ruby-marc/
|
71
73
|
licenses: []
|
72
|
-
post_install_message:
|
74
|
+
post_install_message:
|
73
75
|
rdoc_options: []
|
74
76
|
require_paths:
|
75
77
|
- lib
|
76
78
|
required_ruby_version: !ruby/object:Gem::Requirement
|
79
|
+
none: false
|
77
80
|
requirements:
|
78
|
-
- - '>='
|
81
|
+
- - ! '>='
|
79
82
|
- !ruby/object:Gem::Version
|
80
83
|
version: 1.8.6
|
81
|
-
none: false
|
82
84
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
85
|
+
none: false
|
83
86
|
requirements:
|
84
|
-
- - '>='
|
87
|
+
- - ! '>='
|
85
88
|
- !ruby/object:Gem::Version
|
86
89
|
version: '0'
|
87
|
-
none: false
|
88
90
|
requirements: []
|
89
|
-
rubyforge_project:
|
90
|
-
rubygems_version: 1.8.
|
91
|
-
signing_key:
|
91
|
+
rubyforge_project:
|
92
|
+
rubygems_version: 1.8.23
|
93
|
+
signing_key:
|
92
94
|
specification_version: 3
|
93
95
|
summary: A ruby library for working with Machine Readable Cataloging
|
94
96
|
test_files:
|