marc 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://secure.travis-ci.org/ruby-marc/ruby-marc.png)](http://travis-ci.org/ruby-marc/ruby-marc)
2
+
1
3
  marc is a ruby library for reading and writing MAchine Readable Cataloging
2
4
  (MARC). More information about MARC can be found at <http://www.loc.gov/marc>.
3
5
 
@@ -9,21 +9,16 @@ module MARC
9
9
  class ControlField
10
10
 
11
11
  # Initially, control tags are the numbers 1 through 9 or the string '000'
12
- @@control_tags = Set.new( (1..9).to_a)
13
- @@control_tags << '000'
12
+ @@control_tags = Set.new(%w{000 001 002 003 004 005 006 007 008 009})
14
13
 
15
14
  def self.control_tags
16
15
  return @@control_tags
17
16
  end
18
-
19
- # A tag is a control tag if it is a member of the @@control_tags set
20
- # as either a string (e.g., 'FMT') or in its .to_i representation
21
- # (e.g., '008'.to_i == 3 is in @@control_tags by default)
22
-
17
+
18
+ # A tag is a control tag if tag.to_s is a member of the @@control_tags set.
23
19
  def self.control_tag?(tag)
24
- return (@@control_tags.include?(tag.to_i) or @@control_tags.include?(tag))
20
+ return @@control_tags.include? tag.to_s
25
21
  end
26
-
27
22
 
28
23
  # the tag value (007, 008, etc)
29
24
  attr_accessor :tag
@@ -177,22 +177,26 @@ module MARC
177
177
  # print record
178
178
  # end
179
179
  def each
180
- # while there is data left in the file
181
- while rec_length_s = @handle.read(5)
182
- # make sure the record length looks like an integer
183
- rec_length_i = rec_length_s.to_i
184
- if rec_length_i == 0
185
- raise MARC::Exception.new("invalid record length: #{rec_length_s}")
186
- end
180
+ unless block_given?
181
+ return self.enum_for(:each)
182
+ else
183
+ # while there is data left in the file
184
+ while rec_length_s = @handle.read(5)
185
+ # make sure the record length looks like an integer
186
+ rec_length_i = rec_length_s.to_i
187
+ if rec_length_i == 0
188
+ raise MARC::Exception.new("invalid record length: #{rec_length_s}")
189
+ end
187
190
 
188
- # get the raw MARC21 for a record back from the file
189
- # using the record length
190
- raw = rec_length_s + @handle.read(rec_length_i-5)
191
+ # get the raw MARC21 for a record back from the file
192
+ # using the record length
193
+ raw = rec_length_s + @handle.read(rec_length_i-5)
191
194
 
192
- # create a record from the data and return it
193
- #record = MARC::Record.new_from_marc(raw)
194
- record = MARC::Reader.decode(raw, @encoding_options)
195
- yield record
195
+ # create a record from the data and return it
196
+ #record = MARC::Record.new_from_marc(raw)
197
+ record = MARC::Reader.decode(raw, @encoding_options)
198
+ yield record
199
+ end
196
200
  end
197
201
  end
198
202
 
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
@@ -105,8 +105,12 @@ module MARC
105
105
 
106
106
  # Loop through the MARC records in the XML document
107
107
  def each(&block)
108
- @block = block
109
- @parser.parse(@handle)
108
+ unless block_given?
109
+ return self.enum_for(:each)
110
+ else
111
+ @block = block
112
+ @parser.parse(@handle)
113
+ end
110
114
  end
111
115
 
112
116
 
@@ -165,13 +169,17 @@ module MARC
165
169
 
166
170
  # Loop through the MARC records in the XML document
167
171
  def each
168
- while @parser.has_next?
169
- event = @parser.pull
170
- # if it's the start of a record element
171
- if event.start_element? and strip_ns(event[0]) == 'record'
172
- yield build_record
173
- end
174
- end
172
+ unless block_given?
173
+ return self.enum_for(:each)
174
+ else
175
+ while @parser.has_next?
176
+ event = @parser.pull
177
+ # if it's the start of a record element
178
+ if event.start_element? and strip_ns(event[0]) == 'record'
179
+ yield build_record
180
+ end
181
+ end
182
+ end
175
183
  end
176
184
 
177
185
  private
@@ -310,11 +318,15 @@ module MARC
310
318
  end
311
319
 
312
320
  def each
313
- while (@parser.read) do
314
- if @parser.local_name == 'record' && @parser.namespace_uri == @ns
315
- yield build_record
316
- end
317
- end # while
321
+ unless block_given?
322
+ return self.enum_for(:each)
323
+ else
324
+ while (@parser.read) do
325
+ if @parser.local_name == 'record' && @parser.namespace_uri == @ns
326
+ yield build_record
327
+ end
328
+ end # while
329
+ end
318
330
  end # each
319
331
 
320
332
  def build_record
@@ -370,9 +382,13 @@ end
370
382
  end
371
383
 
372
384
  # Loop through the MARC records in the XML document
373
- def each(&block)
374
- @block = block
375
- parser_dispatch
385
+ def each(&block)
386
+ unless block_given?
387
+ return self.enum_for(:each)
388
+ else
389
+ @block = block
390
+ parser_dispatch
391
+ end
376
392
  end
377
393
 
378
394
  def parser_dispatch
@@ -0,0 +1 @@
1
+ 01998cam a2200469 a 4500001000800000005001700008008004100025020001800066020001500084024001800099024001200117035001200129035001700141035002100158040003200179042000800211049000900219050002100228066000700249100002900256245010000285246008400385260015000469300002100619504006400640541010900704600004100813600004700854650002500901650002300926650002900949650002500978752002201003880003701025880013001062880017301192910002601365936002701391938003801418991006001456994001201516408398520120302131100.0110313s2011 is b 001 0 heb c a9789651321337 a96513213348 a00032002131958 a3221319 a4083985 aocn710973037 a(OCoLC)710973037 aWEINBcWEINBdHLSdIXAdCUY apcc aJHEE 4aDS149b.R38 2011 c(21 6880-01aRatsabi, Shalom.106880-02aAnarkhizm be-"Tsiyon" :bben Marṭin Buber le-Aharon Daṿid Gordon /cShalom Ratsabi.1 iTitle on t.p. verso:aAnarchy in "Zion" :bbetween Martin Buber and A.D. Gordon 6880-03a[Tel Aviv] :bʻAm ʻoved :bha-Makhon le-ḥeḳer ha-Tsiyonut ve-Yiśra'el ʻa. sh. Ḥayim Ṿaitsman, Universiṭat Tel Aviv,cc2011. a339 p. ;c23 cm. aIncludes bibliographical references (p. 320-330) and index. 3Eisenhower copy:cPurchased with support from the National Endowment for the Humanities;dFY2012.5MdBJ.10aBuber, Martin,d1878-1965xReligion.10aGordon, Aaron David,d1856-1922xReligion. 0aZionism and Judaism. 0aReligious Zionism. 0aZionismxHistoriography. 0aZionismxPhilosophy. aIsraeldTel Aviv.1 6100-01/(2/raרצבי, שלום.106245-02/(2/raאנרכיזם ב״ציון״ :bבין מרטין בובר לאהרן דוד גורדון /cשלום רצבי. 6260-03/(2/ra[תל אביב] :bעם עובד :bהמכון לחקר הציונות וישראל עʺש חיים ויצמן, אוניברסיטת תל אביב,cc2011. a4083985bHorizon bib# aPR 747581025 741225747 aA.I. WeinbergbWEINnwb2011369996 aDS149.R38 2011flcbelccc. 1q0i6085034lemainmemsel aC0bJHE
@@ -13,6 +13,14 @@ class TestField < Test::Unit::TestCase
13
13
  assert_not_equal(f1, f3)
14
14
  end
15
15
 
16
+ def test_alphabetic_tag
17
+ alph = MARC::DataField.new('ALF')
18
+ assert_equal 'ALF', alph.tag
19
+
20
+ alphnum = MARC::DataField.new('0D9')
21
+ assert_equal '0D9', alphnum.tag
22
+ end
23
+
16
24
  def test_indicators
17
25
  f1 = MARC::DataField.new('100', '0', '1')
18
26
  assert_equal('0', f1.indicator1)
@@ -72,5 +72,16 @@ class ReaderTest < Test::Unit::TestCase
72
72
  records = reader.find_all { |r| r =~ /Foo/ }
73
73
  assert_equal(0, records.length)
74
74
  end
75
+
76
+ def test_binary_enumerator
77
+ reader = MARC::Reader.new('test/batch.dat')
78
+ iter = reader.each
79
+ r = iter.next
80
+ assert_instance_of(MARC::Record, r)
81
+ 9.times {iter.next} # total of ten records
82
+ assert_raises(StopIteration) { iter.next }
83
+ end
84
+
85
+
75
86
 
76
87
  end
@@ -1,5 +1,18 @@
1
1
  require 'test/unit'
2
2
  require 'marc'
3
+ require 'xmlsimple'
4
+
5
+ def xml_cmp a, b
6
+ eq_all_but_zero = Object.new.instance_eval do
7
+ def ==(other)
8
+ Integer(other) == 0 ? false : true
9
+ end
10
+ self
11
+ end
12
+ a = XmlSimple.xml_in(a.to_s, 'normalisespace' => eq_all_but_zero)
13
+ b = XmlSimple.xml_in(b.to_s, 'normalisespace' => eq_all_but_zero)
14
+ a == b
15
+ end
3
16
 
4
17
  class TestRecord < Test::Unit::TestCase
5
18
 
@@ -12,12 +25,7 @@ class TestRecord < Test::Unit::TestCase
12
25
  r = get_record()
13
26
  doc = r.to_xml
14
27
  assert_kind_of REXML::Element, doc
15
- if RUBY_VERSION < '1.9.0'
16
- assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
17
- else
18
- # REXML inexplicably sorts the attributes alphabetically in Ruby 1.9
19
- assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield ind1='2' ind2='0' tag='100'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield ind1='0' ind2='4' tag='245'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
20
- end
28
+ assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
21
29
  end
22
30
 
23
31
  def test_append_field
@@ -0,0 +1,62 @@
1
+ require 'test/unit'
2
+
3
+
4
+ # jruby 1.7.4 (1.9.3p392) 2013-05-16 2390d3b on Java HotSpot(TM) 64-Bit Server VM 1.6.0_51-b11-457-11M4509 [darwin-x86_64]
5
+ class TestField < Test::Unit::TestCase
6
+
7
+ def test_confused_bytecount
8
+
9
+
10
+
11
+
12
+
13
+ string_with_ctrl = "hello\x1fhello".force_encoding("UTF-8")
14
+ # control chars like \x1F ARE legal UTF-8, this is correct:
15
+ assert string_with_ctrl.valid_encoding?
16
+
17
+ # It's even considered ascii_only? -- this is correct, both MRI and jruby
18
+ assert string_with_ctrl.ascii_only?
19
+
20
+
21
+ # For reasons I can't explain, I can only reproduce the
22
+ # problem right now by doing a split, on the control char
23
+ # (this does represent my actual use case)
24
+ # Whether the split operand is tagged ASCII or UTF-8 does not matter,
25
+ # case is identical either way.
26
+ elements = string_with_ctrl.split("\x1F".force_encoding("UTF-8"))
27
+
28
+ # For some reason weirdness only happens on the second one in the split
29
+ # in this case.
30
+ second = elements[1]
31
+
32
+
33
+ # For a string composed of all one-byte wide ascii, as this one is...
34
+ assert_equal "hello", second
35
+ assert second.ascii_only?
36
+
37
+ # string[0] and string.byteslice(0) should be identical. They are
38
+ # different when the string contains multi-byte chars.
39
+ # using #[], we're okay
40
+ assert_equal "h", second[0]
41
+
42
+ # But on jruby, this following actually raises an exception!
43
+ assert_equal "h", second.byteslice(0)
44
+ # That one up there actually just raised!!!
45
+ # Java::JavaLang::ArrayIndexOutOfBoundsException: 12
46
+ # org.jruby.util.ByteList.equal(ByteList.java:960)
47
+
48
+ # In other cases I saw in my real app, it didn't raise, but
49
+ # did return the WRONG bytes. Ie, not a 'h' above as expected, or
50
+ # not:
51
+
52
+
53
+ assert_equal second[0], second.byteslice(0)
54
+ # but in jruby we never even get here, we raise.
55
+
56
+ # In MRI, we pass ALL these tests with no exceptions.
57
+ # (ruby 1.9.3p448 (2013-06-27 revision 41675) [x86_64-darwin12.4.0])
58
+ end
59
+
60
+
61
+
62
+ end
@@ -157,6 +157,25 @@ class XMLTest < Test::Unit::TestCase
157
157
 
158
158
  File.unlink('test/test.xml')
159
159
  end
160
+
161
+ def test_xml_enumerator
162
+ @parsers.each do | parser |
163
+ puts "\nRunning test_xml_enumerator with: #{parser}.\n"
164
+ xml_enumerator_test(parser)
165
+ end
166
+ end
167
+
168
+
169
+ def xml_enumerator_test(parser)
170
+ # confusingly, test/batch.xml only has two records, not 10 like batch.dat
171
+ reader = MARC::XMLReader.new('test/batch.xml', :parser=>parser)
172
+ iter = reader.each
173
+ r = iter.next
174
+ assert_instance_of(MARC::Record, r)
175
+ iter.next # total of two records
176
+ assert_raises(StopIteration) { iter.next }
177
+ end
178
+
160
179
 
161
180
  end
162
181
 
File without changes
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.5.1
4
+ version: 0.6.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Kevin Clarke
@@ -13,15 +13,14 @@ authors:
13
13
  autorequire: marc
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2013-07-11 00:00:00.000000000 Z
16
+ date: 2013-08-19 00:00:00.000000000 Z
17
17
  dependencies: []
18
- description:
18
+ description:
19
19
  email: ehs@pobox.com
20
20
  executables: []
21
21
  extensions: []
22
22
  extra_rdoc_files: []
23
23
  files:
24
- - lib/marc.rb
25
24
  - lib/marc/constants.rb
26
25
  - lib/marc/controlfield.rb
27
26
  - lib/marc/datafield.rb
@@ -35,10 +34,12 @@ files:
35
34
  - lib/marc/xml_parsers.rb
36
35
  - lib/marc/xmlreader.rb
37
36
  - lib/marc/xmlwriter.rb
37
+ - lib/marc.rb
38
38
  - test/batch.dat
39
39
  - test/batch.xml
40
40
  - test/cp866_multirecord.marc
41
41
  - test/cp866_unimarc.marc
42
+ - test/hebrew880s.marc
42
43
  - test/marc8_accented_chars.marc
43
44
  - test/no-leading-zero.xml
44
45
  - test/non-numeric.dat
@@ -57,6 +58,7 @@ files:
57
58
  - test/tc_reader_char_encodings.rb
58
59
  - test/tc_record.rb
59
60
  - test/tc_subfield.rb
61
+ - test/tc_weird_jruby_bytes.rb
60
62
  - test/tc_writer.rb
61
63
  - test/tc_xml.rb
62
64
  - test/ts_marc.rb
@@ -69,26 +71,26 @@ files:
69
71
  - LICENSE
70
72
  homepage: https://github.com/ruby-marc/ruby-marc/
71
73
  licenses: []
72
- post_install_message:
74
+ post_install_message:
73
75
  rdoc_options: []
74
76
  require_paths:
75
77
  - lib
76
78
  required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
77
80
  requirements:
78
- - - '>='
81
+ - - ! '>='
79
82
  - !ruby/object:Gem::Version
80
83
  version: 1.8.6
81
- none: false
82
84
  required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
83
86
  requirements:
84
- - - '>='
87
+ - - ! '>='
85
88
  - !ruby/object:Gem::Version
86
89
  version: '0'
87
- none: false
88
90
  requirements: []
89
- rubyforge_project:
90
- rubygems_version: 1.8.24
91
- signing_key:
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.23
93
+ signing_key:
92
94
  specification_version: 3
93
95
  summary: A ruby library for working with Machine Readable Cataloging
94
96
  test_files: