marc 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/README.md CHANGED
@@ -1,3 +1,5 @@
1
+ [![Build Status](https://secure.travis-ci.org/ruby-marc/ruby-marc.png)](http://travis-ci.org/ruby-marc/ruby-marc)
2
+
1
3
  marc is a ruby library for reading and writing MAchine Readable Cataloging
2
4
  (MARC). More information about MARC can be found at <http://www.loc.gov/marc>.
3
5
 
@@ -9,21 +9,16 @@ module MARC
9
9
  class ControlField
10
10
 
11
11
  # Initially, control tags are the strings '000' through '009'
12
- @@control_tags = Set.new( (1..9).to_a)
13
- @@control_tags << '000'
12
+ @@control_tags = Set.new(%w{000 001 002 003 004 005 006 007 008 009})
14
13
 
15
14
  def self.control_tags
16
15
  return @@control_tags
17
16
  end
18
-
19
- # A tag is a control tag if it is a member of the @@control_tags set
20
- # as either a string (e.g., 'FMT') or in its .to_i representation
21
- # (e.g., '008'.to_i == 3 is in @@control_tags by default)
22
-
17
+
18
+ # A tag is a control tag if tag.to_s is a member of the @@control_tags set.
23
19
  def self.control_tag?(tag)
24
- return (@@control_tags.include?(tag.to_i) or @@control_tags.include?(tag))
20
+ return @@control_tags.include? tag.to_s
25
21
  end
26
-
27
22
 
28
23
  # the tag value (007, 008, etc)
29
24
  attr_accessor :tag
@@ -177,22 +177,26 @@ module MARC
177
177
  # print record
178
178
  # end
179
179
  def each
180
- # while there is data left in the file
181
- while rec_length_s = @handle.read(5)
182
- # make sure the record length looks like an integer
183
- rec_length_i = rec_length_s.to_i
184
- if rec_length_i == 0
185
- raise MARC::Exception.new("invalid record length: #{rec_length_s}")
186
- end
180
+ unless block_given?
181
+ return self.enum_for(:each)
182
+ else
183
+ # while there is data left in the file
184
+ while rec_length_s = @handle.read(5)
185
+ # make sure the record length looks like an integer
186
+ rec_length_i = rec_length_s.to_i
187
+ if rec_length_i == 0
188
+ raise MARC::Exception.new("invalid record length: #{rec_length_s}")
189
+ end
187
190
 
188
- # get the raw MARC21 for a record back from the file
189
- # using the record length
190
- raw = rec_length_s + @handle.read(rec_length_i-5)
191
+ # get the raw MARC21 for a record back from the file
192
+ # using the record length
193
+ raw = rec_length_s + @handle.read(rec_length_i-5)
191
194
 
192
- # create a record from the data and return it
193
- #record = MARC::Record.new_from_marc(raw)
194
- record = MARC::Reader.decode(raw, @encoding_options)
195
- yield record
195
+ # create a record from the data and return it
196
+ #record = MARC::Record.new_from_marc(raw)
197
+ record = MARC::Reader.decode(raw, @encoding_options)
198
+ yield record
199
+ end
196
200
  end
197
201
  end
198
202
 
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.5.1"
2
+ VERSION = "0.6.0"
3
3
  end
@@ -105,8 +105,12 @@ module MARC
105
105
 
106
106
  # Loop through the MARC records in the XML document
107
107
  def each(&block)
108
- @block = block
109
- @parser.parse(@handle)
108
+ unless block_given?
109
+ return self.enum_for(:each)
110
+ else
111
+ @block = block
112
+ @parser.parse(@handle)
113
+ end
110
114
  end
111
115
 
112
116
 
@@ -165,13 +169,17 @@ module MARC
165
169
 
166
170
  # Loop through the MARC records in the XML document
167
171
  def each
168
- while @parser.has_next?
169
- event = @parser.pull
170
- # if it's the start of a record element
171
- if event.start_element? and strip_ns(event[0]) == 'record'
172
- yield build_record
173
- end
174
- end
172
+ unless block_given?
173
+ return self.enum_for(:each)
174
+ else
175
+ while @parser.has_next?
176
+ event = @parser.pull
177
+ # if it's the start of a record element
178
+ if event.start_element? and strip_ns(event[0]) == 'record'
179
+ yield build_record
180
+ end
181
+ end
182
+ end
175
183
  end
176
184
 
177
185
  private
@@ -310,11 +318,15 @@ module MARC
310
318
  end
311
319
 
312
320
  def each
313
- while (@parser.read) do
314
- if @parser.local_name == 'record' && @parser.namespace_uri == @ns
315
- yield build_record
316
- end
317
- end # while
321
+ unless block_given?
322
+ return self.enum_for(:each)
323
+ else
324
+ while (@parser.read) do
325
+ if @parser.local_name == 'record' && @parser.namespace_uri == @ns
326
+ yield build_record
327
+ end
328
+ end # while
329
+ end
318
330
  end # each
319
331
 
320
332
  def build_record
@@ -370,9 +382,13 @@ end
370
382
  end
371
383
 
372
384
  # Loop through the MARC records in the XML document
373
- def each(&block)
374
- @block = block
375
- parser_dispatch
385
+ def each(&block)
386
+ unless block_given?
387
+ return self.enum_for(:each)
388
+ else
389
+ @block = block
390
+ parser_dispatch
391
+ end
376
392
  end
377
393
 
378
394
  def parser_dispatch
@@ -0,0 +1 @@
1
+ 01998cam a2200469 a 4500001000800000005001700008008004100025020001800066020001500084024001800099024001200117035001200129035001700141035002100158040003200179042000800211049000900219050002100228066000700249100002900256245010000285246008400385260015000469300002100619504006400640541010900704600004100813600004700854650002500901650002300926650002900949650002500978752002201003880003701025880013001062880017301192910002601365936002701391938003801418991006001456994001201516408398520120302131100.0110313s2011 is b 001 0 heb c a9789651321337 a96513213348 a00032002131958 a3221319 a4083985 aocn710973037 a(OCoLC)710973037 aWEINBcWEINBdHLSdIXAdCUY apcc aJHEE 4aDS149b.R38 2011 c(21 6880-01aRatsabi, Shalom.106880-02aAnarkhizm be-"Tsiyon" :bben Marṭin Buber le-Aharon Daṿid Gordon /cShalom Ratsabi.1 iTitle on t.p. verso:aAnarchy in "Zion" :bbetween Martin Buber and A.D. Gordon 6880-03a[Tel Aviv] :bʻAm ʻoved :bha-Makhon le-ḥeḳer ha-Tsiyonut ve-Yiśra'el ʻa. sh. Ḥayim Ṿaitsman, Universiṭat Tel Aviv,cc2011. a339 p. ;c23 cm. aIncludes bibliographical references (p. 320-330) and index. 3Eisenhower copy:cPurchased with support from the National Endowment for the Humanities;dFY2012.5MdBJ.10aBuber, Martin,d1878-1965xReligion.10aGordon, Aaron David,d1856-1922xReligion. 0aZionism and Judaism. 0aReligious Zionism. 0aZionismxHistoriography. 0aZionismxPhilosophy. aIsraeldTel Aviv.1 6100-01/(2/raרצבי, שלום.106245-02/(2/raאנרכיזם ב״ציון״ :bבין מרטין בובר לאהרן דוד גורדון /cשלום רצבי. 6260-03/(2/ra[תל אביב] :bעם עובד :bהמכון לחקר הציונות וישראל עʺש חיים ויצמן, אוניברסיטת תל אביב,cc2011. a4083985bHorizon bib# aPR 747581025 741225747 aA.I. WeinbergbWEINnwb2011369996 aDS149.R38 2011flcbelccc. 1q0i6085034lemainmemsel aC0bJHE
@@ -13,6 +13,14 @@ class TestField < Test::Unit::TestCase
13
13
  assert_not_equal(f1, f3)
14
14
  end
15
15
 
16
+ def test_alphabetic_tag
17
+ alph = MARC::DataField.new('ALF')
18
+ assert_equal 'ALF', alph.tag
19
+
20
+ alphnum = MARC::DataField.new('0D9')
21
+ assert_equal '0D9', alphnum.tag
22
+ end
23
+
16
24
  def test_indicators
17
25
  f1 = MARC::DataField.new('100', '0', '1')
18
26
  assert_equal('0', f1.indicator1)
@@ -72,5 +72,16 @@ class ReaderTest < Test::Unit::TestCase
72
72
  records = reader.find_all { |r| r =~ /Foo/ }
73
73
  assert_equal(0, records.length)
74
74
  end
75
+
76
+ def test_binary_enumerator
77
+ reader = MARC::Reader.new('test/batch.dat')
78
+ iter = reader.each
79
+ r = iter.next
80
+ assert_instance_of(MARC::Record, r)
81
+ 9.times {iter.next} # total of ten records
82
+ assert_raises(StopIteration) { iter.next }
83
+ end
84
+
85
+
75
86
 
76
87
  end
@@ -1,5 +1,18 @@
1
1
  require 'test/unit'
2
2
  require 'marc'
3
+ require 'xmlsimple'
4
+
5
+ def xml_cmp a, b
6
+ eq_all_but_zero = Object.new.instance_eval do
7
+ def ==(other)
8
+ Integer(other) == 0 ? false : true
9
+ end
10
+ self
11
+ end
12
+ a = XmlSimple.xml_in(a.to_s, 'normalisespace' => eq_all_but_zero)
13
+ b = XmlSimple.xml_in(b.to_s, 'normalisespace' => eq_all_but_zero)
14
+ a == b
15
+ end
3
16
 
4
17
  class TestRecord < Test::Unit::TestCase
5
18
 
@@ -12,12 +25,7 @@ class TestRecord < Test::Unit::TestCase
12
25
  r = get_record()
13
26
  doc = r.to_xml
14
27
  assert_kind_of REXML::Element, doc
15
- if RUBY_VERSION < '1.9.0'
16
- assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
17
- else
18
- # REXML inexplicably sorts the attributes alphabetically in Ruby 1.9
19
- assert_equal "<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield ind1='2' ind2='0' tag='100'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield ind1='0' ind2='4' tag='245'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s
20
- end
28
+ assert xml_cmp("<record xmlns='http://www.loc.gov/MARC21/slim'><leader> Z 22 4500</leader><datafield tag='100' ind1='2' ind2='0'><subfield code='a'>Thomas, Dave</subfield></datafield><datafield tag='245' ind1='0' ind2='4'><subfield code='The Pragmatic Programmer'></subfield></datafield></record>", doc.to_s)
21
29
  end
22
30
 
23
31
  def test_append_field
@@ -0,0 +1,62 @@
1
+ require 'test/unit'
2
+
3
+
4
+ # jruby 1.7.4 (1.9.3p392) 2013-05-16 2390d3b on Java HotSpot(TM) 64-Bit Server VM 1.6.0_51-b11-457-11M4509 [darwin-x86_64]
5
+ class TestField < Test::Unit::TestCase
6
+
7
+ def test_confused_bytecount
8
+
9
+
10
+
11
+
12
+
13
+ string_with_ctrl = "hello\x1fhello".force_encoding("UTF-8")
14
+ # control chars like \x1F ARE legal UTF-8, this is correct:
15
+ assert string_with_ctrl.valid_encoding?
16
+
17
+ # It's even considered ascii_only? -- this is correct, both MRI and jruby
18
+ assert string_with_ctrl.ascii_only?
19
+
20
+
21
+ # For reasons I can't explain, I can only reproduce the
22
+ # problem right now by doing a split, on the control char
23
+ # (this does represent my actual use case)
24
+ # Whether the split operand is tagged ASCII or UTF-8 does not matter,
25
+ # case is identical either way.
26
+ elements = string_with_ctrl.split("\x1F".force_encoding("UTF-8"))
27
+
28
+ # For some reason weirdness only happens on the second one in the split
29
+ # in this case.
30
+ second = elements[1]
31
+
32
+
33
+ # For a string composed of all one-byte wide ascii, as this one is...
34
+ assert_equal "hello", second
35
+ assert second.ascii_only?
36
+
37
+ # string[0] and string.byteslice(0) should be identical. They are
38
+ # different when the string contains multi-byte chars.
39
+ # using #[], we're okay
40
+ assert_equal "h", second[0]
41
+
42
+ # But on jruby, this following actually raises an exception!
43
+ assert_equal "h", second.byteslice(0)
44
+ # That one up there actually just raised!!!
45
+ # Java::JavaLang::ArrayIndexOutOfBoundsException: 12
46
+ # org.jruby.util.ByteList.equal(ByteList.java:960)
47
+
48
+ # In other cases I saw in my real app, it didn't raise, but
49
+ # did return the WRONG bytes. Ie, not a 'h' above as expected, or
50
+ # not:
51
+
52
+
53
+ assert_equal second[0], second.byteslice(0)
54
+ # but in jruby we never even get here, we raise.
55
+
56
+ # In MRI, we pass ALL these tests with no exceptions.
57
+ # (ruby 1.9.3p448 (2013-06-27 revision 41675) [x86_64-darwin12.4.0])
58
+ end
59
+
60
+
61
+
62
+ end
@@ -157,6 +157,25 @@ class XMLTest < Test::Unit::TestCase
157
157
 
158
158
  File.unlink('test/test.xml')
159
159
  end
160
+
161
+ def test_xml_enumerator
162
+ @parsers.each do | parser |
163
+ puts "\nRunning test_xml_enumerator with: #{parser}.\n"
164
+ xml_enumerator_test(parser)
165
+ end
166
+ end
167
+
168
+
169
+ def xml_enumerator_test(parser)
170
+ # confusingly, test/batch.xml only has two records, not 10 like batch.dat
171
+ reader = MARC::XMLReader.new('test/batch.xml', :parser=>parser)
172
+ iter = reader.each
173
+ r = iter.next
174
+ assert_instance_of(MARC::Record, r)
175
+ iter.next # total of two records
176
+ assert_raises(StopIteration) { iter.next }
177
+ end
178
+
160
179
 
161
180
  end
162
181
 
File without changes
metadata CHANGED
@@ -1,8 +1,8 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: marc
3
3
  version: !ruby/object:Gem::Version
4
- prerelease:
5
- version: 0.5.1
4
+ version: 0.6.0
5
+ prerelease:
6
6
  platform: ruby
7
7
  authors:
8
8
  - Kevin Clarke
@@ -13,15 +13,14 @@ authors:
13
13
  autorequire: marc
14
14
  bindir: bin
15
15
  cert_chain: []
16
- date: 2013-07-11 00:00:00.000000000 Z
16
+ date: 2013-08-19 00:00:00.000000000 Z
17
17
  dependencies: []
18
- description:
18
+ description:
19
19
  email: ehs@pobox.com
20
20
  executables: []
21
21
  extensions: []
22
22
  extra_rdoc_files: []
23
23
  files:
24
- - lib/marc.rb
25
24
  - lib/marc/constants.rb
26
25
  - lib/marc/controlfield.rb
27
26
  - lib/marc/datafield.rb
@@ -35,10 +34,12 @@ files:
35
34
  - lib/marc/xml_parsers.rb
36
35
  - lib/marc/xmlreader.rb
37
36
  - lib/marc/xmlwriter.rb
37
+ - lib/marc.rb
38
38
  - test/batch.dat
39
39
  - test/batch.xml
40
40
  - test/cp866_multirecord.marc
41
41
  - test/cp866_unimarc.marc
42
+ - test/hebrew880s.marc
42
43
  - test/marc8_accented_chars.marc
43
44
  - test/no-leading-zero.xml
44
45
  - test/non-numeric.dat
@@ -57,6 +58,7 @@ files:
57
58
  - test/tc_reader_char_encodings.rb
58
59
  - test/tc_record.rb
59
60
  - test/tc_subfield.rb
61
+ - test/tc_weird_jruby_bytes.rb
60
62
  - test/tc_writer.rb
61
63
  - test/tc_xml.rb
62
64
  - test/ts_marc.rb
@@ -69,26 +71,26 @@ files:
69
71
  - LICENSE
70
72
  homepage: https://github.com/ruby-marc/ruby-marc/
71
73
  licenses: []
72
- post_install_message:
74
+ post_install_message:
73
75
  rdoc_options: []
74
76
  require_paths:
75
77
  - lib
76
78
  required_ruby_version: !ruby/object:Gem::Requirement
79
+ none: false
77
80
  requirements:
78
- - - '>='
81
+ - - ! '>='
79
82
  - !ruby/object:Gem::Version
80
83
  version: 1.8.6
81
- none: false
82
84
  required_rubygems_version: !ruby/object:Gem::Requirement
85
+ none: false
83
86
  requirements:
84
- - - '>='
87
+ - - ! '>='
85
88
  - !ruby/object:Gem::Version
86
89
  version: '0'
87
- none: false
88
90
  requirements: []
89
- rubyforge_project:
90
- rubygems_version: 1.8.24
91
- signing_key:
91
+ rubyforge_project:
92
+ rubygems_version: 1.8.23
93
+ signing_key:
92
94
  specification_version: 3
93
95
  summary: A ruby library for working with Machine Readable Cataloging
94
96
  test_files: