marc 0.5.0 → 0.5.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -215,6 +215,10 @@ module MARC
215
215
  # declared on the string passed in.
216
216
  params[:external_encoding] = marc.encoding
217
217
  end
218
+ # And now that we've recorded the current encoding, we force
219
+ # to binary encoding, because we're going to be doing byte arithmetic,
220
+ # and want to avoid byte-vs-char confusion.
221
+ marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
218
222
 
219
223
  record = Record.new()
220
224
  record.leader = marc[0..LEADER_LENGTH-1]
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
@@ -112,7 +112,7 @@ module MARC
112
112
 
113
113
  def method_missing(methName, *args)
114
114
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
115
- :end_element, :comment, :warning, :error, :cdata_block]
115
+ :end_element, :comment, :warning, :error, :cdata_block, :processing_instruction]
116
116
  unless sax_methods.index(methName)
117
117
  raise NoMethodError.new("undefined method '#{methName} for #{self}", 'no_meth')
118
118
  end
@@ -293,7 +293,11 @@ module MARC
293
293
  end
294
294
  end
295
295
 
296
- module LibXMLReader
296
+
297
+
298
+
299
+ unless defined? JRUBY_VERSION
300
+ module LibXMLReader
297
301
 
298
302
  def self.extended(receiver)
299
303
  require 'xml'
@@ -344,6 +348,7 @@ module MARC
344
348
  return r
345
349
  end
346
350
  end
351
+ end
347
352
 
348
353
  # The JrubySTAXReader uses native java calls to parse the incoming stream
349
354
  # of marc-xml. It includes most of the work from GenericPullParser
@@ -352,9 +357,7 @@ module MARC
352
357
  module JRubySTAXReader
353
358
  include GenericPullParser
354
359
  def self.extended(receiver)
355
- include Java
356
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
357
- include javax.xml.stream
360
+ require 'java' # may only be neccesary in jruby 1.6
358
361
  receiver.init
359
362
  end
360
363
 
@@ -373,13 +376,13 @@ module MARC
373
376
  end
374
377
 
375
378
  def parser_dispatch
376
- while event = @parser.next and event != XMLStreamConstants.END_DOCUMENT do
379
+ while event = @parser.next and event != javax.xml.stream.XMLStreamConstants.END_DOCUMENT do
377
380
  case event
378
- when XMLStreamConstants.START_ELEMENT
381
+ when javax.xml.stream.XMLStreamConstants.START_ELEMENT
379
382
  start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
380
- when XMLStreamConstants.END_ELEMENT
383
+ when javax.xml.stream.XMLStreamConstants.END_ELEMENT
381
384
  end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
382
- when XMLStreamConstants.CHARACTERS
385
+ when javax.xml.stream.XMLStreamConstants.CHARACTERS
383
386
  characters(@parser.getText)
384
387
  end
385
388
  end
@@ -69,6 +69,7 @@ module MARC
69
69
  raise ArgumentError, "jstax only available under jruby" unless defined? JRUBY_VERSION
70
70
  extend JRubySTAXReader
71
71
  when 'libxml' then extend LibXMLReader
72
+ raise ArgumentError, "libxml not available under jruby" if defined? JRUBY_VERSION
72
73
  end
73
74
  end
74
75
 
@@ -95,14 +96,9 @@ module MARC
95
96
  # Returns the value of the best available parser
96
97
  def self.best_available
97
98
  parser = nil
98
- jruby = [USE_JSTAX, USE_NOKOGIRI, USE_JREXML]
99
+ jruby = [USE_NOKOGIRI, USE_JSTAX, USE_JREXML]
99
100
  ruby = [USE_NOKOGIRI, USE_LIBXML]
100
101
  if defined? JRUBY_VERSION
101
- begin
102
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
103
- parser = USE_JSTAX
104
- rescue java.lang.ClassNotFoundException
105
- end
106
102
  unless parser
107
103
  begin
108
104
  require 'nokogiri'
@@ -110,6 +106,14 @@ module MARC
110
106
  rescue LoadError
111
107
  end
112
108
  end
109
+ unless parser
110
+ begin
111
+ # try to find the class, so we throw an error if not found
112
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
113
+ parser = USE_JSTAX
114
+ rescue java.lang.ClassNotFoundException
115
+ end
116
+ end
113
117
  unless parser
114
118
  begin
115
119
  require 'jrexml'
@@ -123,13 +127,15 @@ module MARC
123
127
  parser = USE_NOKOGIRI
124
128
  rescue LoadError
125
129
  end
126
- unless parser
127
- begin
128
- require 'xml'
129
- parser = USE_LIBXML
130
- rescue LoadError
131
- end
132
- end
130
+ unless defined? JRUBY_VERSION
131
+ unless parser
132
+ begin
133
+ require 'xml'
134
+ parser = USE_LIBXML
135
+ rescue LoadError
136
+ end
137
+ end
138
+ end
133
139
  end
134
140
  parser = USE_REXML unless parser
135
141
  parser
@@ -164,15 +164,6 @@ end
164
164
  def choose_best_available_parser
165
165
  parser_name = nil
166
166
  parser = nil
167
- if defined? JRUBY_VERSION
168
- require 'java'
169
- begin
170
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
171
- parser_name = "jstax"
172
- parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
173
- rescue java.lang.ClassNotFoundException
174
- end
175
- end
176
167
  unless parser
177
168
  begin
178
169
  require 'nokogiri'
@@ -181,6 +172,17 @@ end
181
172
  rescue LoadError
182
173
  end
183
174
  end
175
+ unless parser
176
+ if defined? JRUBY_VERSION
177
+ require 'java'
178
+ begin
179
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
180
+ parser_name = "jstax"
181
+ parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
182
+ rescue java.lang.ClassNotFoundException
183
+ end
184
+ end
185
+ end
184
186
  unless parser
185
187
  if !defined? JRUBY_VERSION
186
188
  begin
@@ -10,10 +10,12 @@ class XMLTest < Test::Unit::TestCase
10
10
  @parsers << :nokogiri
11
11
  rescue LoadError
12
12
  end
13
- begin
14
- require 'xml'
15
- @parsers << :libxml
16
- rescue LoadError
13
+ unless defined? JRUBY_VERSION
14
+ begin
15
+ require 'xml'
16
+ @parsers << :libxml
17
+ rescue LoadError
18
+ end
17
19
  end
18
20
  if defined? JRUBY_VERSION
19
21
  begin
metadata CHANGED
@@ -1,15 +1,10 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: marc
3
- version: !ruby/object:Gem::Version
4
- hash: 11
5
- prerelease:
6
- segments:
7
- - 0
8
- - 5
9
- - 0
10
- version: 0.5.0
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.5.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Kevin Clarke
14
9
  - Bill Dueber
15
10
  - William Groppe
@@ -18,104 +13,83 @@ authors:
18
13
  autorequire: marc
19
14
  bindir: bin
20
15
  cert_chain: []
21
-
22
- date: 2012-05-07 00:00:00 Z
16
+ date: 2013-07-11 00:00:00.000000000 Z
23
17
  dependencies: []
24
-
25
- description:
18
+ description:
26
19
  email: ehs@pobox.com
27
20
  executables: []
28
-
29
21
  extensions: []
30
-
31
22
  extra_rdoc_files: []
32
-
33
- files:
34
- - lib/marc/xml_parsers.rb
23
+ files:
24
+ - lib/marc.rb
25
+ - lib/marc/constants.rb
35
26
  - lib/marc/controlfield.rb
36
- - lib/marc/reader.rb
37
- - lib/marc/dublincore.rb
38
- - lib/marc/xmlwriter.rb
39
27
  - lib/marc/datafield.rb
40
- - lib/marc/record.rb
28
+ - lib/marc/dublincore.rb
41
29
  - lib/marc/exception.rb
30
+ - lib/marc/reader.rb
31
+ - lib/marc/record.rb
32
+ - lib/marc/subfield.rb
33
+ - lib/marc/version.rb
42
34
  - lib/marc/writer.rb
35
+ - lib/marc/xml_parsers.rb
43
36
  - lib/marc/xmlreader.rb
44
- - lib/marc/version.rb
45
- - lib/marc/constants.rb
46
- - lib/marc/subfield.rb
47
- - lib/marc.rb
48
- - test/one.dat
49
- - test/one.xml
37
+ - lib/marc/xmlwriter.rb
38
+ - test/batch.dat
50
39
  - test/batch.xml
51
40
  - test/cp866_multirecord.marc
52
- - test/random_tag_order.dat
41
+ - test/cp866_unimarc.marc
42
+ - test/marc8_accented_chars.marc
53
43
  - test/no-leading-zero.xml
54
- - test/tc_record.rb
55
- - test/tc_parsers.rb
56
- - test/tc_hash.rb
57
- - test/tc_subfield.rb
58
44
  - test/non-numeric.dat
59
- - test/test_cp866.txt
60
- - test/batch.dat
61
- - test/jruby_bad_transcode.rb
45
+ - test/non-numeric.xml
46
+ - test/one.dat
47
+ - test/one.xml
48
+ - test/random_tag_order.dat
49
+ - test/random_tag_order2.dat
62
50
  - test/tc_controlfield.rb
63
- - test/tc_bare_ruby_strings.rb
64
- - test/tc_marchash.rb
65
- - test/marc8_accented_chars.marc
66
- - test/cp866_unimarc.marc
67
51
  - test/tc_datafield.rb
68
- - test/tc_reader_char_encodings.rb
69
- - test/utf8_with_bad_bytes.marc
70
- - test/utf8.marc
71
- - test/bare_cp866.txt
72
52
  - test/tc_dublincore.rb
73
- - test/utf8_multirecord.marc
53
+ - test/tc_hash.rb
54
+ - test/tc_marchash.rb
55
+ - test/tc_parsers.rb
56
+ - test/tc_reader.rb
57
+ - test/tc_reader_char_encodings.rb
58
+ - test/tc_record.rb
59
+ - test/tc_subfield.rb
74
60
  - test/tc_writer.rb
75
61
  - test/tc_xml.rb
76
- - test/non-numeric.xml
77
- - test/random_tag_order2.dat
78
62
  - test/ts_marc.rb
79
- - test/tc_reader.rb
80
- - test/jruby_just_string.rb
63
+ - test/utf8.marc
64
+ - test/utf8_multirecord.marc
65
+ - test/utf8_with_bad_bytes.marc
81
66
  - Rakefile
82
67
  - README.md
83
68
  - Changes
84
69
  - LICENSE
85
70
  homepage: https://github.com/ruby-marc/ruby-marc/
86
71
  licenses: []
87
-
88
- post_install_message:
72
+ post_install_message:
89
73
  rdoc_options: []
90
-
91
- require_paths:
74
+ require_paths:
92
75
  - lib
93
- required_ruby_version: !ruby/object:Gem::Requirement
94
- none: false
95
- requirements:
96
- - - ">="
97
- - !ruby/object:Gem::Version
98
- hash: 59
99
- segments:
100
- - 1
101
- - 8
102
- - 6
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
103
80
  version: 1.8.6
104
- required_rubygems_version: !ruby/object:Gem::Requirement
105
81
  none: false
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- hash: 3
110
- segments:
111
- - 0
112
- version: "0"
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ none: false
113
88
  requirements: []
114
-
115
- rubyforge_project:
116
- rubygems_version: 1.8.23
117
- signing_key:
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
118
92
  specification_version: 3
119
93
  summary: A ruby library for working with Machine Readable Cataloging
120
- test_files:
94
+ test_files:
121
95
  - test/ts_marc.rb
@@ -1 +0,0 @@
1
- ����㭠�. ������ ��� ����⬠��
@@ -1,52 +0,0 @@
1
- # encoding: utf-8
2
-
3
- # 1.9.3p0 :005 > 0x8D.chr.force_encoding("cp866").encode("UTF-8")
4
- utf8 = "Н".force_encoding("UTF-8")
5
-
6
- puts "There's a cyrillic letter that looks kinda like a capital H. Here's what it looks like in unicode: Н"
7
-
8
- puts "In unicode, that's byte array: " + utf8.bytes.to_a.inspect
9
-
10
- puts "We're gonna use String#encode to convert it to an IBM866 encoding, also known as cp866, an encoding sometimes used in Russia."
11
-
12
-
13
- puts " `utf8.encode(\"IBM866\")`"
14
-
15
- cp866 = utf8.encode("IBM866")
16
- puts cp866.bytes.to_a.inspect
17
-
18
- exit
19
-
20
- puts
21
- puts "In cp866, the actual bytes are: #{cp866_phrase.bytes.to_a.inspect}"
22
- puts
23
-
24
- puts "We're going to write the cp866 string to disk, using binary:binary to try and make sure we get the bytes to disk without transcoding."
25
-
26
- write = File.open("test_cp866.txt", "w", :internal_encoding => "binary", :external_encoding => "binary")
27
- write.puts cp866_phrase
28
- write.close
29
- puts
30
-
31
- puts "Now we're going to read it in with a File object with external_encoding set to IBM866, but no internal_encoding set."
32
-
33
- puts
34
- puts "Make sure we have no default internal_encoding: " + Encoding.default_internal.nil?.inspect
35
-
36
- read = File.open("test_cp866.txt", :external_encoding => "cp866")
37
- puts
38
- puts "Our ruby file object should have external_encoding of IBM866: " + read.external_encoding.inspect
39
- puts " and internal_encoding nil: " + read.internal_encoding.inspect
40
-
41
- puts
42
-
43
- read_in_string = read.read
44
- read.close
45
-
46
- puts "The encoding of the string we read in should be IBM866: " + (read_in_string.encoding.name == "IBM866").inspect
47
-
48
- puts
49
- puts "And the bytes should be the very same bytes we wrote out (which are valid cp866) " + (read_in_string.bytes.to_a[0,3] == [140, 165, 166]).inspect + " (#{read_in_string.bytes.to_a})"
50
-
51
- puts "The above is TRUE in MRI 1.9.3, but FALSE in jruby "
52
-
@@ -1,39 +0,0 @@
1
- # encoding: binary
2
-
3
- # jruby 1.6.7 (ruby-1.9.2-p312) (2012-02-22 3e82bc8) (Java HotSpot(TM) 64-Bit Server VM 1.6.0_20) [linux-amd64-java]
4
-
5
- # There is a letter in cyrillic that looks kind of like a capital
6
- # H. In the cp866 encoding (http://en.wikipedia.org/wiki/Code_page_866)
7
- # it's represented by "\x8D" which is decimal 141.
8
- #
9
- # In ruby 1.9, it _ought_ to be possible to have those bytes
10
- # in a string, and tell ruby it's cp866.
11
-
12
- cp866 = "\x8D".force_encoding("IBM866")
13
-
14
- # in MRI 1.9.3, if we inspect that, we get "\x8D", just like we expect.
15
- # and if we look at #bytes.to_a, we get [141], just like we expect.
16
- puts cp866.inspect
17
- puts cp866.bytes.to_a.inspect
18
- # However, in jruby if we #inspect instead of getting "\x8D",
19
- # we get "\u008D" -- this is wrong, it's NOT that unicode codepoint.
20
- # In jruby, bytes.to_a.inspect is still [141], it hasn't changed
21
- # the bytes, but it's confused about what's going on.
22
-
23
- # We see this encoding confusion demonstrated if we try
24
- # a String#encode.
25
- #
26
- # MRI 1.9.3 is perfectly capable of transcoding this to UTF-8
27
-
28
- utf8 = cp866.encode("UTF-8")
29
- puts utf8.inspect # => in MRI displays cyrillic in terminal no prob
30
- puts utf8.bytes.to_a.inspect # => in MRI [208, 157], proper bytes for utf8
31
-
32
- # In jruby, puts utf8.inspect displays "\u008D", and
33
- # utf8.bytes.to_a.inspect is [194, 141]. I don't know where the
34
- # 191 came from, but it has NOT succesfully transcoded to utf8.
35
-
36
- # In other cases, the #encode will actually raise an illegal byte
37
- # exception if the original bytes were not legal for UTF8 (or UTF16?) --
38
- # but the original bytes were not meant to be considered unicode at all.
39
-
@@ -1,43 +0,0 @@
1
- require 'test/unit'
2
-
3
- class TestBareRubyStrings < Test::Unit::TestCase
4
-
5
- # The file bare_cp866.txt has in it a phrase encoded in cp866,
6
- # that if it were translated to utf8 would be:
7
- # "Междунар. новости мира пластмасс\n"
8
- #
9
- # The first few bytes of that in utf8 are:
10
- # "\xD0\x9C\xD0\xB5"
11
- #
12
- # In cp866 as it is on disk, it's first few bytes are "\x8C\xA5"
13
-
14
- def test_read_cp866_with_external_encoding
15
- return
16
- file = File.open("test/bare_cp866.txt", "r:cp866")
17
- string = file.read
18
-
19
- assert_equal "IBM866", string.encoding.name
20
-
21
- cp866_binary = string.dup.force_encoding("binary")
22
- assert cp866_binary.start_with?( "\x8C\xA5".force_encoding("binary") )
23
-
24
- transcoded = string.encode("UTF-8")
25
- assert_equal "UTF-8", transcoded.encoding.name
26
-
27
- utf8_binary = transcoded.dup.force_encoding("binary")
28
-
29
- assert utf8_binary.start_with?( "\xD0\x9C\xD0\xB5".force_encoding("binary"))
30
- end
31
-
32
- def test_read_cp866_binary_all_the_way
33
- # tell ruby to treat it as binary binary binary
34
- file = File.open("test/bare_cp866.txt", :external_encoding => "binary", :internal_encoding => "binary")
35
-
36
- string = file.read
37
-
38
- # we should get the same bytes that were on disk, right?
39
- assert string.start_with?( "\x8C\xA5".force_encoding("binary"))
40
- end
41
-
42
-
43
- end
@@ -1 +0,0 @@
1
- ���