marc 0.5.0 → 0.5.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -215,6 +215,10 @@ module MARC
215
215
  # declared on the string passed in.
216
216
  params[:external_encoding] = marc.encoding
217
217
  end
218
+ # And now that we've recorded the current encoding, we force
219
+ # to binary encoding, because we're going to be doing byte arithmetic,
220
+ # and want to avoid byte-vs-char confusion.
221
+ marc.force_encoding("binary") if marc.respond_to?(:force_encoding)
218
222
 
219
223
  record = Record.new()
220
224
  record.leader = marc[0..LEADER_LENGTH-1]
@@ -1,3 +1,3 @@
1
1
  module MARC
2
- VERSION = "0.5.0"
2
+ VERSION = "0.5.1"
3
3
  end
@@ -112,7 +112,7 @@ module MARC
112
112
 
113
113
  def method_missing(methName, *args)
114
114
  sax_methods = [:xmldecl, :start_document, :end_document, :start_element,
115
- :end_element, :comment, :warning, :error, :cdata_block]
115
+ :end_element, :comment, :warning, :error, :cdata_block, :processing_instruction]
116
116
  unless sax_methods.index(methName)
117
117
  raise NoMethodError.new("undefined method '#{methName} for #{self}", 'no_meth')
118
118
  end
@@ -293,7 +293,11 @@ module MARC
293
293
  end
294
294
  end
295
295
 
296
- module LibXMLReader
296
+
297
+
298
+
299
+ unless defined? JRUBY_VERSION
300
+ module LibXMLReader
297
301
 
298
302
  def self.extended(receiver)
299
303
  require 'xml'
@@ -344,6 +348,7 @@ module MARC
344
348
  return r
345
349
  end
346
350
  end
351
+ end
347
352
 
348
353
  # The JrubySTAXReader uses native java calls to parse the incoming stream
349
354
  # of marc-xml. It includes most of the work from GenericPullParser
@@ -352,9 +357,7 @@ module MARC
352
357
  module JRubySTAXReader
353
358
  include GenericPullParser
354
359
  def self.extended(receiver)
355
- include Java
356
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
357
- include javax.xml.stream
360
+ require 'java' # may only be necessary in jruby 1.6
358
361
  receiver.init
359
362
  end
360
363
 
@@ -373,13 +376,13 @@ module MARC
373
376
  end
374
377
 
375
378
  def parser_dispatch
376
- while event = @parser.next and event != XMLStreamConstants.END_DOCUMENT do
379
+ while event = @parser.next and event != javax.xml.stream.XMLStreamConstants.END_DOCUMENT do
377
380
  case event
378
- when XMLStreamConstants.START_ELEMENT
381
+ when javax.xml.stream.XMLStreamConstants.START_ELEMENT
379
382
  start_element_namespace(@parser.getLocalName, [], nil, @parser.getNamespaceURI, nil)
380
- when XMLStreamConstants.END_ELEMENT
383
+ when javax.xml.stream.XMLStreamConstants.END_ELEMENT
381
384
  end_element_namespace(@parser.getLocalName, @parser.getPrefix, @parser.getNamespaceURI)
382
- when XMLStreamConstants.CHARACTERS
385
+ when javax.xml.stream.XMLStreamConstants.CHARACTERS
383
386
  characters(@parser.getText)
384
387
  end
385
388
  end
@@ -69,6 +69,7 @@ module MARC
69
69
  raise ArgumentError, "jstax only available under jruby" unless defined? JRUBY_VERSION
70
70
  extend JRubySTAXReader
71
71
  when 'libxml' then extend LibXMLReader
72
+ raise ArgumentError, "libxml not available under jruby" if defined? JRUBY_VERSION
72
73
  end
73
74
  end
74
75
 
@@ -95,14 +96,9 @@ module MARC
95
96
  # Returns the value of the best available parser
96
97
  def self.best_available
97
98
  parser = nil
98
- jruby = [USE_JSTAX, USE_NOKOGIRI, USE_JREXML]
99
+ jruby = [USE_NOKOGIRI, USE_JSTAX, USE_JREXML]
99
100
  ruby = [USE_NOKOGIRI, USE_LIBXML]
100
101
  if defined? JRUBY_VERSION
101
- begin
102
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
103
- parser = USE_JSTAX
104
- rescue java.lang.ClassNotFoundException
105
- end
106
102
  unless parser
107
103
  begin
108
104
  require 'nokogiri'
@@ -110,6 +106,14 @@ module MARC
110
106
  rescue LoadError
111
107
  end
112
108
  end
109
+ unless parser
110
+ begin
111
+ # try to find the class, so we throw an error if not found
112
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
113
+ parser = USE_JSTAX
114
+ rescue java.lang.ClassNotFoundException
115
+ end
116
+ end
113
117
  unless parser
114
118
  begin
115
119
  require 'jrexml'
@@ -123,13 +127,15 @@ module MARC
123
127
  parser = USE_NOKOGIRI
124
128
  rescue LoadError
125
129
  end
126
- unless parser
127
- begin
128
- require 'xml'
129
- parser = USE_LIBXML
130
- rescue LoadError
131
- end
132
- end
130
+ unless defined? JRUBY_VERSION
131
+ unless parser
132
+ begin
133
+ require 'xml'
134
+ parser = USE_LIBXML
135
+ rescue LoadError
136
+ end
137
+ end
138
+ end
133
139
  end
134
140
  parser = USE_REXML unless parser
135
141
  parser
@@ -164,15 +164,6 @@ end
164
164
  def choose_best_available_parser
165
165
  parser_name = nil
166
166
  parser = nil
167
- if defined? JRUBY_VERSION
168
- require 'java'
169
- begin
170
- java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
171
- parser_name = "jstax"
172
- parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
173
- rescue java.lang.ClassNotFoundException
174
- end
175
- end
176
167
  unless parser
177
168
  begin
178
169
  require 'nokogiri'
@@ -181,6 +172,17 @@ end
181
172
  rescue LoadError
182
173
  end
183
174
  end
175
+ unless parser
176
+ if defined? JRUBY_VERSION
177
+ require 'java'
178
+ begin
179
+ java.lang.Class.forName("javax.xml.stream.XMLInputFactory")
180
+ parser_name = "jstax"
181
+ parser = Java::ComSunOrgApacheXercesInternalImpl::XMLStreamReaderImpl
182
+ rescue java.lang.ClassNotFoundException
183
+ end
184
+ end
185
+ end
184
186
  unless parser
185
187
  if !defined? JRUBY_VERSION
186
188
  begin
@@ -10,10 +10,12 @@ class XMLTest < Test::Unit::TestCase
10
10
  @parsers << :nokogiri
11
11
  rescue LoadError
12
12
  end
13
- begin
14
- require 'xml'
15
- @parsers << :libxml
16
- rescue LoadError
13
+ unless defined? JRUBY_VERSION
14
+ begin
15
+ require 'xml'
16
+ @parsers << :libxml
17
+ rescue LoadError
18
+ end
17
19
  end
18
20
  if defined? JRUBY_VERSION
19
21
  begin
metadata CHANGED
@@ -1,15 +1,10 @@
1
- --- !ruby/object:Gem::Specification
1
+ --- !ruby/object:Gem::Specification
2
2
  name: marc
3
- version: !ruby/object:Gem::Version
4
- hash: 11
5
- prerelease:
6
- segments:
7
- - 0
8
- - 5
9
- - 0
10
- version: 0.5.0
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.5.1
11
6
  platform: ruby
12
- authors:
7
+ authors:
13
8
  - Kevin Clarke
14
9
  - Bill Dueber
15
10
  - William Groppe
@@ -18,104 +13,83 @@ authors:
18
13
  autorequire: marc
19
14
  bindir: bin
20
15
  cert_chain: []
21
-
22
- date: 2012-05-07 00:00:00 Z
16
+ date: 2013-07-11 00:00:00.000000000 Z
23
17
  dependencies: []
24
-
25
- description:
18
+ description:
26
19
  email: ehs@pobox.com
27
20
  executables: []
28
-
29
21
  extensions: []
30
-
31
22
  extra_rdoc_files: []
32
-
33
- files:
34
- - lib/marc/xml_parsers.rb
23
+ files:
24
+ - lib/marc.rb
25
+ - lib/marc/constants.rb
35
26
  - lib/marc/controlfield.rb
36
- - lib/marc/reader.rb
37
- - lib/marc/dublincore.rb
38
- - lib/marc/xmlwriter.rb
39
27
  - lib/marc/datafield.rb
40
- - lib/marc/record.rb
28
+ - lib/marc/dublincore.rb
41
29
  - lib/marc/exception.rb
30
+ - lib/marc/reader.rb
31
+ - lib/marc/record.rb
32
+ - lib/marc/subfield.rb
33
+ - lib/marc/version.rb
42
34
  - lib/marc/writer.rb
35
+ - lib/marc/xml_parsers.rb
43
36
  - lib/marc/xmlreader.rb
44
- - lib/marc/version.rb
45
- - lib/marc/constants.rb
46
- - lib/marc/subfield.rb
47
- - lib/marc.rb
48
- - test/one.dat
49
- - test/one.xml
37
+ - lib/marc/xmlwriter.rb
38
+ - test/batch.dat
50
39
  - test/batch.xml
51
40
  - test/cp866_multirecord.marc
52
- - test/random_tag_order.dat
41
+ - test/cp866_unimarc.marc
42
+ - test/marc8_accented_chars.marc
53
43
  - test/no-leading-zero.xml
54
- - test/tc_record.rb
55
- - test/tc_parsers.rb
56
- - test/tc_hash.rb
57
- - test/tc_subfield.rb
58
44
  - test/non-numeric.dat
59
- - test/test_cp866.txt
60
- - test/batch.dat
61
- - test/jruby_bad_transcode.rb
45
+ - test/non-numeric.xml
46
+ - test/one.dat
47
+ - test/one.xml
48
+ - test/random_tag_order.dat
49
+ - test/random_tag_order2.dat
62
50
  - test/tc_controlfield.rb
63
- - test/tc_bare_ruby_strings.rb
64
- - test/tc_marchash.rb
65
- - test/marc8_accented_chars.marc
66
- - test/cp866_unimarc.marc
67
51
  - test/tc_datafield.rb
68
- - test/tc_reader_char_encodings.rb
69
- - test/utf8_with_bad_bytes.marc
70
- - test/utf8.marc
71
- - test/bare_cp866.txt
72
52
  - test/tc_dublincore.rb
73
- - test/utf8_multirecord.marc
53
+ - test/tc_hash.rb
54
+ - test/tc_marchash.rb
55
+ - test/tc_parsers.rb
56
+ - test/tc_reader.rb
57
+ - test/tc_reader_char_encodings.rb
58
+ - test/tc_record.rb
59
+ - test/tc_subfield.rb
74
60
  - test/tc_writer.rb
75
61
  - test/tc_xml.rb
76
- - test/non-numeric.xml
77
- - test/random_tag_order2.dat
78
62
  - test/ts_marc.rb
79
- - test/tc_reader.rb
80
- - test/jruby_just_string.rb
63
+ - test/utf8.marc
64
+ - test/utf8_multirecord.marc
65
+ - test/utf8_with_bad_bytes.marc
81
66
  - Rakefile
82
67
  - README.md
83
68
  - Changes
84
69
  - LICENSE
85
70
  homepage: https://github.com/ruby-marc/ruby-marc/
86
71
  licenses: []
87
-
88
- post_install_message:
72
+ post_install_message:
89
73
  rdoc_options: []
90
-
91
- require_paths:
74
+ require_paths:
92
75
  - lib
93
- required_ruby_version: !ruby/object:Gem::Requirement
94
- none: false
95
- requirements:
96
- - - ">="
97
- - !ruby/object:Gem::Version
98
- hash: 59
99
- segments:
100
- - 1
101
- - 8
102
- - 6
76
+ required_ruby_version: !ruby/object:Gem::Requirement
77
+ requirements:
78
+ - - '>='
79
+ - !ruby/object:Gem::Version
103
80
  version: 1.8.6
104
- required_rubygems_version: !ruby/object:Gem::Requirement
105
81
  none: false
106
- requirements:
107
- - - ">="
108
- - !ruby/object:Gem::Version
109
- hash: 3
110
- segments:
111
- - 0
112
- version: "0"
82
+ required_rubygems_version: !ruby/object:Gem::Requirement
83
+ requirements:
84
+ - - '>='
85
+ - !ruby/object:Gem::Version
86
+ version: '0'
87
+ none: false
113
88
  requirements: []
114
-
115
- rubyforge_project:
116
- rubygems_version: 1.8.23
117
- signing_key:
89
+ rubyforge_project:
90
+ rubygems_version: 1.8.24
91
+ signing_key:
118
92
  specification_version: 3
119
93
  summary: A ruby library for working with Machine Readable Cataloging
120
- test_files:
94
+ test_files:
121
95
  - test/ts_marc.rb
@@ -1 +0,0 @@
1
- Междунар. новости мира пластмасс
@@ -1,52 +0,0 @@
1
- # encoding: utf-8
2
-
3
- # 1.9.3p0 :005 > 0x8D.chr.force_encoding("cp866").encode("UTF-8")
4
- utf8 = "Н".force_encoding("UTF-8")
5
-
6
- puts "There's a cyrillic letter that looks kinda like a capital H. Here's what it looks like in unicode: Н"
7
-
8
- puts "In unicode, that's byte array: " + utf8.bytes.to_a.inspect
9
-
10
- puts "We're gonna use String#encode to convert it to an IBM866 encoding, also known as cp866, an encoding sometimes used in Russia."
11
-
12
-
13
- puts " `utf8.encode(\"IBM866\")`"
14
-
15
- cp866 = utf8.encode("IBM866")
16
- puts cp866.bytes.to_a.inspect
17
-
18
- exit
19
-
20
- puts
21
- puts "In cp866, the actual bytes are: #{cp866_phrase.bytes.to_a.inspect}"
22
- puts
23
-
24
- puts "We're going to write the cp866 string to disk, using binary:binary to try and make sure we get the bytes to disk without transcoding."
25
-
26
- write = File.open("test_cp866.txt", "w", :internal_encoding => "binary", :external_encoding => "binary")
27
- write.puts cp866_phrase
28
- write.close
29
- puts
30
-
31
- puts "Now we're going to read it in with a File object with external_encoding set to IBM866, but no internal_encoding set."
32
-
33
- puts
34
- puts "Make sure we have no default internal_encoding: " + Encoding.default_internal.nil?.inspect
35
-
36
- read = File.open("test_cp866.txt", :external_encoding => "cp866")
37
- puts
38
- puts "Our ruby file object should have external_encoding of IBM866: " + read.external_encoding.inspect
39
- puts " and internal_encoding nil: " + read.internal_encoding.inspect
40
-
41
- puts
42
-
43
- read_in_string = read.read
44
- read.close
45
-
46
- puts "The encoding of the string we read in should be IBM866: " + (read_in_string.encoding.name == "IBM866").inspect
47
-
48
- puts
49
- puts "And the bytes should be the very same bytes we wrote out (which are valid cp866) " + (read_in_string.bytes.to_a[0,3] == [140, 165, 166]).inspect + " (#{read_in_string.bytes.to_a})"
50
-
51
- puts "The above is TRUE in MRI 1.9.3, but FALSE in jruby "
52
-
@@ -1,39 +0,0 @@
1
- # encoding: binary
2
-
3
- # jruby 1.6.7 (ruby-1.9.2-p312) (2012-02-22 3e82bc8) (Java HotSpot(TM) 64-Bit Server VM 1.6.0_20) [linux-amd64-java]
4
-
5
- # There is a letter in cyrillic that looks kind of like a capital
6
- # H. In the cp866 encoding (http://en.wikipedia.org/wiki/Code_page_866)
7
- # it's represented by "\x8D" which is decimal 141.
8
- #
9
- # In ruby 1.9, it _ought_ to be possible to have those bytes
10
- # in a string, and tell ruby it's cp866.
11
-
12
- cp866 = "\x8D".force_encoding("IBM866")
13
-
14
- # in MRI 1.9.3, if we inspect that, we get "\x8D", just like we expect.
15
- # and if we look at #bytes.to_a, we get [141], just like we expect.
16
- puts cp866.inspect
17
- puts cp866.bytes.to_a.inspect
18
- # However, in jruby if we #inspect instead of getting "\x8D",
19
- # we get "\u008D" -- this is wrong, it's NOT that unicode codepoint.
20
- # In jruby, bytes.to_a.inspect is still [141], it hasn't changed
21
- # the bytes, but it's confused about what's going on.
22
-
23
- # We see this encoding confusion demonstrated if we try
24
- # a String#encode.
25
- #
26
- # MRI 1.9.3 is perfectly capable of transcoding this to UTF-8
27
-
28
- utf8 = cp866.encode("UTF-8")
29
- puts utf8.inspect # => in MRI displays cyrillic in terminal no prob
30
- puts utf8.bytes.to_a.inspect # => in MRI [208, 157], proper bytes for utf8
31
-
32
- # In jruby, puts utf8.inspect displays "\u008D", and
33
- # utf8.bytes.to_a.inspect is [194, 141]. I don't know where the
34
- # 194 came from, but it has NOT successfully transcoded to utf8.
35
-
36
- # In other cases, the #encode will actually raise an illegal byte
37
- # exception if the original bytes were not legal for UTF8 (or UTF16?) --
38
- # but the original bytes were not meant to be considered unicode at all.
39
-
@@ -1,43 +0,0 @@
1
- require 'test/unit'
2
-
3
- class TestBareRubyStrings < Test::Unit::TestCase
4
-
5
- # The file bare_cp866.txt has in it a phrase encoded in cp866,
6
- # that if it were translated to utf8 would be:
7
- # "Междунар. новости мира пластмасс\n"
8
- #
9
- # The first few bytes of that in utf8 are:
10
- # "\xD0\x9C\xD0\xB5"
11
- #
12
- # In cp866 as it is on disk, its first few bytes are "\x8C\xA5"
13
-
14
- def test_read_cp866_with_external_encoding
15
- return
16
- file = File.open("test/bare_cp866.txt", "r:cp866")
17
- string = file.read
18
-
19
- assert_equal "IBM866", string.encoding.name
20
-
21
- cp866_binary = string.dup.force_encoding("binary")
22
- assert cp866_binary.start_with?( "\x8C\xA5".force_encoding("binary") )
23
-
24
- transcoded = string.encode("UTF-8")
25
- assert_equal "UTF-8", transcoded.encoding.name
26
-
27
- utf8_binary = transcoded.dup.force_encoding("binary")
28
-
29
- assert utf8_binary.start_with?( "\xD0\x9C\xD0\xB5".force_encoding("binary"))
30
- end
31
-
32
- def test_read_cp866_binary_all_the_way
33
- # tell ruby to treat it as binary binary binary
34
- file = File.open("test/bare_cp866.txt", :external_encoding => "binary", :internal_encoding => "binary")
35
-
36
- string = file.read
37
-
38
- # we should get the same bytes that were on disk, right?
39
- assert string.start_with?( "\x8C\xA5".force_encoding("binary"))
40
- end
41
-
42
-
43
- end
@@ -1 +0,0 @@
1
- ���