onix 0.8.5 → 0.9.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,6 +1,12 @@
1
- v0.8.5 (21st December 2010)
1
+ v0.9.0 (14th April 2011)
2
2
  - switch back to the vanilla roxml gem. Ben is maintaining it again and
3
3
  he has merged in my bug fixes
4
+ - clarify comments explaining encoding behaviour
5
+ - Add options hash to ONIX::Reader. Only option at this stage is :encoding,
6
+ which allows the user to override the assumed encoding of the input XML
7
+ - API change, so new minor version
8
+
9
+ v0.8.5 (21st December 2010)
4
10
  - update packaging - use bundler and rspec 2.x
5
11
  - support normalising short tag files that include HTML tags
6
12
 
@@ -21,6 +21,10 @@ ONIX::Normaliser to convert any short tag files to reference tags.
21
21
 
22
22
  ONIX::Writer only generates reference tag ONIX files.
23
23
 
24
+ It baffles me why anyone thought designing two parallel versions of the ONIX
25
+ spec was a good idea. Use reference tags my friends, and let short tags fade
26
+ away into irrelevant obscurity.
27
+
24
28
  ## DTD Loading
25
29
 
26
30
  To correctly handle named entities when reading an ONIX file, this gem attempts
@@ -9,8 +9,8 @@ require 'andand'
9
9
  module ONIX
10
10
  module Version #:nodoc:
11
11
  Major = 0
12
- Minor = 8
13
- Tiny = 5
12
+ Minor = 9
13
+ Tiny = 0
14
14
 
15
15
  String = [Major, Minor, Tiny].join('.')
16
16
  end
@@ -25,7 +25,7 @@ module ONIX
25
25
  # in a shim that provides simple accessor access to common attributes, pass the
26
26
  # shim class as a second argument
27
27
  #
28
- # reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
28
+ # reader = ONIX::Reader.new("somefile.xml", :product_class => ONIX::APAProduct)
29
29
  #
30
30
  # puts reader.header.inspect
31
31
  #
@@ -39,7 +39,7 @@ module ONIX
39
39
  # As well as accessing the file header, there are a handful of other read-only
40
40
  # attributes that might be useful
41
41
  #
42
- # reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
42
+ # reader = ONIX::Reader.new("somefile.xml")
43
43
  #
44
44
  # puts reader.version
45
45
  # puts reader.xml_lang
@@ -50,23 +50,45 @@ module ONIX
50
50
  # ONIX spec, and you may need to handle the file differently based on what
51
51
  # version it is.
52
52
  #
53
+ # == File Encoding
54
+ #
55
+ # ONIX::Reader returns all strings as UTF-8. Source file encoding is detected by
56
+ # the encoding declaration at the top of the file, like so:
57
+ #
58
+ # <?xml version="1.0" encoding="iso-8859-1"?>
59
+ #
60
+ # If the encoding declaration is missing the file is assumed to be UTF-8.
61
+ #
62
+ # If the encoding declaration is missing or wrong and the file isn't UTF-8,
63
+ # you can manually set or override it like so:
64
+ #
65
+ # reader = ONIX::Reader.new("somefile.xml", :encoding => "iso-8859-1")
66
+ #
67
+ # If the file contains invalid bytes for the source encoding an exception will
68
+ # be raised. This isn't ideal, but I'm still looking for ways to make this
69
+ # behaviour configurable.
70
+ #
53
71
  class Reader
54
72
  include Enumerable
55
73
 
56
74
  attr_reader :header, :release
57
75
 
58
- def initialize(input, product_klass = ::ONIX::Product)
76
+ def initialize(input, *args)
77
+ opts = args.last.kind_of?(Hash) ? args.pop : {}
78
+ if args.size > 0
79
+ ActiveSupport::Deprecation.warn("Passing a klass as ONIX::Reader's second argument is deprecated, use the :product_class option instead", caller)
80
+ end
81
+ @product_klass = opts[:product_class] || args.pop || ::ONIX::Product
82
+
59
83
  if input.kind_of?(String)
60
84
  @file = File.open(input, "r")
61
- @reader = Nokogiri::XML::Reader(@file) { |cfg| cfg.dtdload.noent }
85
+ @reader = Nokogiri::XML::Reader(@file, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
62
86
  elsif input.kind_of?(IO)
63
- @reader = Nokogiri::XML::Reader(input) { |cfg| cfg.dtdload.noent }
87
+ @reader = Nokogiri::XML::Reader(input, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
64
88
  else
65
89
  raise ArgumentError, "Unable to read from file or IO stream"
66
90
  end
67
91
 
68
- @product_klass = product_klass
69
-
70
92
  @release = find_release
71
93
  @header = find_header
72
94
 
@@ -12,6 +12,7 @@ describe ONIX::Reader do
12
12
  @entity_file = File.join(@data_path, "entities.xml")
13
13
  @utf_16_file = File.join(@data_path, "utf_16.xml")
14
14
  @iso_8859_1_file = File.join(@data_path, "iso_8859_1.xml")
15
+ @no_encoding_decl_file = File.join(@data_path, "aau.xml")
15
16
  end
16
17
 
17
18
  it "should initialize with a filename" do
@@ -90,19 +91,36 @@ describe ONIX::Reader do
90
91
 
91
92
  it "should transparently convert a iso-8859-1 file to utf-8" do
92
93
  reader = ONIX::Reader.new(@iso_8859_1_file)
93
- product = nil
94
- reader.each do |p|
95
- product = p
96
- end
94
+ reader.each do |product|
95
+ if RUBY_VERSION >= "1.9"
96
+ utf8 = Encoding.find("utf-8")
97
+ product.contributors[0].person_name_inverted.encoding.should eql(utf8)
98
+ end
97
99
 
98
- # ROXML appears to munge the string encodings
99
- if RUBY_VERSION >= "1.9"
100
- utf8 = Encoding.find("utf-8")
101
- product.contributors[0].person_name_inverted.encoding.should eql(utf8)
100
+ product.contributors[0].person_name_inverted.should eql("Küng, Hans")
102
101
  end
102
+ end
103
103
 
104
- product.contributors[0].person_name_inverted.should eql("Küng, Hans")
104
+ # This isn't ideal behaviour, but I'm somewhat hamstrung by the nokogiri API. It'd
105
+ # be nice to have the option to replace unrecognised bytes with a valid char.
106
+ it "should raise an exception when an iso-8859-1 file isn't declared as such" do
107
+ reader = ONIX::Reader.new(@no_encoding_decl_file)
108
+ lambda {
109
+ reader.each do |product|
110
+ end
111
+ }.should raise_error(Nokogiri::XML::SyntaxError)
112
+ end
113
+
114
+ it "should transparently convert an iso-8859-1 file to utf-8 when there's no declaration but the user manually specifies iso-8859-1" do
115
+ reader = ONIX::Reader.new(@no_encoding_decl_file, :encoding => "iso-8859-1")
116
+ reader.each do |product|
117
+ if RUBY_VERSION >= "1.9"
118
+ utf8 = Encoding.find("utf-8")
119
+ product.contributors[0].person_name_inverted.encoding.should eql(utf8)
120
+ end
105
121
 
122
+ product.contributors[0].person_name_inverted.should eql("Melo,Patr¡cia")
123
+ end
106
124
  end
107
125
 
108
126
  it "should transparently convert a utf-16 file to utf-8" do
@@ -119,6 +137,19 @@ describe ONIX::Reader do
119
137
  end
120
138
 
121
139
  product.contributors[0].person_name_inverted.should eql("Küng, Hans")
140
+ end
122
141
 
142
+ it "should support returning an APAProduct using deprecated API" do
143
+ reader = ONIX::Reader.new(@file1, ONIX::APAProduct)
144
+ reader.each do |product|
145
+ product.should be_a_kind_of(ONIX::APAProduct)
146
+ end
147
+ end
148
+
149
+ it "should support returning an APAProduct using new API" do
150
+ reader = ONIX::Reader.new(@file1, :product_class => ONIX::APAProduct)
151
+ reader.each do |product|
152
+ product.should be_a_kind_of(ONIX::APAProduct)
153
+ end
123
154
  end
124
155
  end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onix
3
3
  version: !ruby/object:Gem::Version
4
- hash: 53
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
- - 8
9
- - 5
10
- version: 0.8.5
7
+ - 9
8
+ - 0
9
+ version: 0.9.0
11
10
  platform: ruby
12
11
  authors:
13
12
  - James Healy
@@ -15,18 +14,17 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-12-21 00:00:00 +11:00
17
+ date: 2011-04-14 00:00:00 +10:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
- name: yob-roxml
21
+ name: roxml
23
22
  prerelease: false
24
23
  requirement: &id001 !ruby/object:Gem::Requirement
25
24
  none: false
26
25
  requirements:
27
- - - ">="
26
+ - - ~>
28
27
  - !ruby/object:Gem::Version
29
- hash: 15
30
28
  segments:
31
29
  - 3
32
30
  - 1
@@ -35,77 +33,87 @@ dependencies:
35
33
  type: :runtime
36
34
  version_requirements: *id001
37
35
  - !ruby/object:Gem::Dependency
38
- name: i18n
36
+ name: activesupport
39
37
  prerelease: false
40
38
  requirement: &id002 !ruby/object:Gem::Requirement
41
39
  none: false
42
40
  requirements:
43
- - - ">="
41
+ - - ~>
44
42
  - !ruby/object:Gem::Version
45
- hash: 3
46
43
  segments:
44
+ - 3
47
45
  - 0
48
- version: "0"
46
+ - 5
47
+ version: 3.0.5
49
48
  type: :runtime
50
49
  version_requirements: *id002
51
50
  - !ruby/object:Gem::Dependency
52
- name: andand
51
+ name: i18n
53
52
  prerelease: false
54
53
  requirement: &id003 !ruby/object:Gem::Requirement
55
54
  none: false
56
55
  requirements:
57
56
  - - ">="
58
57
  - !ruby/object:Gem::Version
59
- hash: 3
60
58
  segments:
61
59
  - 0
62
60
  version: "0"
63
61
  type: :runtime
64
62
  version_requirements: *id003
65
63
  - !ruby/object:Gem::Dependency
66
- name: nokogiri
64
+ name: andand
67
65
  prerelease: false
68
66
  requirement: &id004 !ruby/object:Gem::Requirement
69
67
  none: false
70
68
  requirements:
71
69
  - - ">="
72
70
  - !ruby/object:Gem::Version
73
- hash: 7
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :runtime
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: nokogiri
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
74
84
  segments:
75
85
  - 1
76
86
  - 4
77
87
  version: "1.4"
78
88
  type: :runtime
79
- version_requirements: *id004
89
+ version_requirements: *id005
80
90
  - !ruby/object:Gem::Dependency
81
91
  name: rake
82
92
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
93
+ requirement: &id006 !ruby/object:Gem::Requirement
84
94
  none: false
85
95
  requirements:
86
96
  - - ">="
87
97
  - !ruby/object:Gem::Version
88
- hash: 3
89
98
  segments:
90
99
  - 0
91
100
  version: "0"
92
101
  type: :development
93
- version_requirements: *id005
102
+ version_requirements: *id006
94
103
  - !ruby/object:Gem::Dependency
95
104
  name: rspec
96
105
  prerelease: false
97
- requirement: &id006 !ruby/object:Gem::Requirement
106
+ requirement: &id007 !ruby/object:Gem::Requirement
98
107
  none: false
99
108
  requirements:
100
109
  - - ~>
101
110
  - !ruby/object:Gem::Version
102
- hash: 1
103
111
  segments:
104
112
  - 2
105
113
  - 1
106
114
  version: "2.1"
107
115
  type: :development
108
- version_requirements: *id006
116
+ version_requirements: *id007
109
117
  description: A convenient mapping between ruby objects and the ONIX XML specification
110
118
  email:
111
119
  - jimmy@deefa.com
@@ -367,7 +375,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
367
375
  requirements:
368
376
  - - ">="
369
377
  - !ruby/object:Gem::Version
370
- hash: 3
371
378
  segments:
372
379
  - 0
373
380
  version: "0"
@@ -376,7 +383,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
376
383
  requirements:
377
384
  - - ">="
378
385
  - !ruby/object:Gem::Version
379
- hash: 3
380
386
  segments:
381
387
  - 0
382
388
  version: "0"