onix 0.8.5 → 0.9.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,6 +1,12 @@
1
- v0.8.5 (21st December 2010)
1
+ v0.9.0 (14th April 2011)
2
2
  - switch back to the vanilla roxml gem. Ben is maintaining it again and
3
3
  he has merged in my bug fixes
4
+ - clarify comments explaining encoding behaviour
5
+ - Add options hash to ONIX::Reader. Only option at this stage is :encoding,
6
+ which allows the user to override the assumed encoding of the input XML
7
+ - API change, so new minor version
8
+
9
+ v0.8.5 (21st December 2010)
4
10
  - update packaging - use bundler and rspec 2.x
5
11
  - support normalising short tag files that include HTML tags
6
12
 
@@ -21,6 +21,10 @@ ONIX::Normaliser to convert any short tag files to reference tags.
21
21
 
22
22
  ONIX::Writer only generates reference tag ONIX files.
23
23
 
24
+ It baffles me why anyone thought designing two parallel versions of the ONIX
25
+ spec was a good idea. Use reference tags my friends, and let short tags fade
26
+ away into irrelevant obscurity.
27
+
24
28
  ## DTD Loading
25
29
 
26
30
  To correctly handle named entities when reading an ONIX file, this gem attempts
@@ -9,8 +9,8 @@ require 'andand'
9
9
  module ONIX
10
10
  module Version #:nodoc:
11
11
  Major = 0
12
- Minor = 8
13
- Tiny = 5
12
+ Minor = 9
13
+ Tiny = 0
14
14
 
15
15
  String = [Major, Minor, Tiny].join('.')
16
16
  end
@@ -25,7 +25,7 @@ module ONIX
25
25
  # in a shim that provides simple accessor access to common attributes, pass the
26
26
  # shim class using the :product_class option
27
27
  #
28
- # reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
28
+ # reader = ONIX::Reader.new("somefile.xml", :product_class => ONIX::APAProduct)
29
29
  #
30
30
  # puts reader.header.inspect
31
31
  #
@@ -39,7 +39,7 @@ module ONIX
39
39
  # As well as accessing the file header, there are a handful of other read-only
40
40
  # attributes that might be useful
41
41
  #
42
- # reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
42
+ # reader = ONIX::Reader.new("somefile.xml")
43
43
  #
44
44
  # puts reader.version
45
45
  # puts reader.xml_lang
@@ -50,23 +50,45 @@ module ONIX
50
50
  # ONIX spec, and you may need to handle the file differently based on what
51
51
  # version it is.
52
52
  #
53
+ # == File Encoding
54
+ #
55
+ # ONIX::Reader returns all strings as UTF-8. Source file encoding is detected by
56
+ # the encoding declaration at the top of the file, like so:
57
+ #
58
+ # <?xml version="1.0" encoding="iso-8859-1"?>
59
+ #
60
+ # If the encoding declaration is missing the file is assumed to be UTF-8.
61
+ #
62
+ # If the encoding declaration is missing or wrong and the file isn't UTF-8,
63
+ # you can manually set or override it like so:
64
+ #
65
+ # reader = ONIX::Reader.new("somefile.xml", :encoding => "iso-8859-1")
66
+ #
67
+ # If the file contains invalid bytes for the source encoding an exception will
68
+ # be raised. This isn't ideal, but I'm still looking for ways to make this
69
+ # behaviour configurable.
70
+ #
53
71
  class Reader
54
72
  include Enumerable
55
73
 
56
74
  attr_reader :header, :release
57
75
 
58
- def initialize(input, product_klass = ::ONIX::Product)
76
+ def initialize(input, *args)
77
+ opts = args.last.kind_of?(Hash) ? args.pop : {}
78
+ if args.size > 0
79
+ ActiveSupport::Deprecation.warn("Passing a klass as ONIX::Reader's second argument is deprecated, use the :product_class option instead", caller)
80
+ end
81
+ @product_klass = opts[:product_class] || args.pop || ::ONIX::Product
82
+
59
83
  if input.kind_of?(String)
60
84
  @file = File.open(input, "r")
61
- @reader = Nokogiri::XML::Reader(@file) { |cfg| cfg.dtdload.noent }
85
+ @reader = Nokogiri::XML::Reader(@file, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
62
86
  elsif input.kind_of?(IO)
63
- @reader = Nokogiri::XML::Reader(input) { |cfg| cfg.dtdload.noent }
87
+ @reader = Nokogiri::XML::Reader(input, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
64
88
  else
65
89
  raise ArgumentError, "Unable to read from file or IO stream"
66
90
  end
67
91
 
68
- @product_klass = product_klass
69
-
70
92
  @release = find_release
71
93
  @header = find_header
72
94
 
@@ -12,6 +12,7 @@ describe ONIX::Reader do
12
12
  @entity_file = File.join(@data_path, "entities.xml")
13
13
  @utf_16_file = File.join(@data_path, "utf_16.xml")
14
14
  @iso_8859_1_file = File.join(@data_path, "iso_8859_1.xml")
15
+ @no_encoding_decl_file = File.join(@data_path, "aau.xml")
15
16
  end
16
17
 
17
18
  it "should initialize with a filename" do
@@ -90,19 +91,36 @@ describe ONIX::Reader do
90
91
 
91
92
  it "should transparently convert a iso-8859-1 file to utf-8" do
92
93
  reader = ONIX::Reader.new(@iso_8859_1_file)
93
- product = nil
94
- reader.each do |p|
95
- product = p
96
- end
94
+ reader.each do |product|
95
+ if RUBY_VERSION >= "1.9"
96
+ utf8 = Encoding.find("utf-8")
97
+ product.contributors[0].person_name_inverted.encoding.should eql(utf8)
98
+ end
97
99
 
98
- # ROXML appears to munge the string encodings
99
- if RUBY_VERSION >= "1.9"
100
- utf8 = Encoding.find("utf-8")
101
- product.contributors[0].person_name_inverted.encoding.should eql(utf8)
100
+ product.contributors[0].person_name_inverted.should eql("Küng, Hans")
102
101
  end
102
+ end
103
103
 
104
- product.contributors[0].person_name_inverted.should eql("Küng, Hans")
104
+ # This isn't ideal behaviour, but I'm somewhat hamstrung by the nokogiri API. It'd
105
+ # be nice to have the option to replace unrecognised bytes with a valid char.
106
+ it "should raise an exception when an iso-8859-1 file isn't declared as such" do
107
+ reader = ONIX::Reader.new(@no_encoding_decl_file)
108
+ lambda {
109
+ reader.each do |product|
110
+ end
111
+ }.should raise_error(Nokogiri::XML::SyntaxError)
112
+ end
113
+
114
+ it "should transparently convert an iso-8859-1 file to utf-8 when there's no declaration but the user manually specifies iso-8859-1" do
115
+ reader = ONIX::Reader.new(@no_encoding_decl_file, :encoding => "iso-8859-1")
116
+ reader.each do |product|
117
+ if RUBY_VERSION >= "1.9"
118
+ utf8 = Encoding.find("utf-8")
119
+ product.contributors[0].person_name_inverted.encoding.should eql(utf8)
120
+ end
105
121
 
122
+ product.contributors[0].person_name_inverted.should eql("Melo,Patr¡cia")
123
+ end
106
124
  end
107
125
 
108
126
  it "should transparently convert a utf-16 file to utf-8" do
@@ -119,6 +137,19 @@ describe ONIX::Reader do
119
137
  end
120
138
 
121
139
  product.contributors[0].person_name_inverted.should eql("Küng, Hans")
140
+ end
122
141
 
142
+ it "should support returning an APAProduct using deprecated API" do
143
+ reader = ONIX::Reader.new(@file1, ONIX::APAProduct)
144
+ reader.each do |product|
145
+ product.should be_a_kind_of(ONIX::APAProduct)
146
+ end
147
+ end
148
+
149
+ it "should support returning an APAProduct using new API" do
150
+ reader = ONIX::Reader.new(@file1, :product_class => ONIX::APAProduct)
151
+ reader.each do |product|
152
+ product.should be_a_kind_of(ONIX::APAProduct)
153
+ end
123
154
  end
124
155
  end
metadata CHANGED
@@ -1,13 +1,12 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onix
3
3
  version: !ruby/object:Gem::Version
4
- hash: 53
5
4
  prerelease: false
6
5
  segments:
7
6
  - 0
8
- - 8
9
- - 5
10
- version: 0.8.5
7
+ - 9
8
+ - 0
9
+ version: 0.9.0
11
10
  platform: ruby
12
11
  authors:
13
12
  - James Healy
@@ -15,18 +14,17 @@ autorequire:
15
14
  bindir: bin
16
15
  cert_chain: []
17
16
 
18
- date: 2010-12-21 00:00:00 +11:00
17
+ date: 2011-04-14 00:00:00 +10:00
19
18
  default_executable:
20
19
  dependencies:
21
20
  - !ruby/object:Gem::Dependency
22
- name: yob-roxml
21
+ name: roxml
23
22
  prerelease: false
24
23
  requirement: &id001 !ruby/object:Gem::Requirement
25
24
  none: false
26
25
  requirements:
27
- - - ">="
26
+ - - ~>
28
27
  - !ruby/object:Gem::Version
29
- hash: 15
30
28
  segments:
31
29
  - 3
32
30
  - 1
@@ -35,77 +33,87 @@ dependencies:
35
33
  type: :runtime
36
34
  version_requirements: *id001
37
35
  - !ruby/object:Gem::Dependency
38
- name: i18n
36
+ name: activesupport
39
37
  prerelease: false
40
38
  requirement: &id002 !ruby/object:Gem::Requirement
41
39
  none: false
42
40
  requirements:
43
- - - ">="
41
+ - - ~>
44
42
  - !ruby/object:Gem::Version
45
- hash: 3
46
43
  segments:
44
+ - 3
47
45
  - 0
48
- version: "0"
46
+ - 5
47
+ version: 3.0.5
49
48
  type: :runtime
50
49
  version_requirements: *id002
51
50
  - !ruby/object:Gem::Dependency
52
- name: andand
51
+ name: i18n
53
52
  prerelease: false
54
53
  requirement: &id003 !ruby/object:Gem::Requirement
55
54
  none: false
56
55
  requirements:
57
56
  - - ">="
58
57
  - !ruby/object:Gem::Version
59
- hash: 3
60
58
  segments:
61
59
  - 0
62
60
  version: "0"
63
61
  type: :runtime
64
62
  version_requirements: *id003
65
63
  - !ruby/object:Gem::Dependency
66
- name: nokogiri
64
+ name: andand
67
65
  prerelease: false
68
66
  requirement: &id004 !ruby/object:Gem::Requirement
69
67
  none: false
70
68
  requirements:
71
69
  - - ">="
72
70
  - !ruby/object:Gem::Version
73
- hash: 7
71
+ segments:
72
+ - 0
73
+ version: "0"
74
+ type: :runtime
75
+ version_requirements: *id004
76
+ - !ruby/object:Gem::Dependency
77
+ name: nokogiri
78
+ prerelease: false
79
+ requirement: &id005 !ruby/object:Gem::Requirement
80
+ none: false
81
+ requirements:
82
+ - - ">="
83
+ - !ruby/object:Gem::Version
74
84
  segments:
75
85
  - 1
76
86
  - 4
77
87
  version: "1.4"
78
88
  type: :runtime
79
- version_requirements: *id004
89
+ version_requirements: *id005
80
90
  - !ruby/object:Gem::Dependency
81
91
  name: rake
82
92
  prerelease: false
83
- requirement: &id005 !ruby/object:Gem::Requirement
93
+ requirement: &id006 !ruby/object:Gem::Requirement
84
94
  none: false
85
95
  requirements:
86
96
  - - ">="
87
97
  - !ruby/object:Gem::Version
88
- hash: 3
89
98
  segments:
90
99
  - 0
91
100
  version: "0"
92
101
  type: :development
93
- version_requirements: *id005
102
+ version_requirements: *id006
94
103
  - !ruby/object:Gem::Dependency
95
104
  name: rspec
96
105
  prerelease: false
97
- requirement: &id006 !ruby/object:Gem::Requirement
106
+ requirement: &id007 !ruby/object:Gem::Requirement
98
107
  none: false
99
108
  requirements:
100
109
  - - ~>
101
110
  - !ruby/object:Gem::Version
102
- hash: 1
103
111
  segments:
104
112
  - 2
105
113
  - 1
106
114
  version: "2.1"
107
115
  type: :development
108
- version_requirements: *id006
116
+ version_requirements: *id007
109
117
  description: A convenient mapping between ruby objects and the ONIX XML specification
110
118
  email:
111
119
  - jimmy@deefa.com
@@ -367,7 +375,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
367
375
  requirements:
368
376
  - - ">="
369
377
  - !ruby/object:Gem::Version
370
- hash: 3
371
378
  segments:
372
379
  - 0
373
380
  version: "0"
@@ -376,7 +383,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
376
383
  requirements:
377
384
  - - ">="
378
385
  - !ruby/object:Gem::Version
379
- hash: 3
380
386
  segments:
381
387
  - 0
382
388
  version: "0"