onix 0.8.5 → 0.9.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -1
- data/README.markdown +4 -0
- data/lib/onix.rb +2 -2
- data/lib/onix/reader.rb +29 -7
- data/spec/reader_spec.rb +40 -9
- metadata +31 -25
data/CHANGELOG
CHANGED
@@ -1,6 +1,12 @@
|
|
1
|
-
v0.
|
1
|
+
v0.9.0 (14th April 2011)
|
2
2
|
- switch back to the vanilla roxml gem. Ben is maintaining it again and
|
3
3
|
he has merged in my bug fixes
|
4
|
+
- clarify comments explaining encoding behaviour
|
5
|
+
- Add options hash to ONIX::Reader. Only option at this stage is :encoding,
|
6
|
+
which allows the user to override the assumed encoding of the input XML
|
7
|
+
- API change, so new minor version
|
8
|
+
|
9
|
+
v0.8.5 (21st December 2010)
|
4
10
|
- update packaging - use bundler and rspec 2.x
|
5
11
|
- support normalising short tag files that include HTML tags
|
6
12
|
|
data/README.markdown
CHANGED
@@ -21,6 +21,10 @@ ONIX::Normaliser to convert any short tag files to reference tags.
|
|
21
21
|
|
22
22
|
ONIX::Writer only generates reference tag ONIX files.
|
23
23
|
|
24
|
+
It baffles me why anyone thought designing two parallel versions of the ONIX
|
25
|
+
spec was a good idea. Use reference tags, my friends, and let short tags fade
|
26
|
+
away into irrelevant obscurity.
|
27
|
+
|
24
28
|
## DTD Loading
|
25
29
|
|
26
30
|
To correctly handle named entities when reading an ONIX file, this gem attempts
|
data/lib/onix.rb
CHANGED
data/lib/onix/reader.rb
CHANGED
@@ -25,7 +25,7 @@ module ONIX
|
|
25
25
|
# in a shim that provides simple accessor access to common attributes, pass the
|
26
26
|
# shim class as a second argument
|
27
27
|
#
|
28
|
-
# reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
|
28
|
+
# reader = ONIX::Reader.new("somefile.xml", :product_class => ONIX::APAProduct)
|
29
29
|
#
|
30
30
|
# puts reader.header.inspect
|
31
31
|
#
|
@@ -39,7 +39,7 @@ module ONIX
|
|
39
39
|
# As well as accessing the file header, there are a handful of other read-only
|
40
40
|
# attributes that might be useful
|
41
41
|
#
|
42
|
-
# reader = ONIX::Reader.new("somefile.xml"
|
42
|
+
# reader = ONIX::Reader.new("somefile.xml")
|
43
43
|
#
|
44
44
|
# puts reader.version
|
45
45
|
# puts reader.xml_lang
|
@@ -50,23 +50,45 @@ module ONIX
|
|
50
50
|
# ONIX spec, and you may need to handle the file differently based on what
|
51
51
|
# version it is.
|
52
52
|
#
|
53
|
+
# == File Encoding
|
54
|
+
#
|
55
|
+
# ONIX::Reader returns all strings as UTF-8. Source file encoding is detected by
|
56
|
+
# the encoding declaration at the top of the file, like so:
|
57
|
+
#
|
58
|
+
# <?xml version="1.0" encoding="iso-8859-1"?>
|
59
|
+
#
|
60
|
+
# If the encoding declaration is missing the file is assumed to be UTF-8.
|
61
|
+
#
|
62
|
+
# If the encoding declaration is missing or wrong and the file isn't UTF-8,
|
63
|
+
# you can manually set or override it like so:
|
64
|
+
#
|
65
|
+
# reader = ONIX::Reader.new("somefile.xml", :encoding => "iso-8859-1")
|
66
|
+
#
|
67
|
+
# If the file contains invalid bytes for the source encoding an exception will
|
68
|
+
# be raised. This isn't ideal, but I'm still looking for ways to make this
|
69
|
+
# behaviour configurable.
|
70
|
+
#
|
53
71
|
class Reader
|
54
72
|
include Enumerable
|
55
73
|
|
56
74
|
attr_reader :header, :release
|
57
75
|
|
58
|
-
def initialize(input,
|
76
|
+
def initialize(input, *args)
|
77
|
+
opts = args.last.kind_of?(Hash) ? args.pop : {}
|
78
|
+
if args.size > 0
|
79
|
+
ActiveSupport::Deprecation.warn("Passing a klass as ONIX::Reader's second argument is deprecated, use the :product_class option instead", caller)
|
80
|
+
end
|
81
|
+
@product_klass = opts[:product_class] || args.pop || ::ONIX::Product
|
82
|
+
|
59
83
|
if input.kind_of?(String)
|
60
84
|
@file = File.open(input, "r")
|
61
|
-
@reader = Nokogiri::XML::Reader(@file) { |cfg| cfg.dtdload.noent }
|
85
|
+
@reader = Nokogiri::XML::Reader(@file, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
|
62
86
|
elsif input.kind_of?(IO)
|
63
|
-
@reader = Nokogiri::XML::Reader(input) { |cfg| cfg.dtdload.noent }
|
87
|
+
@reader = Nokogiri::XML::Reader(input, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
|
64
88
|
else
|
65
89
|
raise ArgumentError, "Unable to read from file or IO stream"
|
66
90
|
end
|
67
91
|
|
68
|
-
@product_klass = product_klass
|
69
|
-
|
70
92
|
@release = find_release
|
71
93
|
@header = find_header
|
72
94
|
|
data/spec/reader_spec.rb
CHANGED
@@ -12,6 +12,7 @@ describe ONIX::Reader do
|
|
12
12
|
@entity_file = File.join(@data_path, "entities.xml")
|
13
13
|
@utf_16_file = File.join(@data_path, "utf_16.xml")
|
14
14
|
@iso_8859_1_file = File.join(@data_path, "iso_8859_1.xml")
|
15
|
+
@no_encoding_decl_file = File.join(@data_path, "aau.xml")
|
15
16
|
end
|
16
17
|
|
17
18
|
it "should initialize with a filename" do
|
@@ -90,19 +91,36 @@ describe ONIX::Reader do
|
|
90
91
|
|
91
92
|
it "should transparently convert a iso-8859-1 file to utf-8" do
|
92
93
|
reader = ONIX::Reader.new(@iso_8859_1_file)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
94
|
+
reader.each do |product|
|
95
|
+
if RUBY_VERSION >= "1.9"
|
96
|
+
utf8 = Encoding.find("utf-8")
|
97
|
+
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
98
|
+
end
|
97
99
|
|
98
|
-
|
99
|
-
if RUBY_VERSION >= "1.9"
|
100
|
-
utf8 = Encoding.find("utf-8")
|
101
|
-
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
100
|
+
product.contributors[0].person_name_inverted.should eql("Küng, Hans")
|
102
101
|
end
|
102
|
+
end
|
103
103
|
|
104
|
-
|
104
|
+
# This isn't ideal behaviour, but I'm somewhat hamstrung by the nokogiri API. It'd
|
105
|
+
# be nice to have the option to replace unrecognised bytes with a valid char.
|
106
|
+
it "should raise an exception when an iso-8859-1 file isn't declared as such" do
|
107
|
+
reader = ONIX::Reader.new(@no_encoding_decl_file)
|
108
|
+
lambda {
|
109
|
+
reader.each do |product|
|
110
|
+
end
|
111
|
+
}.should raise_error(Nokogiri::XML::SyntaxError)
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should transparently convert an iso-8859-1 file to utf-8 when there's no declaration but the user manually specifies iso-8859-1" do
|
115
|
+
reader = ONIX::Reader.new(@no_encoding_decl_file, :encoding => "iso-8859-1")
|
116
|
+
reader.each do |product|
|
117
|
+
if RUBY_VERSION >= "1.9"
|
118
|
+
utf8 = Encoding.find("utf-8")
|
119
|
+
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
120
|
+
end
|
105
121
|
|
122
|
+
product.contributors[0].person_name_inverted.should eql("Melo,Patr¡cia")
|
123
|
+
end
|
106
124
|
end
|
107
125
|
|
108
126
|
it "should transparently convert a utf-16 file to utf-8" do
|
@@ -119,6 +137,19 @@ describe ONIX::Reader do
|
|
119
137
|
end
|
120
138
|
|
121
139
|
product.contributors[0].person_name_inverted.should eql("Küng, Hans")
|
140
|
+
end
|
122
141
|
|
142
|
+
it "should support returning an APAProduct using deprecated API" do
|
143
|
+
reader = ONIX::Reader.new(@file1, ONIX::APAProduct)
|
144
|
+
reader.each do |product|
|
145
|
+
product.should be_a_kind_of(ONIX::APAProduct)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should support returning an APAProduct using new API" do
|
150
|
+
reader = ONIX::Reader.new(@file1, :product_class => ONIX::APAProduct)
|
151
|
+
reader.each do |product|
|
152
|
+
product.should be_a_kind_of(ONIX::APAProduct)
|
153
|
+
end
|
123
154
|
end
|
124
155
|
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 53
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
7
|
+
- 9
|
8
|
+
- 0
|
9
|
+
version: 0.9.0
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- James Healy
|
@@ -15,18 +14,17 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date:
|
17
|
+
date: 2011-04-14 00:00:00 +10:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
21
|
+
name: roxml
|
23
22
|
prerelease: false
|
24
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
24
|
none: false
|
26
25
|
requirements:
|
27
|
-
- -
|
26
|
+
- - ~>
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 15
|
30
28
|
segments:
|
31
29
|
- 3
|
32
30
|
- 1
|
@@ -35,77 +33,87 @@ dependencies:
|
|
35
33
|
type: :runtime
|
36
34
|
version_requirements: *id001
|
37
35
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
36
|
+
name: activesupport
|
39
37
|
prerelease: false
|
40
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
39
|
none: false
|
42
40
|
requirements:
|
43
|
-
- -
|
41
|
+
- - ~>
|
44
42
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 3
|
46
43
|
segments:
|
44
|
+
- 3
|
47
45
|
- 0
|
48
|
-
|
46
|
+
- 5
|
47
|
+
version: 3.0.5
|
49
48
|
type: :runtime
|
50
49
|
version_requirements: *id002
|
51
50
|
- !ruby/object:Gem::Dependency
|
52
|
-
name:
|
51
|
+
name: i18n
|
53
52
|
prerelease: false
|
54
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
55
54
|
none: false
|
56
55
|
requirements:
|
57
56
|
- - ">="
|
58
57
|
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
58
|
segments:
|
61
59
|
- 0
|
62
60
|
version: "0"
|
63
61
|
type: :runtime
|
64
62
|
version_requirements: *id003
|
65
63
|
- !ruby/object:Gem::Dependency
|
66
|
-
name:
|
64
|
+
name: andand
|
67
65
|
prerelease: false
|
68
66
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
67
|
none: false
|
70
68
|
requirements:
|
71
69
|
- - ">="
|
72
70
|
- !ruby/object:Gem::Version
|
73
|
-
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :runtime
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: nokogiri
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
74
84
|
segments:
|
75
85
|
- 1
|
76
86
|
- 4
|
77
87
|
version: "1.4"
|
78
88
|
type: :runtime
|
79
|
-
version_requirements: *
|
89
|
+
version_requirements: *id005
|
80
90
|
- !ruby/object:Gem::Dependency
|
81
91
|
name: rake
|
82
92
|
prerelease: false
|
83
|
-
requirement: &
|
93
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
84
94
|
none: false
|
85
95
|
requirements:
|
86
96
|
- - ">="
|
87
97
|
- !ruby/object:Gem::Version
|
88
|
-
hash: 3
|
89
98
|
segments:
|
90
99
|
- 0
|
91
100
|
version: "0"
|
92
101
|
type: :development
|
93
|
-
version_requirements: *
|
102
|
+
version_requirements: *id006
|
94
103
|
- !ruby/object:Gem::Dependency
|
95
104
|
name: rspec
|
96
105
|
prerelease: false
|
97
|
-
requirement: &
|
106
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
98
107
|
none: false
|
99
108
|
requirements:
|
100
109
|
- - ~>
|
101
110
|
- !ruby/object:Gem::Version
|
102
|
-
hash: 1
|
103
111
|
segments:
|
104
112
|
- 2
|
105
113
|
- 1
|
106
114
|
version: "2.1"
|
107
115
|
type: :development
|
108
|
-
version_requirements: *
|
116
|
+
version_requirements: *id007
|
109
117
|
description: A convenient mapping between ruby objects and the ONIX XML specification
|
110
118
|
email:
|
111
119
|
- jimmy@deefa.com
|
@@ -367,7 +375,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
367
375
|
requirements:
|
368
376
|
- - ">="
|
369
377
|
- !ruby/object:Gem::Version
|
370
|
-
hash: 3
|
371
378
|
segments:
|
372
379
|
- 0
|
373
380
|
version: "0"
|
@@ -376,7 +383,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
376
383
|
requirements:
|
377
384
|
- - ">="
|
378
385
|
- !ruby/object:Gem::Version
|
379
|
-
hash: 3
|
380
386
|
segments:
|
381
387
|
- 0
|
382
388
|
version: "0"
|