onix 0.8.5 → 0.9.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -1
- data/README.markdown +4 -0
- data/lib/onix.rb +2 -2
- data/lib/onix/reader.rb +29 -7
- data/spec/reader_spec.rb +40 -9
- metadata +31 -25
data/CHANGELOG
CHANGED
@@ -1,6 +1,12 @@
|
|
1
|
-
v0.
|
1
|
+
v0.9.0 (14th April 2011)
|
2
2
|
- switch back to the vanilla roxml gem. Ben is maintaining it again and
|
3
3
|
he has merged in my bug fixes
|
4
|
+
- clarify comments explaining encoding behaviour
|
5
|
+
- Add options hash to ONIX::Reader. Only option at this stage is :encoding,
|
6
|
+
which allows the user to override the assumed encoding of the input XML
|
7
|
+
- API change, so new minor version
|
8
|
+
|
9
|
+
v0.8.5 (21st December 2010)
|
4
10
|
- update packaging - use bundler and rspec 2.x
|
5
11
|
- support normalising short tag files that include HTML tags
|
6
12
|
|
data/README.markdown
CHANGED
@@ -21,6 +21,10 @@ ONIX::Normaliser to convert any short tag files to reference tags.
|
|
21
21
|
|
22
22
|
ONIX::Writer only generates reference tag ONIX files.
|
23
23
|
|
24
|
+
It baffles me why anyone thought designing two parallel versions of the ONIX
|
25
|
+
spec was a good idea. Use reference tags, my friends, and let short tags fade
|
26
|
+
away into irrelevant obscurity.
|
27
|
+
|
24
28
|
## DTD Loading
|
25
29
|
|
26
30
|
To correctly handle named entities when reading an ONIX file, this gem attempts
|
data/lib/onix.rb
CHANGED
data/lib/onix/reader.rb
CHANGED
@@ -25,7 +25,7 @@ module ONIX
|
|
25
25
|
# in a shim that provides simple accessor access to common attributes, pass the
|
26
26
|
# shim class as a second argument
|
27
27
|
#
|
28
|
-
# reader = ONIX::Reader.new("somefile.xml", ONIX::APAProduct)
|
28
|
+
# reader = ONIX::Reader.new("somefile.xml", :product_class => ONIX::APAProduct)
|
29
29
|
#
|
30
30
|
# puts reader.header.inspect
|
31
31
|
#
|
@@ -39,7 +39,7 @@ module ONIX
|
|
39
39
|
# As well as accessing the file header, there are a handful of other read-only
|
40
40
|
# attributes that might be useful
|
41
41
|
#
|
42
|
-
# reader = ONIX::Reader.new("somefile.xml"
|
42
|
+
# reader = ONIX::Reader.new("somefile.xml")
|
43
43
|
#
|
44
44
|
# puts reader.version
|
45
45
|
# puts reader.xml_lang
|
@@ -50,23 +50,45 @@ module ONIX
|
|
50
50
|
# ONIX spec, and you may need to handle the file differently based on what
|
51
51
|
# version it is.
|
52
52
|
#
|
53
|
+
# == File Encoding
|
54
|
+
#
|
55
|
+
# ONIX::Reader returns all strings as UTF-8. Source file encoding is detected by
|
56
|
+
# the encoding declaration at the top of the file, like so:
|
57
|
+
#
|
58
|
+
# <?xml version="1.0" encoding="iso-8859-1"?>
|
59
|
+
#
|
60
|
+
# If the encoding declaration is missing the file is assumed to be UTF-8.
|
61
|
+
#
|
62
|
+
# If the encoding declaration is missing or wrong and the file isn't UTF-8,
|
63
|
+
# you can manually set or override it like so:
|
64
|
+
#
|
65
|
+
# reader = ONIX::Reader.new("somefile.xml", :encoding => "iso-8859-1")
|
66
|
+
#
|
67
|
+
# If the file contains invalid bytes for the source encoding an exception will
|
68
|
+
# be raised. This isn't ideal, but I'm still looking for ways to make this
|
69
|
+
# behaviour configurable.
|
70
|
+
#
|
53
71
|
class Reader
|
54
72
|
include Enumerable
|
55
73
|
|
56
74
|
attr_reader :header, :release
|
57
75
|
|
58
|
-
def initialize(input,
|
76
|
+
def initialize(input, *args)
|
77
|
+
opts = args.last.kind_of?(Hash) ? args.pop : {}
|
78
|
+
if args.size > 0
|
79
|
+
ActiveSupport::Deprecation.warn("Passing a klass as ONIX::Reader's second argument is deprecated, use the :product_class option instead", caller)
|
80
|
+
end
|
81
|
+
@product_klass = opts[:product_class] || args.pop || ::ONIX::Product
|
82
|
+
|
59
83
|
if input.kind_of?(String)
|
60
84
|
@file = File.open(input, "r")
|
61
|
-
@reader = Nokogiri::XML::Reader(@file) { |cfg| cfg.dtdload.noent }
|
85
|
+
@reader = Nokogiri::XML::Reader(@file, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
|
62
86
|
elsif input.kind_of?(IO)
|
63
|
-
@reader = Nokogiri::XML::Reader(input) { |cfg| cfg.dtdload.noent }
|
87
|
+
@reader = Nokogiri::XML::Reader(input, nil, opts[:encoding]) { |cfg| cfg.dtdload.noent }
|
64
88
|
else
|
65
89
|
raise ArgumentError, "Unable to read from file or IO stream"
|
66
90
|
end
|
67
91
|
|
68
|
-
@product_klass = product_klass
|
69
|
-
|
70
92
|
@release = find_release
|
71
93
|
@header = find_header
|
72
94
|
|
data/spec/reader_spec.rb
CHANGED
@@ -12,6 +12,7 @@ describe ONIX::Reader do
|
|
12
12
|
@entity_file = File.join(@data_path, "entities.xml")
|
13
13
|
@utf_16_file = File.join(@data_path, "utf_16.xml")
|
14
14
|
@iso_8859_1_file = File.join(@data_path, "iso_8859_1.xml")
|
15
|
+
@no_encoding_decl_file = File.join(@data_path, "aau.xml")
|
15
16
|
end
|
16
17
|
|
17
18
|
it "should initialize with a filename" do
|
@@ -90,19 +91,36 @@ describe ONIX::Reader do
|
|
90
91
|
|
91
92
|
it "should transparently convert a iso-8859-1 file to utf-8" do
|
92
93
|
reader = ONIX::Reader.new(@iso_8859_1_file)
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
94
|
+
reader.each do |product|
|
95
|
+
if RUBY_VERSION >= "1.9"
|
96
|
+
utf8 = Encoding.find("utf-8")
|
97
|
+
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
98
|
+
end
|
97
99
|
|
98
|
-
|
99
|
-
if RUBY_VERSION >= "1.9"
|
100
|
-
utf8 = Encoding.find("utf-8")
|
101
|
-
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
100
|
+
product.contributors[0].person_name_inverted.should eql("Küng, Hans")
|
102
101
|
end
|
102
|
+
end
|
103
103
|
|
104
|
-
|
104
|
+
# This isn't ideal behaviour, but I'm somewhat hamstrung by the Nokogiri API. It'd
|
105
|
+
# be nice to have the option to replace unrecognised bytes with a valid char.
|
106
|
+
it "should raise an exception when an iso-8859-1 file isn't declared as such" do
|
107
|
+
reader = ONIX::Reader.new(@no_encoding_decl_file)
|
108
|
+
lambda {
|
109
|
+
reader.each do |product|
|
110
|
+
end
|
111
|
+
}.should raise_error(Nokogiri::XML::SyntaxError)
|
112
|
+
end
|
113
|
+
|
114
|
+
it "should transparently convert an iso-8859-1 file to utf-8 when there's no declaration but the user manually specifies iso-8859-1" do
|
115
|
+
reader = ONIX::Reader.new(@no_encoding_decl_file, :encoding => "iso-8859-1")
|
116
|
+
reader.each do |product|
|
117
|
+
if RUBY_VERSION >= "1.9"
|
118
|
+
utf8 = Encoding.find("utf-8")
|
119
|
+
product.contributors[0].person_name_inverted.encoding.should eql(utf8)
|
120
|
+
end
|
105
121
|
|
122
|
+
product.contributors[0].person_name_inverted.should eql("Melo,Patr¡cia")
|
123
|
+
end
|
106
124
|
end
|
107
125
|
|
108
126
|
it "should transparently convert a utf-16 file to utf-8" do
|
@@ -119,6 +137,19 @@ describe ONIX::Reader do
|
|
119
137
|
end
|
120
138
|
|
121
139
|
product.contributors[0].person_name_inverted.should eql("Küng, Hans")
|
140
|
+
end
|
122
141
|
|
142
|
+
it "should support returning an APAProduct using deprecated API" do
|
143
|
+
reader = ONIX::Reader.new(@file1, ONIX::APAProduct)
|
144
|
+
reader.each do |product|
|
145
|
+
product.should be_a_kind_of(ONIX::APAProduct)
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
it "should support returning an APAProduct using new API" do
|
150
|
+
reader = ONIX::Reader.new(@file1, :product_class => ONIX::APAProduct)
|
151
|
+
reader.each do |product|
|
152
|
+
product.should be_a_kind_of(ONIX::APAProduct)
|
153
|
+
end
|
123
154
|
end
|
124
155
|
end
|
metadata
CHANGED
@@ -1,13 +1,12 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash: 53
|
5
4
|
prerelease: false
|
6
5
|
segments:
|
7
6
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
7
|
+
- 9
|
8
|
+
- 0
|
9
|
+
version: 0.9.0
|
11
10
|
platform: ruby
|
12
11
|
authors:
|
13
12
|
- James Healy
|
@@ -15,18 +14,17 @@ autorequire:
|
|
15
14
|
bindir: bin
|
16
15
|
cert_chain: []
|
17
16
|
|
18
|
-
date:
|
17
|
+
date: 2011-04-14 00:00:00 +10:00
|
19
18
|
default_executable:
|
20
19
|
dependencies:
|
21
20
|
- !ruby/object:Gem::Dependency
|
22
|
-
name:
|
21
|
+
name: roxml
|
23
22
|
prerelease: false
|
24
23
|
requirement: &id001 !ruby/object:Gem::Requirement
|
25
24
|
none: false
|
26
25
|
requirements:
|
27
|
-
- -
|
26
|
+
- - ~>
|
28
27
|
- !ruby/object:Gem::Version
|
29
|
-
hash: 15
|
30
28
|
segments:
|
31
29
|
- 3
|
32
30
|
- 1
|
@@ -35,77 +33,87 @@ dependencies:
|
|
35
33
|
type: :runtime
|
36
34
|
version_requirements: *id001
|
37
35
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
36
|
+
name: activesupport
|
39
37
|
prerelease: false
|
40
38
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
39
|
none: false
|
42
40
|
requirements:
|
43
|
-
- -
|
41
|
+
- - ~>
|
44
42
|
- !ruby/object:Gem::Version
|
45
|
-
hash: 3
|
46
43
|
segments:
|
44
|
+
- 3
|
47
45
|
- 0
|
48
|
-
|
46
|
+
- 5
|
47
|
+
version: 3.0.5
|
49
48
|
type: :runtime
|
50
49
|
version_requirements: *id002
|
51
50
|
- !ruby/object:Gem::Dependency
|
52
|
-
name:
|
51
|
+
name: i18n
|
53
52
|
prerelease: false
|
54
53
|
requirement: &id003 !ruby/object:Gem::Requirement
|
55
54
|
none: false
|
56
55
|
requirements:
|
57
56
|
- - ">="
|
58
57
|
- !ruby/object:Gem::Version
|
59
|
-
hash: 3
|
60
58
|
segments:
|
61
59
|
- 0
|
62
60
|
version: "0"
|
63
61
|
type: :runtime
|
64
62
|
version_requirements: *id003
|
65
63
|
- !ruby/object:Gem::Dependency
|
66
|
-
name:
|
64
|
+
name: andand
|
67
65
|
prerelease: false
|
68
66
|
requirement: &id004 !ruby/object:Gem::Requirement
|
69
67
|
none: false
|
70
68
|
requirements:
|
71
69
|
- - ">="
|
72
70
|
- !ruby/object:Gem::Version
|
73
|
-
|
71
|
+
segments:
|
72
|
+
- 0
|
73
|
+
version: "0"
|
74
|
+
type: :runtime
|
75
|
+
version_requirements: *id004
|
76
|
+
- !ruby/object:Gem::Dependency
|
77
|
+
name: nokogiri
|
78
|
+
prerelease: false
|
79
|
+
requirement: &id005 !ruby/object:Gem::Requirement
|
80
|
+
none: false
|
81
|
+
requirements:
|
82
|
+
- - ">="
|
83
|
+
- !ruby/object:Gem::Version
|
74
84
|
segments:
|
75
85
|
- 1
|
76
86
|
- 4
|
77
87
|
version: "1.4"
|
78
88
|
type: :runtime
|
79
|
-
version_requirements: *
|
89
|
+
version_requirements: *id005
|
80
90
|
- !ruby/object:Gem::Dependency
|
81
91
|
name: rake
|
82
92
|
prerelease: false
|
83
|
-
requirement: &
|
93
|
+
requirement: &id006 !ruby/object:Gem::Requirement
|
84
94
|
none: false
|
85
95
|
requirements:
|
86
96
|
- - ">="
|
87
97
|
- !ruby/object:Gem::Version
|
88
|
-
hash: 3
|
89
98
|
segments:
|
90
99
|
- 0
|
91
100
|
version: "0"
|
92
101
|
type: :development
|
93
|
-
version_requirements: *
|
102
|
+
version_requirements: *id006
|
94
103
|
- !ruby/object:Gem::Dependency
|
95
104
|
name: rspec
|
96
105
|
prerelease: false
|
97
|
-
requirement: &
|
106
|
+
requirement: &id007 !ruby/object:Gem::Requirement
|
98
107
|
none: false
|
99
108
|
requirements:
|
100
109
|
- - ~>
|
101
110
|
- !ruby/object:Gem::Version
|
102
|
-
hash: 1
|
103
111
|
segments:
|
104
112
|
- 2
|
105
113
|
- 1
|
106
114
|
version: "2.1"
|
107
115
|
type: :development
|
108
|
-
version_requirements: *
|
116
|
+
version_requirements: *id007
|
109
117
|
description: A convenient mapping between ruby objects and the ONIX XML specification
|
110
118
|
email:
|
111
119
|
- jimmy@deefa.com
|
@@ -367,7 +375,6 @@ required_ruby_version: !ruby/object:Gem::Requirement
|
|
367
375
|
requirements:
|
368
376
|
- - ">="
|
369
377
|
- !ruby/object:Gem::Version
|
370
|
-
hash: 3
|
371
378
|
segments:
|
372
379
|
- 0
|
373
380
|
version: "0"
|
@@ -376,7 +383,6 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
376
383
|
requirements:
|
377
384
|
- - ">="
|
378
385
|
- !ruby/object:Gem::Version
|
379
|
-
hash: 3
|
380
386
|
segments:
|
381
387
|
- 0
|
382
388
|
version: "0"
|