onix 0.5.1 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/CHANGELOG +7 -0
- data/lib/onix.rb +3 -3
- data/lib/onix/reader.rb +20 -31
- data/spec/reader_spec.rb +17 -2
- metadata +3 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v0.6.0 (18th March 2009)
|
2
|
+
- remove use of threads in ONIX::Reader
|
3
|
+
- a producer/consumer pattern was useful in the REXML stream parsing days, but
|
4
|
+
now LibXML's Reader binding provides a better alternative
|
5
|
+
- API left unchanged, this was all under the hood
|
6
|
+
- bump required ROXML version to 2.5.2
|
7
|
+
|
1
8
|
v0.5.1 (4th March 2009)
|
2
9
|
- Fix a single letter typo
|
3
10
|
|
data/lib/onix.rb
CHANGED
@@ -3,7 +3,7 @@ require 'bigdecimal'
|
|
3
3
|
require 'cgi'
|
4
4
|
|
5
5
|
# ensure we load the correct gem versions
|
6
|
-
gem 'roxml', '2.5.
|
6
|
+
gem 'roxml', '2.5.2'
|
7
7
|
gem 'andand'
|
8
8
|
|
9
9
|
# and now load the actual gems
|
@@ -13,8 +13,8 @@ require 'andand'
|
|
13
13
|
module ONIX
|
14
14
|
module Version #:nodoc:
|
15
15
|
Major = 0
|
16
|
-
Minor = 5
|
17
|
-
Tiny = 1
|
16
|
+
Minor = 6
|
17
|
+
Tiny = 0
|
18
18
|
|
19
19
|
String = [Major, Minor, Tiny].join('.')
|
20
20
|
end
|
data/lib/onix/reader.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'thread'
|
2
|
-
require 'timeout'
|
3
1
|
require 'stringio'
|
4
2
|
|
5
3
|
module ONIX
|
@@ -66,65 +64,56 @@ module ONIX
|
|
66
64
|
end
|
67
65
|
|
68
66
|
@product_klass = product_klass
|
67
|
+
@header = nil
|
69
68
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
# launch a reader thread
|
77
|
-
Thread.abort_on_exception = true
|
78
|
-
Thread.new { read_input }
|
79
|
-
|
80
|
-
# don't return from the constructor until the reading thread
|
81
|
-
# has spun up and put at least one item into the queue. If
|
82
|
-
# it finds no Products in the file, it queues a nil, so we
|
83
|
-
# shouldn't get stuck here indefinitely
|
84
|
-
while @queue.size == 0
|
85
|
-
sleep 0.05
|
69
|
+
while @header.nil?
|
70
|
+
obj = read_next
|
71
|
+
if obj.kind_of?(ONIX::Header)
|
72
|
+
@header = obj
|
73
|
+
end
|
86
74
|
end
|
87
75
|
end
|
88
76
|
|
89
77
|
# Iterate over all the products in an ONIX file
|
90
78
|
#
|
91
79
|
def each(&block)
|
92
|
-
obj = @queue.pop
|
93
|
-
while !obj.nil?
|
80
|
+
while !(obj = read_next).nil?
|
94
81
|
yield obj
|
95
|
-
obj = @queue.pop
|
96
82
|
end
|
97
83
|
end
|
98
84
|
|
99
85
|
private
|
100
86
|
|
101
|
-
# Walk the ONIX file, and grab the
|
102
|
-
#
|
103
|
-
#
|
104
|
-
# class. Products are placed in a queue, ready to be popped off when the
|
105
|
-
# user uses the each() method.
|
87
|
+
# Walk the ONIX file, and grab the next header or product fragment. If we
|
88
|
+
# encounter other useful bits of info along the way (encoding, etc) then
|
89
|
+
# store them for later.
|
106
90
|
#
|
107
|
-
def read_input
|
91
|
+
def read_next
|
108
92
|
while @reader.read
|
93
|
+
|
109
94
|
@xml_lang = @reader.xml_lang if @xml_lang.nil?
|
110
95
|
@xml_version = @reader.xml_version.to_f if @xml_version.nil?
|
111
96
|
@encoding = encoding_const_to_name(@reader.encoding) if @encoding.nil?
|
97
|
+
|
112
98
|
if @reader.node_type == LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
|
113
99
|
uri = @reader.expand.to_s
|
114
100
|
m, major, minor, rev = *uri.match(/.+(\d)\.(\d)\/(\d*).*/)
|
115
101
|
@version = [major.to_i, minor.to_i, rev.to_i]
|
116
102
|
elsif @reader.name == "Header" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
|
117
|
-
|
103
|
+
str = @reader.read_outer_xml
|
118
104
|
@reader.next_sibling
|
105
|
+
return ONIX::Header.from_xml(str)
|
119
106
|
elsif @reader.name == "Product" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
|
120
|
-
|
121
|
-
@queue.push @product_klass.from_xml(node.to_s)
|
107
|
+
str = @reader.read_outer_xml
|
122
108
|
@reader.next_sibling
|
109
|
+
return @product_klass.from_xml(str)
|
123
110
|
end
|
124
111
|
end
|
125
|
-
|
112
|
+
return nil
|
126
113
|
end
|
127
114
|
|
115
|
+
# simple mapping of encoding constants to a string
|
116
|
+
#
|
128
117
|
def encoding_const_to_name(const)
|
129
118
|
case const
|
130
119
|
when LibXML::XML::Encoding::UTF_8
|
data/spec/reader_spec.rb
CHANGED
@@ -10,6 +10,7 @@ context "ONIX::Reader" do
|
|
10
10
|
data_path = File.join(File.dirname(__FILE__),"..","data")
|
11
11
|
@file1 = File.join(data_path, "9780194351898.xml")
|
12
12
|
@file2 = File.join(data_path, "two_products.xml")
|
13
|
+
@entity_file = File.join(data_path, "entities.xml")
|
13
14
|
end
|
14
15
|
|
15
16
|
specify "should initialize with a filename" do
|
@@ -17,14 +18,12 @@ context "ONIX::Reader" do
|
|
17
18
|
reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
|
18
19
|
end
|
19
20
|
|
20
|
-
=begin
|
21
21
|
specify "should initialize with an IO object" do
|
22
22
|
File.open(@file1,"rb") do |f|
|
23
23
|
reader = ONIX::Reader.new(f)
|
24
24
|
reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
|
25
25
|
end
|
26
26
|
end
|
27
|
-
=end
|
28
27
|
|
29
28
|
specify "should provide access to various XML metadata from file" do
|
30
29
|
reader = ONIX::Reader.new(@file1)
|
@@ -61,4 +60,20 @@ context "ONIX::Reader" do
|
|
61
60
|
products[0].record_reference.should eql("365-9780194351898")
|
62
61
|
products[1].record_reference.should eql("9780754672326")
|
63
62
|
end
|
63
|
+
|
64
|
+
# libxml can handle the 3 standard entities fine (&amp; &lt; and &gt;) but
|
65
|
+
# barfs when it encounters others. In theory other entities are defined in the
|
66
|
+
# ONIX DTD, but I can't work out how to get libxml to recognise them
|
67
|
+
specify "should correctly parse a file that has an entity in it" do
|
68
|
+
reader = ONIX::Reader.new(@entity_file)
|
69
|
+
|
70
|
+
products = []
|
71
|
+
reader.each do |product|
|
72
|
+
products << product
|
73
|
+
end
|
74
|
+
|
75
|
+
products.size.should eql(1)
|
76
|
+
products.first.record_reference.should eql("9780732287573")
|
77
|
+
products.first.titles.first.title_text.should eql("High Noon\342\200\223in Nimbin")
|
78
|
+
end
|
64
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.5.1
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-03-
|
12
|
+
date: 2009-03-18 00:00:00 +11:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - "="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 2.5.
|
23
|
+
version: 2.5.2
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: libxml-ruby
|