onix 0.5.1 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- data/CHANGELOG +7 -0
- data/lib/onix.rb +3 -3
- data/lib/onix/reader.rb +20 -31
- data/spec/reader_spec.rb +17 -2
- metadata +3 -3
data/CHANGELOG
CHANGED
@@ -1,3 +1,10 @@
|
|
1
|
+
v0.6.0 (18th March 2009)
|
2
|
+
- remove use of threads in ONIX::Reader
|
3
|
+
- a producer/consumer pattern was useful in the REXML stream parsing days, but
|
4
|
+
now LibXML's Reader binding provides a better alternative
|
5
|
+
- API left unchanged, this was all under the hood
|
6
|
+
- bump required ROXML version to 2.5.2
|
7
|
+
|
1
8
|
v0.5.1 (4th March 2009)
|
2
9
|
- Fix a single letter typo
|
3
10
|
|
data/lib/onix.rb
CHANGED
@@ -3,7 +3,7 @@ require 'bigdecimal'
|
|
3
3
|
require 'cgi'
|
4
4
|
|
5
5
|
# ensure we load the correct gem versions
|
6
|
-
gem 'roxml', '2.5.
|
6
|
+
gem 'roxml', '2.5.2'
|
7
7
|
gem 'andand'
|
8
8
|
|
9
9
|
# and now load the actual gems
|
@@ -13,8 +13,8 @@ require 'andand'
|
|
13
13
|
module ONIX
|
14
14
|
module Version #:nodoc:
|
15
15
|
Major = 0
|
16
|
-
Minor =
|
17
|
-
Tiny =
|
16
|
+
Minor = 6
|
17
|
+
Tiny = 0
|
18
18
|
|
19
19
|
String = [Major, Minor, Tiny].join('.')
|
20
20
|
end
|
data/lib/onix/reader.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'thread'
|
2
|
-
require 'timeout'
|
3
1
|
require 'stringio'
|
4
2
|
|
5
3
|
module ONIX
|
@@ -66,65 +64,56 @@ module ONIX
|
|
66
64
|
end
|
67
65
|
|
68
66
|
@product_klass = product_klass
|
67
|
+
@header = nil
|
69
68
|
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
# launch a reader thread
|
77
|
-
Thread.abort_on_exception = true
|
78
|
-
Thread.new { read_input }
|
79
|
-
|
80
|
-
# don't return from the constructor until the reading thread
|
81
|
-
# has spun up and put at least one item into the queue. If
|
82
|
-
# it finds no Products in the file, it queues a nil, so we
|
83
|
-
# shouldn't get stuck here indefinitely
|
84
|
-
while @queue.size == 0
|
85
|
-
sleep 0.05
|
69
|
+
while @header.nil?
|
70
|
+
obj = read_next
|
71
|
+
if obj.kind_of?(ONIX::Header)
|
72
|
+
@header = obj
|
73
|
+
end
|
86
74
|
end
|
87
75
|
end
|
88
76
|
|
89
77
|
# Iterate over all the products in an ONIX file
|
90
78
|
#
|
91
79
|
def each(&block)
|
92
|
-
obj =
|
93
|
-
while !obj.nil?
|
80
|
+
while !(obj = read_next).nil?
|
94
81
|
yield obj
|
95
|
-
obj = @queue.pop
|
96
82
|
end
|
97
83
|
end
|
98
84
|
|
99
85
|
private
|
100
86
|
|
101
|
-
# Walk the ONIX file, and grab the
|
102
|
-
#
|
103
|
-
#
|
104
|
-
# class. Products are placed in a queue, ready to be popped off when the
|
105
|
-
# user uses the each() method.
|
87
|
+
# Walk the ONIX file, and grab the next header or product fragment. If we
|
88
|
+
# encounter other useful bits of info along the way (encoding, etc) then
|
89
|
+
# store them for later.
|
106
90
|
#
|
107
|
-
def
|
91
|
+
def read_next
|
108
92
|
while @reader.read
|
93
|
+
|
109
94
|
@xml_lang = @reader.xml_lang if @xml_lang.nil?
|
110
95
|
@xml_version = @reader.xml_version.to_f if @xml_version.nil?
|
111
96
|
@encoding = encoding_const_to_name(@reader.encoding) if @encoding.nil?
|
97
|
+
|
112
98
|
if @reader.node_type == LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
|
113
99
|
uri = @reader.expand.to_s
|
114
100
|
m, major, minor, rev = *uri.match(/.+(\d)\.(\d)\/(\d*).*/)
|
115
101
|
@version = [major.to_i, minor.to_i, rev.to_i]
|
116
102
|
elsif @reader.name == "Header" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
|
117
|
-
|
103
|
+
str = @reader.read_outer_xml
|
118
104
|
@reader.next_sibling
|
105
|
+
return ONIX::Header.from_xml(str)
|
119
106
|
elsif @reader.name == "Product" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
|
120
|
-
|
121
|
-
@queue.push @product_klass.from_xml(node.to_s)
|
107
|
+
str = @reader.read_outer_xml
|
122
108
|
@reader.next_sibling
|
109
|
+
return @product_klass.from_xml(str)
|
123
110
|
end
|
124
111
|
end
|
125
|
-
|
112
|
+
return nil
|
126
113
|
end
|
127
114
|
|
115
|
+
# simple mapping of encoding constants to a string
|
116
|
+
#
|
128
117
|
def encoding_const_to_name(const)
|
129
118
|
case const
|
130
119
|
when LibXML::XML::Encoding::UTF_8
|
data/spec/reader_spec.rb
CHANGED
@@ -10,6 +10,7 @@ context "ONIX::Reader" do
|
|
10
10
|
data_path = File.join(File.dirname(__FILE__),"..","data")
|
11
11
|
@file1 = File.join(data_path, "9780194351898.xml")
|
12
12
|
@file2 = File.join(data_path, "two_products.xml")
|
13
|
+
@entity_file = File.join(data_path, "entities.xml")
|
13
14
|
end
|
14
15
|
|
15
16
|
specify "should initialize with a filename" do
|
@@ -17,14 +18,12 @@ context "ONIX::Reader" do
|
|
17
18
|
reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
|
18
19
|
end
|
19
20
|
|
20
|
-
=begin
|
21
21
|
specify "should initialize with an IO object" do
|
22
22
|
File.open(@file1,"rb") do |f|
|
23
23
|
reader = ONIX::Reader.new(f)
|
24
24
|
reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
|
25
25
|
end
|
26
26
|
end
|
27
|
-
=end
|
28
27
|
|
29
28
|
specify "should provide access to various XML metadata from file" do
|
30
29
|
reader = ONIX::Reader.new(@file1)
|
@@ -61,4 +60,20 @@ context "ONIX::Reader" do
|
|
61
60
|
products[0].record_reference.should eql("365-9780194351898")
|
62
61
|
products[1].record_reference.should eql("9780754672326")
|
63
62
|
end
|
63
|
+
|
64
|
+
# libxml can handle the 3 standard entities fine (&amp; &lt; and &gt;) but
|
65
|
+
# barfs when it encounters others. In theory other entities are defined in the
|
66
|
+
# ONIX DTD, but I can't work out how to get libxml to recognise them
|
67
|
+
specify "should correctly parse a file that has an entity in it" do
|
68
|
+
reader = ONIX::Reader.new(@entity_file)
|
69
|
+
|
70
|
+
products = []
|
71
|
+
reader.each do |product|
|
72
|
+
products << product
|
73
|
+
end
|
74
|
+
|
75
|
+
products.size.should eql(1)
|
76
|
+
products.first.record_reference.should eql("9780732287573")
|
77
|
+
products.first.titles.first.title_text.should eql("High Noon\342\200\223in Nimbin")
|
78
|
+
end
|
64
79
|
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: onix
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.
|
4
|
+
version: 0.6.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- James Healy
|
@@ -9,7 +9,7 @@ autorequire:
|
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
11
|
|
12
|
-
date: 2009-03-
|
12
|
+
date: 2009-03-18 00:00:00 +11:00
|
13
13
|
default_executable:
|
14
14
|
dependencies:
|
15
15
|
- !ruby/object:Gem::Dependency
|
@@ -20,7 +20,7 @@ dependencies:
|
|
20
20
|
requirements:
|
21
21
|
- - "="
|
22
22
|
- !ruby/object:Gem::Version
|
23
|
-
version: 2.5.
|
23
|
+
version: 2.5.2
|
24
24
|
version:
|
25
25
|
- !ruby/object:Gem::Dependency
|
26
26
|
name: libxml-ruby
|