onix 0.5.1 → 0.6.0

Sign up to get free protection for your applications and to get access to all the features.
data/CHANGELOG CHANGED
@@ -1,3 +1,10 @@
1
+ v0.6.0 (18th March 2009)
2
+ - remove use of threads in ONIX::Reader
3
+ - a producer/consumer pattern was useful in the REXML stream parsing days, but
4
+ now LibXML's Reader binding provides a better alternative
5
+ - API left unchanged, this was all under the hood
6
+ - bump required ROXML version to 2.5.2
7
+
1
8
  v0.5.1 (4th March 2009)
2
9
  - Fix a single letter typo
3
10
 
@@ -3,7 +3,7 @@ require 'bigdecimal'
3
3
  require 'cgi'
4
4
 
5
5
  # ensure we load the correct gem versions
6
- gem 'roxml', '2.5.1'
6
+ gem 'roxml', '2.5.2'
7
7
  gem 'andand'
8
8
 
9
9
  # and now load the actual gems
@@ -13,8 +13,8 @@ require 'andand'
13
13
  module ONIX
14
14
  module Version #:nodoc:
15
15
  Major = 0
16
- Minor = 5
17
- Tiny = 1
16
+ Minor = 6
17
+ Tiny = 0
18
18
 
19
19
  String = [Major, Minor, Tiny].join('.')
20
20
  end
@@ -1,5 +1,3 @@
1
- require 'thread'
2
- require 'timeout'
3
1
  require 'stringio'
4
2
 
5
3
  module ONIX
@@ -66,65 +64,56 @@ module ONIX
66
64
  end
67
65
 
68
66
  @product_klass = product_klass
67
+ @header = nil
69
68
 
70
- # create a sized queue to store each product read from the file
71
- # We use a separate thread to read products from the source file.
72
- # This queue is a thread-safe way to transfer products from that
73
- # thread back into the main one.
74
- @queue = SizedQueue.new(100)
75
-
76
- # launch a reader thread
77
- Thread.abort_on_exception = true
78
- Thread.new { read_input }
79
-
80
- # don't return from the constructor until the reading thread
81
- # has spun up and put at least one item into the queue. If
82
- # it finds no Products in the file, it queues a nil, so we
83
- # shouldn't get stuck here indefinitely
84
- while @queue.size == 0
85
- sleep 0.05
69
+ while @header.nil?
70
+ obj = read_next
71
+ if obj.kind_of?(ONIX::Header)
72
+ @header = obj
73
+ end
86
74
  end
87
75
  end
88
76
 
89
77
  # Iterate over all the products in an ONIX file
90
78
  #
91
79
  def each(&block)
92
- obj = @queue.pop
93
- while !obj.nil?
80
+ while !(obj = read_next).nil?
94
81
  yield obj
95
- obj = @queue.pop
96
82
  end
97
83
  end
98
84
 
99
85
  private
100
86
 
101
- # Walk the ONIX file, and grab the bits we're interested in.
102
- #
103
- # High level attributes and the header are stored as attributes of the reader
104
- # class. Products are placed in a queue, ready to be popped off when the
105
- # user uses the each() method.
87
+ # Walk the ONIX file, and grab the next header or product fragment. If we
88
+ # encounter other useful bits of info along the way (encoding, etc) then
89
+ # store them for later.
106
90
  #
107
- def read_input
91
+ def read_next
108
92
  while @reader.read
93
+
109
94
  @xml_lang = @reader.xml_lang if @xml_lang.nil?
110
95
  @xml_version = @reader.xml_version.to_f if @xml_version.nil?
111
96
  @encoding = encoding_const_to_name(@reader.encoding) if @encoding.nil?
97
+
112
98
  if @reader.node_type == LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
113
99
  uri = @reader.expand.to_s
114
100
  m, major, minor, rev = *uri.match(/.+(\d)\.(\d)\/(\d*).*/)
115
101
  @version = [major.to_i, minor.to_i, rev.to_i]
116
102
  elsif @reader.name == "Header" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
117
- @header = ONIX::Header.from_xml(@reader.expand.to_s)
103
+ str = @reader.read_outer_xml
118
104
  @reader.next_sibling
105
+ return ONIX::Header.from_xml(str)
119
106
  elsif @reader.name == "Product" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
120
- node = @reader.expand
121
- @queue.push @product_klass.from_xml(node.to_s)
107
+ str = @reader.read_outer_xml
122
108
  @reader.next_sibling
109
+ return @product_klass.from_xml(str)
123
110
  end
124
111
  end
125
- @queue.push nil
112
+ return nil
126
113
  end
127
114
 
115
+ # simple mapping of encoding constants to a string
116
+ #
128
117
  def encoding_const_to_name(const)
129
118
  case const
130
119
  when LibXML::XML::Encoding::UTF_8
@@ -10,6 +10,7 @@ context "ONIX::Reader" do
10
10
  data_path = File.join(File.dirname(__FILE__),"..","data")
11
11
  @file1 = File.join(data_path, "9780194351898.xml")
12
12
  @file2 = File.join(data_path, "two_products.xml")
13
+ @entity_file = File.join(data_path, "entities.xml")
13
14
  end
14
15
 
15
16
  specify "should initialize with a filename" do
@@ -17,14 +18,12 @@ context "ONIX::Reader" do
17
18
  reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
18
19
  end
19
20
 
20
- =begin
21
21
  specify "should initialize with an IO object" do
22
22
  File.open(@file1,"rb") do |f|
23
23
  reader = ONIX::Reader.new(f)
24
24
  reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
25
25
  end
26
26
  end
27
- =end
28
27
 
29
28
  specify "should provide access to various XML metadata from file" do
30
29
  reader = ONIX::Reader.new(@file1)
@@ -61,4 +60,20 @@ context "ONIX::Reader" do
61
60
  products[0].record_reference.should eql("365-9780194351898")
62
61
  products[1].record_reference.should eql("9780754672326")
63
62
  end
63
+
64
+ # libxml can handle the 3 standard entities fine (&amp; &lt; and &gt;) but
65
+ # barfs when it encounters others. In theory other entities are defined in the
66
+ # ONIX DTD, but I can't work out how to get libxml to recognise them
67
+ specify "should correctly parse a file that has an entity in it" do
68
+ reader = ONIX::Reader.new(@entity_file)
69
+
70
+ products = []
71
+ reader.each do |product|
72
+ products << product
73
+ end
74
+
75
+ products.size.should eql(1)
76
+ products.first.record_reference.should eql("9780732287573")
77
+ products.first.titles.first.title_text.should eql("High Noon\342\200\223in Nimbin")
78
+ end
64
79
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-04 00:00:00 +11:00
12
+ date: 2009-03-18 00:00:00 +11:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - "="
22
22
  - !ruby/object:Gem::Version
23
- version: 2.5.1
23
+ version: 2.5.2
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: libxml-ruby