onix 0.5.1 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/CHANGELOG CHANGED
@@ -1,3 +1,10 @@
1
+ v0.6.0 (18th March 2009)
2
+ - remove use of threads in ONIX::Reader
3
+ - a producer/consumer pattern was useful in the REXML stream parsing days, but
4
+ now LibXML's Reader binding provides a better alternative
5
+ - API left unchanged, this was all under the hood
6
+ - bump required ROXML version to 2.5.2
7
+
1
8
  v0.5.1 (4th March 2009)
2
9
  - Fix a single letter typo
3
10
 
@@ -3,7 +3,7 @@ require 'bigdecimal'
3
3
  require 'cgi'
4
4
 
5
5
  # ensure we load the correct gem versions
6
- gem 'roxml', '2.5.1'
6
+ gem 'roxml', '2.5.2'
7
7
  gem 'andand'
8
8
 
9
9
  # and now load the actual gems
@@ -13,8 +13,8 @@ require 'andand'
13
13
  module ONIX
14
14
  module Version #:nodoc:
15
15
  Major = 0
16
- Minor = 5
17
- Tiny = 1
16
+ Minor = 6
17
+ Tiny = 0
18
18
 
19
19
  String = [Major, Minor, Tiny].join('.')
20
20
  end
@@ -1,5 +1,3 @@
1
- require 'thread'
2
- require 'timeout'
3
1
  require 'stringio'
4
2
 
5
3
  module ONIX
@@ -66,65 +64,56 @@ module ONIX
66
64
  end
67
65
 
68
66
  @product_klass = product_klass
67
+ @header = nil
69
68
 
70
- # create a sized queue to store each product read from the file
71
- # We use a separate thread to read products from the source file.
72
- # This queue is a thread-safe way to transfer products from that
73
- # thread back into the main one.
74
- @queue = SizedQueue.new(100)
75
-
76
- # launch a reader thread
77
- Thread.abort_on_exception = true
78
- Thread.new { read_input }
79
-
80
- # don't return from the constructor until the reading thread
81
- # has spun up and put at least one item into the queue. If
82
- # it finds no Products in the file, it queues a nil, so we
83
- # shouldn't get stuck here indefinitely
84
- while @queue.size == 0
85
- sleep 0.05
69
+ while @header.nil?
70
+ obj = read_next
71
+ if obj.kind_of?(ONIX::Header)
72
+ @header = obj
73
+ end
86
74
  end
87
75
  end
88
76
 
89
77
  # Iterate over all the products in an ONIX file
90
78
  #
91
79
  def each(&block)
92
- obj = @queue.pop
93
- while !obj.nil?
80
+ while !(obj = read_next).nil?
94
81
  yield obj
95
- obj = @queue.pop
96
82
  end
97
83
  end
98
84
 
99
85
  private
100
86
 
101
- # Walk the ONIX file, and grab the bits we're interested in.
102
- #
103
- # High level attributes and the header are stored as attributes of the reader
104
- # class. Products are placed in a queue, ready to be popped off when the
105
- # user uses the each() method.
87
+ # Walk the ONIX file, and grab the next header or product fragment. If we
88
+ # encounter other useful bits of info along the way (encoding, etc) then
89
+ # store them for later.
106
90
  #
107
- def read_input
91
+ def read_next
108
92
  while @reader.read
93
+
109
94
  @xml_lang = @reader.xml_lang if @xml_lang.nil?
110
95
  @xml_version = @reader.xml_version.to_f if @xml_version.nil?
111
96
  @encoding = encoding_const_to_name(@reader.encoding) if @encoding.nil?
97
+
112
98
  if @reader.node_type == LibXML::XML::Reader::TYPE_DOCUMENT_TYPE
113
99
  uri = @reader.expand.to_s
114
100
  m, major, minor, rev = *uri.match(/.+(\d)\.(\d)\/(\d*).*/)
115
101
  @version = [major.to_i, minor.to_i, rev.to_i]
116
102
  elsif @reader.name == "Header" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
117
- @header = ONIX::Header.from_xml(@reader.expand.to_s)
103
+ str = @reader.read_outer_xml
118
104
  @reader.next_sibling
105
+ return ONIX::Header.from_xml(str)
119
106
  elsif @reader.name == "Product" && @reader.node_type == LibXML::XML::Reader::TYPE_ELEMENT
120
- node = @reader.expand
121
- @queue.push @product_klass.from_xml(node.to_s)
107
+ str = @reader.read_outer_xml
122
108
  @reader.next_sibling
109
+ return @product_klass.from_xml(str)
123
110
  end
124
111
  end
125
- @queue.push nil
112
+ return nil
126
113
  end
127
114
 
115
+ # simple mapping of encoding constants to a string
116
+ #
128
117
  def encoding_const_to_name(const)
129
118
  case const
130
119
  when LibXML::XML::Encoding::UTF_8
@@ -10,6 +10,7 @@ context "ONIX::Reader" do
10
10
  data_path = File.join(File.dirname(__FILE__),"..","data")
11
11
  @file1 = File.join(data_path, "9780194351898.xml")
12
12
  @file2 = File.join(data_path, "two_products.xml")
13
+ @entity_file = File.join(data_path, "entities.xml")
13
14
  end
14
15
 
15
16
  specify "should initialize with a filename" do
@@ -17,14 +18,12 @@ context "ONIX::Reader" do
17
18
  reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
18
19
  end
19
20
 
20
- =begin
21
21
  specify "should initialize with an IO object" do
22
22
  File.open(@file1,"rb") do |f|
23
23
  reader = ONIX::Reader.new(f)
24
24
  reader.instance_variable_get("@reader").should be_a_kind_of(LibXML::XML::Reader)
25
25
  end
26
26
  end
27
- =end
28
27
 
29
28
  specify "should provide access to various XML metadata from file" do
30
29
  reader = ONIX::Reader.new(@file1)
@@ -61,4 +60,20 @@ context "ONIX::Reader" do
61
60
  products[0].record_reference.should eql("365-9780194351898")
62
61
  products[1].record_reference.should eql("9780754672326")
63
62
  end
63
+
64
+ # libxml can handle the 3 standard entities fine (&amp; &lt; and &gt;) but
65
+ # barfs when it encounters others. In theory other entities are defined in the
66
+ # ONIX DTD, but I can't work out how to get libxml to recognise them
67
+ specify "should correctly parse a file that has an entity in it" do
68
+ reader = ONIX::Reader.new(@entity_file)
69
+
70
+ products = []
71
+ reader.each do |product|
72
+ products << product
73
+ end
74
+
75
+ products.size.should eql(1)
76
+ products.first.record_reference.should eql("9780732287573")
77
+ products.first.titles.first.title_text.should eql("High Noon\342\200\223in Nimbin")
78
+ end
64
79
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: onix
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.5.1
4
+ version: 0.6.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Healy
@@ -9,7 +9,7 @@ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
11
 
12
- date: 2009-03-04 00:00:00 +11:00
12
+ date: 2009-03-18 00:00:00 +11:00
13
13
  default_executable:
14
14
  dependencies:
15
15
  - !ruby/object:Gem::Dependency
@@ -20,7 +20,7 @@ dependencies:
20
20
  requirements:
21
21
  - - "="
22
22
  - !ruby/object:Gem::Version
23
- version: 2.5.1
23
+ version: 2.5.2
24
24
  version:
25
25
  - !ruby/object:Gem::Dependency
26
26
  name: libxml-ruby