RubyGems - sru - Versions diffs - 0.0.4 → 0.0.5 - Mend

sru 0.0.4 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (10)

data/lib/sru/client.rb +36 -10
data/lib/sru/explain.rb +8 -9
data/lib/sru/response.rb +13 -26
data/lib/sru/scan.rb +1 -1
data/lib/sru/search_retrieve.rb +5 -4
data/lib/sru/term.rb +5 -5
data/lib/sru/xpath.rb +82 -0
data/test/tc_libxml_client_test.rb +55 -0
data/test/{client_test.rb → tc_rexml_client_test.rb} +7 -7
metadata +10 -8

data/lib/sru/client.rb CHANGED

@@ -1,7 +1,6 @@
 require 'uri'
 require 'cgi'
 require 'net/http'
-require 'rexml/document'
 module SRU
@@ -30,9 +29,24 @@ module SRU
     # explain request to determine the version to be used in
     # subsequent requests.
-    def initialize(base)
+    def initialize(base,options={})
       @server = URI.parse base
+      @parser = options.fetch(:parser, 'rexml')
+      case @parser
+         when 'libxml'
+	    begin
+               require 'rubygems'
+               require 'xml/libxml'
+ 	    rescue
+              raise SRU::Exception, "unknown parser: #{@parser}", caller
+	    end
+         when 'rexml'
+	     require 'rexml/document'
+             require 'rexml/xpath'
+         else
+              raise SRU::Exception, "unknown parser: #{@parser}", caller
+         end
       # stash this away for future requests
       @version = self.explain.version
     end
@@ -46,7 +60,7 @@ module SRU
     def explain
       doc = get_doc(:operation => 'explain')
-      return ExplainResponse.new(doc)
+      return ExplainResponse.new(doc, @parser)
     end
@@ -62,8 +76,9 @@ module SRU
       options[:query] = query
       options[:operation] = 'searchRetrieve'
       options[:maximumRecords] = 10 unless options.has_key? :maximumRecords
+      options[:recordSchema] = 'dc' unless options.has_key? :recordSchema
       doc = get_doc(options)
-      return SearchResponse.new(doc)
+      return SearchResponse.new(doc, @parser)
     end
@@ -78,7 +93,7 @@ module SRU
       options[:operation] = 'scan'
       options[:maximumTerms] = 5 unless options.has_key? :maximumTerms
       doc = get_doc(options)
-      return ScanResponse.new(doc)
+      return ScanResponse.new(doc, @parser)
     end
     private
@@ -88,7 +103,8 @@ module SRU
     def get_doc(hash)
       # all requests get a version
-      hash[:version] = @version
+      hash[:version] = @version
       # don't want to monkey with the original
       uri = @server.clone
@@ -99,12 +115,22 @@ module SRU
         "#{entry[0]}=#{CGI.escape(entry[1].to_s)}"
       }
       uri.query = parts.join('&')
       # fetch the xml and build/return a document object from it
       begin
         xml = Net::HTTP.get(uri)
-        return REXML::Document.new(xml)
-      rescue
+         # load appropriate parser
+        case @parser
+          when 'libxml'
+            xmlObj = LibXML::XML::Parser.new()
+	    # not sure why but the explain namespace does bad things to
+            # libxml
+            #xml = xml.gsub(' xmlns="http://explain.z3950.org/dtd/2.0/"', '')
+            xmlObj.string = xml
+            return xmlObj.parse
+          when 'rexml'
+            return REXML::Document.new(xml)
+        end
+      rescue
         raise SRU::Exception, "exception during SRU operation", caller
       end
     end

data/lib/sru/explain.rb CHANGED

@@ -2,37 +2,36 @@ require 'sru/response'
 module SRU
   class ExplainResponse < Response
     def to_s
       return "host=#{host} port=#{port} database=#{database} version=#{version}"
     end
     def host
-      return xpath('.//serverInfo/host')
+      return xpath(@doc,'.//ns0:serverInfo/ns0:host', @namespaces )
     end
     def port
-      port = xpath('.//serverInfo/port')
+      port = xpath(@doc, './/ns0:serverInfo/ns0:port', @namespaces)
       return nil if not port
       return Integer(port)
     end
     def database
-      return xpath('.//serverInfo/database')
+      return xpath(@doc, './/ns0:serverInfo/ns0:database', @namespaces)
     end
     def number_of_records
-      return xpath('.//configInfo/numberOfRecords')
+      return xpath(@doc, './/ns0:configInfo/ns0:numberOfRecords', @namespaces)
     end
     def version
-      version = xpath('.//zs:explainResponse/zs:version')
+      version = xpath(@doc, './/zs:version', @namespaces)
       return version if version
       # also look here
-      info = xpath_first('.//serverInfo')
-      return info.attributes['version'] if info
+      info = xpath(@doc, './/ns0:serverInfo', @namespaces)
+      #return info.attributes['version'] if info
+      return get_attribute(info, "version") if info
       return nil
     end
   end

data/lib/sru/response.rb CHANGED

@@ -1,37 +1,24 @@
-require 'rexml/xpath'
 module SRU
   # base class for all SRU responses
   class Response
-    attr_reader :doc
-    # namespaces for use in xpath queries
-    @@namespaces = {'zs' => 'http://www.loc.gov/zing/srw/'}
+    require 'sru/xpath'
+    include SRU::XPath
-    def initialize(doc)
-      @doc = doc
-    end
-    protected
-    # get all nodes that match an xpath
-    def xpath_all(path)
-      return REXML::XPath.match(@doc, path, @@namespaces)
-    end
+    attr_reader :doc
+    attr_reader :parser
+    attr_reader :namespaces
-    # get the first node that matches an xpath
-    def xpath_first(path)
-      elements = xpath_all(path)
-      return elements[0] if elements != nil
-      return nil
-    end
+    def initialize(doc, parser = 'rexml')
+      @doc = doc
+      @parser = parser
+      # namespaces for use in xpath queries
+      # this is technically more correct and is required for
+      # libxml to be able to parse the explain block.
+         @namespaces = {'zs' => 'http://www.loc.gov/zing/srw/',
+			'ns0' => 'http://explain.z3950.org/dtd/2.0/'}
-    # get the text inside the first node that matches the xpath
-    def xpath(path)
-      e = xpath_first(path)
-      return e.text if e != nil
-      return nil
     end
   end

data/lib/sru/scan.rb CHANGED

@@ -7,7 +7,7 @@ module SRU
     include Enumerable
     def each
-      for term_node in xpath_all('.//zs:term')
+      for term_node in xpath_all(@doc, './/zs:term', @namespaces)
         yield Term.new(term_node)
       end
     end

data/lib/sru/search_retrieve.rb CHANGED

@@ -15,16 +15,17 @@ module SRU
     include Enumerable
     def number_of_records
-      return Integer(xpath('.//zs:numberOfRecords'))
+      return Integer(xpath(@doc, './/zs:numberOfRecords', @namespaces))
     end
     # Returns the contents of each recordData element in a
     # SRU searchRetrieve response.
     def each
-      for record_data in xpath_all('.//zs:recordData')
-        if record_data.elements.size > 0
-          yield record_data.elements[1]
+      obj = xpath_all(@doc, './/zs:recordData', @namespaces)
+      for record_data in obj
+        if obj.size > 0
+          yield record_data
         end
       end
     end

data/lib/sru/term.rb CHANGED

@@ -7,11 +7,11 @@ module SRU
     def initialize(element)
       super element
-      @value = xpath('value')
-      @number_of_records = xpath('numberOfRecords')
-      @display_term = xpath('displayTerm')
-      @where_in_list = xpath('whereInList')
-      @extra_term_data = xpath_first('extraTermData')
+      @value = xpath(@doc, 'value')
+      @number_of_records = xpath(@doc, 'numberOfRecords')
+      @display_term = xpath(@doc, 'displayTerm')
+      @where_in_list = xpath(@doc, 'whereInList')
+      @extra_term_data = xpath_first(@doc, 'extraTermData')
     end
   end
 end

data/lib/sru/xpath.rb ADDED

@@ -0,0 +1,82 @@
+module SRU
+  module XPath
+    # get all matching nodes
+    def xpath_all(pdoc, path, namespace = '')
+      case parser_type(pdoc)
+      when 'libxml'
+        if namespace!=""
+           return pdoc.find(path, namespace).to_a if pdoc.find(path, namespace)
+        else
+ 	   return pdoc.find(path).to_a if pdoc.find(path)
+        end
+      when 'rexml'
+        if namespace!=""
+           return REXML::XPath.match(pdoc, path, namespace)
+        else
+	   return REXML::XPath.match(pdoc, path);
+        end
+      end
+      return []
+    end
+    # get first matching node
+    def xpath_first(pdoc, path, namespace = '')
+      elements = xpath_all(pdoc, path, namespace )
+      return elements[0] if elements != nil
+      return nil
+    end
+    # get text for first matching node
+    def xpath(pdoc, path, namespace = '')
+      el = xpath_first(pdoc, path, namespace)
+      return unless el
+      case parser_type(pdoc)
+      when 'libxml'
+        return el.content
+      when 'rexml'
+        return el.text
+      end
+      return nil
+    end
+    # figure out an attribute
+    def get_attribute(node, attr_name)
+      case node.class.to_s
+      when 'REXML::XML::Element'
+        return node.attribute(attr_name)
+      when 'LibXML::XML::Node'
+        #There has been a method shift between 0.5 and 0.7
+        if defined?(node.property) == nil
+          return node.attributes[attr_name]
+        else
+          return node.property(attr_name)
+        end
+        #begin
+        #        return node.attributes[attr_name]
+        #rescue
+         #   return node.property(attr_name)
+        #end
+      end
+      return nil
+    end
+    private
+    # figure out what sort of object we should do xpath on
+    def parser_type(x)
+      case x.class.to_s
+      when 'LibXML::XML::Document'
+        return 'libxml'
+      when 'LibXML::XML::Node'
+        return 'libxml'
+      when 'LibXML::XML::Node::Set'
+	return 'libxml'
+      when 'REXML::Element'
+        return 'rexml'
+      when 'REXML::Document'
+        return 'rexml'
+      end
+    end
+  end
+end

data/test/tc_libxml_client_test.rb ADDED

@@ -0,0 +1,55 @@
+class TcLibxmlClientTests < Test::Unit::TestCase
+  def test_explain
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
+    explain = client.explain
+    assert_equal SRU::ExplainResponse, explain.class
+    assert_equal '1.1', explain.version
+    assert_equal 'localhost', explain.host
+    assert_equal 7090, explain.port
+    assert_equal 'voyager', explain.database
+    assert_equal 'host=localhost port=7090 database=voyager version=1.1',
+      explain.to_s
+  end
+  def test_search_retrieve
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
+    results = client.search_retrieve 'twain', :maximumRecords => 5
+    assert_equal 5, results.entries.size
+    assert results.number_of_records > 2000
+    assert_equal LibXML::XML::Node, results.entries[0].class
+    assert_equal 'recordData', results.entries[0].name
+    # hopefully there isn't a document that matches this :)
+    results = client.search_retrieve 'fidkidkdiejfl'
+    assert_equal 0, results.entries.size
+  end
+  def test_default_maximum_records
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'libxml'
+    results = client.search_retrieve 'twain'
+    assert_equal 10, results.entries.size
+  end
+  # need to find a target that supports scan so we can exercise it
+  #def test_scan
+  #  # this scan response appears to be canned might need to change
+  #  client = SRU::Client.new 'http://tweed.lib.ed.ac.uk:8080/elf/search/copac'
+  #  scan = client.scan('foobar')
+  #  assert scan.entries.size > 0
+  #  assert_equal SRU::Term, scan.entries[0].class
+  #  assert_equal 'low', scan.entries[0].value
+  #  assert_equal '1', scan.entries[0].number_of_records
+  #end
+  def test_xml_exception
+    assert_raise(SRU::Exception) {SRU::Client.new 'http://www.google.com'}
+  end
+  def test_http_exception
+    assert_raise(SRU::Exception) {SRU::Client.new 'http://example.com'}
+  end
+end

data/test/{client_test.rb → tc_rexml_client_test.rb} RENAMED

@@ -1,24 +1,24 @@
-class ClientTests < Test::Unit::TestCase
+class TCRexmlClientTests < Test::Unit::TestCase
   def test_explain
-    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager',:parser=>'rexml'
     explain = client.explain
     assert_equal SRU::ExplainResponse, explain.class
     assert_equal '1.1', explain.version
-    assert_equal 'z3950.loc.gov', explain.host
+    assert_equal 'localhost', explain.host
     assert_equal 7090, explain.port
     assert_equal 'voyager', explain.database
-    assert_equal 'host=z3950.loc.gov port=7090 database=voyager version=1.1',
+    assert_equal 'host=localhost port=7090 database=voyager version=1.1',
       explain.to_s
   end
   def test_search_retrieve
-    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
     results = client.search_retrieve 'twain', :maximumRecords => 5
     assert_equal 5, results.entries.size
     assert results.number_of_records > 2000
     assert_equal REXML::Element, results.entries[0].class
-    assert_equal 'record', results.entries[0].name
+    assert_equal 'recordData', results.entries[0].name
     # hopefully there isn't a document that matches this :)
     results = client.search_retrieve 'fidkidkdiejfl'
@@ -26,7 +26,7 @@ class ClientTests < Test::Unit::TestCase
   end
   def test_default_maximum_records
-    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager'
+    client = SRU::Client.new 'http://z3950.loc.gov:7090/voyager', :parser=>'rexml'
     results = client.search_retrieve 'twain'
     assert_equal 10, results.entries.size
   end

metadata CHANGED

@@ -1,10 +1,10 @@
 --- !ruby/object:Gem::Specification
-rubygems_version: 0.9.2
+rubygems_version: 0.9.4
 specification_version: 1
 name: sru
 version: !ruby/object:Gem::Version
-  version: 0.0.4
-date: 2007-12-13 00:00:00 -05:00
+  version: 0.0.5
+date: 2008-09-12 00:00:00 -07:00
 summary: a Ruby library for Search and Retrieve by URL
 require_paths:
 - lib
@@ -30,15 +30,17 @@ authors:
 - Ed Summers
 files:
 - lib/sru
-- lib/sru.rb
-- lib/sru/client.rb
-- lib/sru/exception.rb
+- lib/sru/xpath.rb
 - lib/sru/explain.rb
+- lib/sru/exception.rb
+- lib/sru/search_retrieve.rb
 - lib/sru/response.rb
 - lib/sru/scan.rb
-- lib/sru/search_retrieve.rb
 - lib/sru/term.rb
-- test/client_test.rb
+- lib/sru/client.rb
+- lib/sru.rb
+- test/tc_rexml_client_test.rb
+- test/tc_libxml_client_test.rb
 test_files: []
 rdoc_options: []