RubyGems - bento_search - Versions diffs - 0.0.1 - Mend

bento_search 0.0.1

Files changed (122) hide show

data/MIT-LICENSE +20 -0
data/README.md +299 -0
data/Rakefile +40 -0
data/app/assets/images/bento_search/large_loader.gif +0 -0
data/app/assets/javascripts/bento_search.js +3 -0
data/app/assets/javascripts/bento_search/ajax_load.js +22 -0
data/app/assets/stylesheets/bento_search/bento.css +4 -0
data/app/controllers/bento_search/bento_search_controller.rb +7 -0
data/app/controllers/bento_search/search_controller.rb +72 -0
data/app/helpers/bento_search_helper.rb +138 -0
data/app/item_decorators/bento_search/only_premade_openurl.rb +16 -0
data/app/item_decorators/bento_search/openurl_add_other_link.rb +35 -0
data/app/item_decorators/bento_search/openurl_main_link.rb +30 -0
data/app/models/bento_search/author.rb +25 -0
data/app/models/bento_search/link.rb +30 -0
data/app/models/bento_search/multi_searcher.rb +109 -0
data/app/models/bento_search/openurl_creator.rb +128 -0
data/app/models/bento_search/registrar.rb +70 -0
data/app/models/bento_search/result_item.rb +203 -0
data/app/models/bento_search/results.rb +54 -0
data/app/models/bento_search/results/pagination.rb +67 -0
data/app/models/bento_search/search_engine.rb +219 -0
data/app/models/bento_search/search_engine/capabilities.rb +65 -0
data/app/search_engines/bento_search/#Untitled-1# +11 -0
data/app/search_engines/bento_search/ebsco_host_engine.rb +356 -0
data/app/search_engines/bento_search/eds_engine.rb +557 -0
data/app/search_engines/bento_search/google_books_engine.rb +184 -0
data/app/search_engines/bento_search/primo_engine.rb +231 -0
data/app/search_engines/bento_search/scopus_engine.rb +295 -0
data/app/search_engines/bento_search/summon_engine.rb +398 -0
data/app/search_engines/bento_search/xerxes_engine.rb +168 -0
data/app/views/bento_search/_link.html.erb +4 -0
data/app/views/bento_search/_search_error.html.erb +22 -0
data/app/views/bento_search/_std_item.html.erb +39 -0
data/app/views/bento_search/search/search.html.erb +1 -0
data/config/locales/en.yml +25 -0
data/lib/bento_search.rb +29 -0
data/lib/bento_search/engine.rb +5 -0
data/lib/bento_search/routes.rb +45 -0
data/lib/bento_search/version.rb +3 -0
data/lib/generators/bento_search/pull_ebsco_dbs_generator.rb +24 -0
data/lib/generators/bento_search/templates/ebsco_global_var.erb +6 -0
data/lib/http_client_patch/include_client.rb +86 -0
data/lib/tasks/bento_search_tasks.rake +4 -0
data/test/dummy/README.rdoc +261 -0
data/test/dummy/Rakefile +7 -0
data/test/dummy/app/assets/javascripts/application.js +15 -0
data/test/dummy/app/assets/stylesheets/application.css +13 -0
data/test/dummy/app/controllers/application_controller.rb +3 -0
data/test/dummy/app/helpers/application_helper.rb +2 -0
data/test/dummy/app/views/layouts/application.html.erb +14 -0
data/test/dummy/config.ru +4 -0
data/test/dummy/config/application.rb +56 -0
data/test/dummy/config/boot.rb +10 -0
data/test/dummy/config/database.yml +25 -0
data/test/dummy/config/environment.rb +5 -0
data/test/dummy/config/environments/development.rb +37 -0
data/test/dummy/config/environments/production.rb +67 -0
data/test/dummy/config/environments/test.rb +37 -0
data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/test/dummy/config/initializers/inflections.rb +15 -0
data/test/dummy/config/initializers/mime_types.rb +5 -0
data/test/dummy/config/initializers/secret_token.rb +7 -0
data/test/dummy/config/initializers/session_store.rb +8 -0
data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
data/test/dummy/config/locales/en.yml +5 -0
data/test/dummy/config/routes.rb +6 -0
data/test/dummy/db/test.sqlite3 +0 -0
data/test/dummy/log/test.log +3100 -0
data/test/dummy/public/404.html +26 -0
data/test/dummy/public/422.html +26 -0
data/test/dummy/public/500.html +25 -0
data/test/dummy/public/favicon.ico +0 -0
data/test/dummy/script/rails +6 -0
data/test/functional/bento_search/search_controller_test.rb +81 -0
data/test/helper/bento_search_helper_test.rb +125 -0
data/test/integration/navigation_test.rb +10 -0
data/test/support/mock_engine.rb +23 -0
data/test/support/test_with_cassette.rb +38 -0
data/test/test_helper.rb +52 -0
data/test/unit/#vcr_test.rb# +68 -0
data/test/unit/ebsco_host_engine_test.rb +134 -0
data/test/unit/eds_engine_test.rb +105 -0
data/test/unit/google_books_engine_test.rb +93 -0
data/test/unit/item_decorators_test.rb +66 -0
data/test/unit/multi_searcher_test.rb +49 -0
data/test/unit/openurl_creator_test.rb +111 -0
data/test/unit/pagination_test.rb +59 -0
data/test/unit/primo_engine_test.rb +37 -0
data/test/unit/register_engine_test.rb +50 -0
data/test/unit/result_item_display_test.rb +39 -0
data/test/unit/result_item_test.rb +36 -0
data/test/unit/scopus_engine_test.rb +130 -0
data/test/unit/search_engine_base_test.rb +178 -0
data/test/unit/search_engine_test.rb +95 -0
data/test/unit/summon_engine_test.rb +161 -0
data/test/unit/xerxes_engine_test.rb +70 -0
data/test/vcr_cassettes/ebscohost/error_bad_db.yml +45 -0
data/test/vcr_cassettes/ebscohost/error_bad_password.yml +45 -0
data/test/vcr_cassettes/ebscohost/get_info.yml +3626 -0
data/test/vcr_cassettes/ebscohost/live_search.yml +45 -0
data/test/vcr_cassettes/ebscohost/live_search_smoke_test.yml +1311 -0
data/test/vcr_cassettes/eds/basic_search_smoke_test.yml +1811 -0
data/test/vcr_cassettes/eds/get_auth_token.yml +75 -0
data/test/vcr_cassettes/eds/get_auth_token_failure.yml +39 -0
data/test/vcr_cassettes/eds/get_with_auth.yml +243 -0
data/test/vcr_cassettes/eds/get_with_auth_recovers_from_bad_auth.yml +368 -0
data/test/vcr_cassettes/gbs/error_condition.yml +40 -0
data/test/vcr_cassettes/gbs/pagination.yml +702 -0
data/test/vcr_cassettes/gbs/search.yml +340 -0
data/test/vcr_cassettes/primo/search_smoke_test.yml +1112 -0
data/test/vcr_cassettes/scopus/bad_api_key_should_return_error_response.yml +60 -0
data/test/vcr_cassettes/scopus/escaped_chars.yml +187 -0
data/test/vcr_cassettes/scopus/fielded_search.yml +176 -0
data/test/vcr_cassettes/scopus/simple_search.yml +227 -0
data/test/vcr_cassettes/scopus/zero_results_search.yml +67 -0
data/test/vcr_cassettes/summon/bad_auth.yml +54 -0
data/test/vcr_cassettes/summon/proper_tags_for_snippets.yml +216 -0
data/test/vcr_cassettes/summon/search.yml +242 -0
data/test/vcr_cassettes/xerxes/live_search.yml +2580 -0
data/test/view/std_item_test.rb +98 -0
metadata +421 -0

data/app/search_engines/bento_search/google_books_engine.rb ADDED Viewed

@@ -0,0 +1,184 @@
+require 'httpclient'
+require 'cgi'
+require 'multi_json'
+# not sure why we need to require the entire 'helpers'
+# when all we want is sanitize_helper, but I think we do:
+require 'action_view/helpers'
+#require 'action_view/helpers/sanitize_helper'
+require 'http_client_patch/include_client'
+module BentoSearch
+  #
+  # https://developers.google.com/books/docs/v1/using
+  # https://developers.google.com/books/docs/v1/reference/volumes#resource
+  #
+  # Configuration :api_key STRONGLY recommended, or google will severely
+  # rate-limit you.
+  class GoogleBooksEngine
+    include BentoSearch::SearchEngine
+    include ActionView::Helpers::SanitizeHelper
+    extend HTTPClientPatch::IncludeClient
+    include_http_client # gives us a #http_client with persistent class-level
+    class_attribute :base_url
+    self.base_url = "https://www.googleapis.com/books/v1/"
+    def search_implementation(arguments)
+      query_url = args_to_search_url(arguments)
+      results = Results.new
+      begin
+        response = http_client.get(query_url )
+        json = MultiJson.load( response.body )
+        # Can't rescue everything, or we catch VCR errors, making
+        # things confusing.
+      rescue TimeoutError, HTTPClient::TimeoutError,
+            HTTPClient::ConfigurationError, HTTPClient::BadResponseError  => e
+        results.error ||= {}
+        results.error[:exception] = e
+      end
+      # Trap json parse error, but also check for bad http
+      # status, or error reported in the json. In any of those cases
+      # return results obj with error status.
+      #
+      if ( response.nil? || json.nil? ||
+          (! HTTP::Status.successful? response.status) ||
+          (json && json["error"]))
+       results.error ||= {}
+       results.error[:status] = response.status if response
+       if json && json["error"] && json["error"]["errors"] && json["error"]["errors"].kind_of?(Array)
+         results.error[:message] = json["error"]["errors"].first.values.join(", ")
+       end
+       results.error[:error_info] = json["error"] if json && json.respond_to?("[]")
+       # escape early!
+       return results
+      end
+      results.total_items = json["totalItems"]
+      json["items"].each do |j_item|
+        j_item = j_item["volumeInfo"] if j_item["volumeInfo"]
+        item = ResultItem.new
+        results << item
+        item.title          = j_item["title"]
+        item.subtitle       = j_item["subtitle"]
+        item.publisher      = j_item["publisher"]
+        item.link           = j_item["canonicalVolumeLink"]
+        item.abstract       = sanitize j_item["description"]
+        item.year           = get_year j_item["publishedDate"]
+        item.format         = if j_item["printType"] == "MAGAZINE"
+                              :serial
+                            else
+                              "Book"
+                            end
+        (j_item["authors"] || []).each do |author_name|
+          item.authors << Author.new(:display => author_name)
+        end
+      end
+      return results
+    end
+    ###########
+    # BentoBox::SearchEngine API
+    ###########
+    def max_per_page
+      100
+    end
+    def search_field_definitions
+      { "intitle"     => {:semantic => :title},
+        "inauthor"    => {:semantic => :author},
+        "inpublisher" => {:semantic => :publisher},
+        "subject"     => {:semantic => :subject},
+        "isbn"        => {:semantic => :isbn}
+      }
+    end
+    def sort_definitions
+      {
+        "relevance" => {:implementation => nil}, # default
+        "date_desc" => {:implementation => "newest"}
+      }
+    end
+    protected
+    #############
+    # Our own implementation code
+    ##############
+    # takes a normalized #search arguments hash from SearchEngine
+    # turns it into a URL for Google API. Factored out to make testing
+    # possible.
+    def args_to_search_url(arguments)
+      query = if arguments[:search_field]
+        fielded_query(arguments[:query], arguments[:search_field])
+      else
+        arguments[:query]
+      end
+      query_url = base_url + "volumes?q=#{CGI.escape  query}"
+      if configuration.api_key
+        query_url += "&key=#{configuration.api_key}"
+      end
+      if arguments[:per_page]
+        query_url += "&maxResults=#{arguments[:per_page]}"
+      end
+      if arguments[:start]
+        query_url += "&startIndex=#{arguments[:start]}"
+      end
+      if arguments[:sort] &&
+          (defn = sort_definitions[arguments[:sort]]) &&
+          (value = defn[:implementation])
+        query_url += "&sort=#{CGI.escape(value)}"
+      end
+      return query_url
+    end
+    # If they ask for a <one two> :intitle, we're
+    # actually gonna do like google's own form does,
+    # and change it to <intitle:one intitle:two>. Internal
+    # phrases will be respected.
+    def fielded_query(query, field)
+      tokens = query.split(%r{\s|("[^"]+")}).delete_if {|a| a.blank?}
+      return tokens.collect {|token| "#{field}:#{token}"}.join(" ")
+    end
+    def get_year(iso8601)
+      return nil if iso8601.blank?
+      if iso8601 =~ /^(\d{4})/
+        return $1.to_i
+      end
+      return nil
+    end
+  end
+end

data/app/search_engines/bento_search/primo_engine.rb ADDED Viewed

@@ -0,0 +1,231 @@
+require 'cgi'
+require 'nokogiri'
+require 'http_client_patch/include_client'
+require 'httpclient'
+# ExLibris Primo Central.
+#
+# written/tested with PrimoCentral aggregated index only, but probably
+# should work with any Primo, may need some assumption tweaks.
+#
+# == Required Configuration
+#
+# [:host_port] your unique Primo's host/port combo, like "something.exlibrisgroup.com:1701".
+#              it's assumed we can talk to your primo at
+#              http://$host_port/PrimoWebServices/xservice/search/brief?
+# [:institution] Primo requires an institution parameter.
+#                right now we have a hard-coded assumed 'institution' in
+#                config. Eg. "GWCC"
+#
+#
+# == Other Primo-Specific Configuration
+#
+# [:loc]  The primo 'loc' parameter, default "adaptor,primo_central_multiple_fe"
+#         for Primo Central Index searches.
+# [:auth] Set to 'true' to assume local auth'd users if you're going to protect
+#         access. Default false. Alternately, you can pass in an
+#         :auth => true/false to 'search', which will override config.
+#         PC has limited access for non-auth users.
+# [:lang] Primo lang query param. "Hints input languages to search engine for language recognition. "
+#         For now hardcoded into config, not settable per request. Default 'eng'.
+# [:fixed_params]  Extra url query params to add on to every search request.
+#               Can be used to hard-code certain limits, such as:
+#               {"query_exc" => ["facet_rtype,exact,books", "something_else"]}
+#               Note neither key nor values are uri encoded, we'll take
+#               care of that for you. value can be array or single string.
+#
+# == Vendor docs
+#
+# http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
+class BentoSearch::PrimoEngine
+  include BentoSearch::SearchEngine
+  extend HTTPClientPatch::IncludeClient
+  include_http_client
+  def search_implementation(args)
+    url = construct_query(args)
+    response = http_client.get(url)
+    response_xml = Nokogiri::XML response.body
+    # namespaces really do nobody any good
+    response_xml.remove_namespaces!
+    results = BentoSearch::Results.new
+    results.total_items = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")["TOTALHITS"].to_i
+    response_xml.xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET/DOC").each do |doc_xml|
+      item = BentoSearch::ResultItem.new
+      # Data in primo response is confusing in many different places in
+      # variant formats. We try to pick out the best to take things from,
+      # but we're guessing, it's under-documented.
+      item.title      = text_at_xpath(doc_xml, "./PrimoNMBib/record/display/title")
+      item.abstract   = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/abstract")
+      doc_xml.xpath("./PrimoNMBib/record/facets/creatorcontrib").each do |author_node|
+        item.authors << BentoSearch::Author.new(:display => author_node.text)
+      end
+      item.journal_title  = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/jtitle")
+      # check btitle for book chapters, the book they are in.
+      if item.journal_title.blank? && doc_xml.at_xpath("./PrimoNMBib/record/display/ispartof")
+        item.journal_title = text_at_xpath(doc_xml, "./PrimoNMBib/record/addata/btitle")
+      end
+      item.publisher      = text_at_xpath doc_xml, "./PrimoNMBib/record/display/publisher"
+      item.volume         = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/volume"
+      item.issue          = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issue"
+      item.start_page     = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/spage"
+      item.end_page       = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/epage"
+      item.doi            = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/doi"
+      item.issn           = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issn"
+      item.isbn           = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/isbn"
+      if (date = text_at_xpath doc_xml, "./PrimoNMBib/record/search/creationdate")
+        item.year = date[0,4] # first four chars
+      end
+      if fmt_str = text_at_xpath(doc_xml, "./PrimoNMBib/record/search/rsrctype")
+        # 'article', 'book_chapter'. abuse rails to turn into nice titlelized english.
+        item.format_str     = fmt_str.titleize
+        item.format         = map_format fmt_str
+      end
+      #TODO formats, highlighting
+      results << item
+    end
+    return results
+  end
+  # Try to map from primocentral's 'rsrctype' to our own internal
+  # taxonomy of formats
+  #
+  # Need docs on what the complete Primo vocabulary here is, we're
+  # just guessing from what we see.
+  def map_format(str)
+    case str
+    when "article", "newspaper_article", "review"
+      then "Article"
+    when "book"           then "Book"
+    when "dissertation"   then :dissertation
+    end
+  end
+  # Returns the text() at the xpath, if the xpath is non-nil
+  # and the text is non-blank
+  def text_at_xpath(xml, xpath)
+    node = xml.at_xpath(xpath)
+    return nil if node.nil?
+    text = node.text
+    return nil if node.blank?
+    return text
+  end
+  # From config or args, args over-ride config
+  def authenticated_end_user?(args)
+    config = configuration.auth ? true : false
+    arg = args[:auth]
+    if ! arg.nil?
+      arg ? true : false
+    elsif ! config.nil?
+      config ? true : false
+    else
+      false
+    end
+  end
+  # Docs say we need to replace any commas with spaces
+  def prepared_query(str)
+    str.gsub(/\,/, ' ')
+  end
+  def construct_query(args)
+    url = "http://#{configuration.host_port}/PrimoWebServices/xservice/search/brief"
+    url += "?institution=#{configuration.institution}"
+    url += "&loc=#{CGI.escape configuration.loc}"
+    url += "&lang=#{CGI.escape configuration.lang}"
+    url += "&bulkSize=#{args[:per_page]}" if args[:per_page]
+    # primo indx is 1-based record index, our :start is 0-based.
+    url += "&indx=#{args[:start] + 1}" if args[:start]
+    if (defn = self.sort_definitions[ args[:sort] ]) &&
+        (value = defn[:implementation])
+      url += "&sortField=#{CGI.escape value}"
+    end
+    url += "&onCampus=#{ authenticated_end_user?(args) ? 'true' : 'false'}"
+    field = args[:search_field].present? ? args[:search_field] : "any"
+    query = "#{field},contains,#{prepared_query args[:query]}"
+    url += "&query=#{CGI.escape query}"
+    configuration.fixed_params.each_pair do |key, value|
+      [value].flatten.each do |v|
+        url += "&#{CGI.escape key.to_s}=#{CGI.escape v.to_s}"
+      end
+    end
+    return url
+  end
+  def search_field_definitions
+    # others are avail too, this is not exhaustive.
+    {
+      "creator"   => {:semantic => :author},
+      "title"     => {:semantic => :title},
+      "sub"       => {:semantic => :subject},
+      "isbn"      => {:semantic => :isbn},
+      "issn"      => {:semantic => :issn}
+    }
+  end
+  def sort_definitions
+    {
+      "title_asc"       => {:implementation => "stitle"},
+      "date_desc"       => {:implementation => "scdate"},
+      "author_asc"      => {:implementation => "screator"},
+      # As far as I can tell, what they call 'popularity'
+      # is really relevance, with popularity boosting.
+      "relevance"       => {:implementation => "popularity"}
+    }
+  end
+  def self.required_configuration
+    [:host_port, :institution]
+  end
+  def self.default_configuration
+    {
+      :loc => 'adaptor,primo_central_multiple_fe',
+      # "eng" or "fre" or "ger" (Code for the representation of name of language conform to ISO-639)
+      :lang => "eng",
+      :fixed_params => {}
+    }
+  end
+end

data/app/search_engines/bento_search/scopus_engine.rb ADDED Viewed

@@ -0,0 +1,295 @@
+require 'cgi'
+require 'nokogiri'
+require 'http_client_patch/include_client'
+require 'httpclient'
+module BentoSearch
+  # Supports fielded searching, sorting, pagination.
+  #
+  # Required configuration:
+  # * api_key
+  #
+  # Defaults to 'relevance' sort, rather than scopus's default of date desc.
+  #
+  # Uses the Scopus SciVerse REST API. You need to be a Scopus customer
+  # to access. http://api.elsevier.com
+  # http://www.developers.elsevier.com/action/devprojects
+  #
+  # ToS: http://www.developers.elsevier.com/devcms/content-policies
+  # "Federated Search" use case.
+  # Also: http://www.developers.elsevier.com/cms/apiserviceagreement
+  #
+  # Note that ToS applying to you probably means you must restrict access
+  # to search functionality to authenticated affiliated users only.
+  #
+  # Register for an API key at "Register New Site" at http://developers.elsevier.com/action/devnewsite
+  # You will then need to get server IP addresses registered with Scopus too,
+  # apparently by emailing directly to dave.santucci at elsevier dot com.
+  #
+  # Scopus API Docs:
+  # * http://www.developers.elsevier.com/devcms/content-api-search-request
+  # * http://www.developers.elsevier.com/devcms/content/search-fields-overview
+  #
+  # Some more docs on response elements and query elements:
+  # * http://api.elsevier.com/content/search/#d0n14606
+  #
+  # Other API's in the suite not being used by this code at present:
+  # * http://www.developers.elsevier.com/devcms/content-api-retrieval-request
+  # * http://www.developers.elsevier.com/devcms/content-api-metadata-request
+  #
+  # Support: Integration@scopus.com
+  #
+  # TODO: Mention to Scopus: Only one author?
+  # Paging of 50 gets an error, but docs say I should be able to request 200. q
+  #
+  class ScopusEngine
+    include BentoSearch::SearchEngine
+    extend HTTPClientPatch::IncludeClient
+    include_http_client
+    def search_implementation(args)
+      results = Results.new
+      xml, response, exception = nil, nil, nil
+      url = scopus_url(args)
+      begin
+        response = http_client.get( url , nil,
+          # HTTP headers.
+          {"X-ELS-APIKey" => configuration.api_key,
+          "X-ELS-ResourceVersion" => "XOCS",
+          "Accept" => "application/atom+xml"}
+        )
+        xml = Nokogiri::XML(response.body)
+      rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError  => e
+        exception = e
+      end
+      # handle errors
+      if (response.nil? || xml.nil? || exception ||
+          (! HTTP::Status.successful? response.status) ||
+          xml.at_xpath("service-error")
+          )
+        # UGH. Scopus reports 0 hits as an error, not entirely distinguishable
+        # from an actual error. Oh well, we have to go with it.
+        if (
+            (response.status == 400) &&
+            xml &&
+            (error_xml = xml.at_xpath("./service-error/status")) &&
+            (node_text(error_xml.at_xpath("./statusCode")) == "INVALID_INPUT") &&
+            (node_text(error_xml.at_xpath("./statusText")) == "Result set was empty or Start value beyond result set")
+          )
+          # PROBABLY 0 hit count, although could be something else I'm afraid.
+          results.total_items = 0
+          return results
+        else
+          # real error
+          results.error ||= {}
+          results.error[:exception] = e
+          results.error[:status] = response.status if response
+          # keep from storing the entire possibly huge response as error
+          # but sometimes it's an error message.
+          results.error[:error_info] = xml.at_xpath("service_error") if xml
+          return results
+        end
+      end
+      results.total_items = (node_text xml.at_xpath("//opensearch:totalResults", xml_ns)).to_i
+      xml.xpath("//atom:entry", xml_ns).each do | entry |
+        results << (item = ResultItem.new)
+        if scopus_link = entry.at_xpath("atom:link[@ref='scopus']", xml_ns)
+          item.link = scopus_link["href"]
+        end
+        item.title          = node_text entry.at_xpath("dc:title", xml_ns)
+        item.journal_title  = node_text entry.at_xpath("prism:publicationName", xml_ns)
+        item.issn           = node_text entry.at_xpath("prism:issn", xml_ns)
+        item.volume         = node_text entry.at_xpath("prism:volume", xml_ns)
+        item.issue          = node_text entry.at_xpath("prism:issueIdentifier", xml_ns)
+        item.doi            = node_text entry.at_xpath("prism:doi", xml_ns)
+        # pages might be in startingPage/endingPage OR in pageRange
+        if (start = entry.at_xpath("prism:startingPage", xml_ns))
+          item.start_page = start.text.to_i
+          if ( epage = entry.at_xpath("prism:endingPage", xml_ns))
+            item.end_page = epage.text.to_i
+          end
+        elsif (range = entry.at_xpath("prism:pageRange", xml_ns))
+          (spage, epage) = *range.text().split("-")
+          item.start_page = spage
+          item.end_page = epage
+        end
+        # get the year out of the date
+        if date = entry.at_xpath("prism:coverDate", xml_ns)
+          date.text =~ /^(\d\d\d\d)/
+          item.year = $1.to_i if $1
+        end
+        # Authors might be in atom:authors seperated by |, or just
+        # a single one in dc:creator
+        if (authors = entry.at_xpath("atom:authors", xml_ns))
+          authors.text.split("|").each do |author|
+            item.authors << Author.new(:display => author.strip)
+          end
+        elsif (author = entry.at_xpath("dc:creator", xml_ns))
+          item.authors << Author.new(:display => author.text.strip)
+        end
+        # Format we're still trying to figure out how Scopus API
+        # delivers it. Here is at at least one way.
+        if (doctype = entry.at_xpath("atom:subtype", xml_ns))
+          item.format     = doctype_to_format(doctype.text)
+          item.format_str = doctype_to_string(doctype.text)
+        end
+      end
+      return results
+    end
+    # The escaping rules are not entirely clear for the API. We know colons
+    # and parens are special chars. It's unclear how or if we can escape them,
+    # we'll just remove them.
+    def escape_query(query)
+      # backslash escape doesn't seem to work
+      #query.gsub(/([\\\(\)\:])/) do |match|
+      #  "\\#{$1}"
+      #end
+      query.gsub(/([\\\(\)\:])/, ' ')
+    end
+    def self.required_configuration
+      ["api_key"]
+    end
+    def self.default_configuration
+      {
+        :base_url => "http://api.elsevier.com/",
+        :cluster => "SCOPUS"
+      }
+    end
+    # Max per-page is 200, as per http://www.developers.elsevier.com/devcms/content-apis, bottom of page.
+    def max_per_page
+      200
+    end
+    def search_field_definitions
+      {
+        "AUTH"        => {:semantic => :author},
+        "TITLE"       => {:semantic => :title},
+        # controlled and author-assigned keywords
+        "KEY"         => {:semantic => :subject},
+        "ISBN"        => {:semantic => :isbn},
+        "ISSN"        => {:semantic => :issn},
+      }
+    end
+    def sort_definitions
+      # scopus &sort= values, not yet URI-escaped, later code will do that.
+      #
+      # 'refeid' key is currently undocumented on Scopus site, but
+      # was given to me in email by scopus.
+      {
+        "title_asc"     => {:implementation => "+itemtitle"},
+        "date_desc"     => {:implementation => "-datesort,+auth"},
+        "relevance"     => {:implementation => "refeid" },
+        "author_asc"    => {:implementation => "+auth"},
+        "num_cite_desc" => {:implementation => "-numcitedby"}
+      }
+    end
+    protected
+    # returns nil if passed in nil, otherwise
+    # returns nokogiri text()
+    def node_text(node)
+      return nil if node.nil?
+      return node.text()
+    end
+    def xml_ns
+      {"opensearch" => "http://a9.com/-/spec/opensearch/1.1/",
+       "prism"      => "http://prismstandard.org/namespaces/basic/2.0/",
+       "dc"         => "http://purl.org/dc/elements/1.1/",
+       "atom"       => "http://www.w3.org/2005/Atom"}
+     end
+    # Maps from Scopus "doctype" as listed at http://www.developers.elsevier.com/devcms/content/search-fields-overview
+    # and delivered in the XML response as atom:subtype.
+    # Maps to our own internal formats as documented in ResultItem#format
+    # Returns nil if can't map.
+    def doctype_to_format(doctype)
+      { "ar" => "Article",
+        "ip" => "Article",
+        "bk" => "Book",
+        "bz" => "Article",
+        "re" => "Article", # most of what scopus labels 'Report' seem to be ordinary articles.
+        "cp" => :conference_paper,
+        "re" => "Article", # really 'report', but Scopus is unreliable here, most of these are actually articles.
+        "sh" => "Article", # 'short survey' to scopus, but seems to be used for articles.
+        "ip" => "Article", # 'article in press'.
+        'ed' => "Article", # Editorial
+        'le' => "Article", # Letter
+        'no' => "Article", # Note
+      }[doctype.to_s]
+    end
+    # Maps Scopus doctype to human readable strings as documented by Scopus,
+    # does not map 1-1 to our controlled format.
+    def doctype_to_string(doctype)
+      { "ar" => "Article",
+        "ab" => "Abstract Report",
+        "ip" => "Article in Press",
+        "bk" => "Book",
+        "bz" => "Business Article",
+        "cp" => "Conference Paper",
+        "cr" => "Conference Review",
+        "ed" => "Editorial",
+        "er" => "Erratum",
+        "le" => "Letter",
+        "no" => "Note",
+        "pr" => "Press Release",
+        "re" => "Article", # Really 'report', but Scopus is unreliable here, most of these are actually articles.
+        "sh" => "Article" # Really 'short survey' to Scopus, but seems to be used for, well, articles.
+      }[doctype.to_s]
+    end
+    def scopus_url(args)
+      query = escape_query args[:query]
+      if args[:search_field]
+        query = "#{args[:search_field]}(#{query})"
+      end
+      query = "#{configuration.base_url.chomp("/")}/content/search/index:#{configuration.cluster}?query=#{CGI.escape(query)}"
+      query += "&count=#{args[:per_page]}" if args[:per_page]
+      query += "&start=#{args[:start]}" if args[:start]
+      # default to 'relevance' sort if not given, rather than scopus's
+      # default of date desc.
+      args[:sort] ||= "relevance"
+      if (defn = self.sort_definitions[args[:sort]]) &&
+         ( value = defn[:implementation])
+        query += "&sort=#{CGI.escape(value)}"
+      end
+      return query
+    end
+  end
+end