bento_search 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.md +6 -5
 - data/app/assets/javascripts/bento_search/ajax_load.js +42 -16
 - data/app/assets/stylesheets/bento_search/suggested_styles.css +9 -0
 - data/app/controllers/bento_search/search_controller.rb +15 -6
 - data/app/helpers/bento_search_helper.rb +24 -8
 - data/app/item_decorators/bento_search/no_links.rb +13 -0
 - data/app/models/bento_search/openurl_creator.rb +18 -8
 - data/app/models/bento_search/registrar.rb +2 -6
 - data/app/models/bento_search/result_item.rb +43 -3
 - data/app/models/bento_search/results.rb +4 -0
 - data/app/models/bento_search/search_engine.rb +25 -23
 - data/app/search_engines/bento_search/ebsco_host_engine.rb +42 -17
 - data/app/search_engines/bento_search/google_books_engine.rb +2 -0
 - data/app/search_engines/bento_search/google_site_search_engine.rb +177 -0
 - data/app/search_engines/bento_search/mock_engine.rb +5 -0
 - data/app/search_engines/bento_search/primo_engine.rb +23 -2
 - data/app/search_engines/bento_search/scopus_engine.rb +4 -1
 - data/app/search_engines/bento_search/summon_engine.rb +4 -14
 - data/app/search_engines/bento_search/worldcat_sru_dc_engine.rb +293 -0
 - data/app/views/bento_search/_std_item.html.erb +4 -5
 - data/app/views/bento_search/_wrap_with_count.html.erb +20 -0
 - data/app/views/bento_search/search/search.html.erb +15 -1
 - data/config/locales/en.yml +6 -4
 - data/lib/bento_search/util.rb +13 -0
 - data/lib/bento_search/version.rb +1 -1
 - data/test/dummy/log/development.log +1 -0
 - data/test/dummy/log/test.log +24357 -0
 - data/test/functional/bento_search/search_controller_test.rb +39 -0
 - data/test/helper/bento_search_helper_test.rb +47 -5
 - data/test/unit/ebsco_host_engine_test.rb +15 -0
 - data/test/unit/google_books_engine_test.rb +1 -0
 - data/test/unit/google_site_search_test.rb +122 -0
 - data/test/unit/item_decorators_test.rb +12 -1
 - data/test/unit/openurl_creator_test.rb +19 -3
 - data/test/unit/primo_engine_test.rb +5 -3
 - data/test/unit/result_item_test.rb +36 -1
 - data/test/unit/search_engine_test.rb +27 -4
 - data/test/unit/worldcat_sru_dc_engine_test.rb +120 -0
 - data/test/vcr_cassettes/google_site/basic_smoke_test.yml +254 -0
 - data/test/vcr_cassettes/google_site/empty_result_set.yml +53 -0
 - data/test/vcr_cassettes/google_site/pagination_object_is_correct_for_actual_page_when_you_ask_for_too_far.yml +260 -0
 - data/test/vcr_cassettes/google_site/with_highlighting.yml +265 -0
 - data/test/vcr_cassettes/google_site/without_highlighting.yml +267 -0
 - data/test/vcr_cassettes/primo/proper_tags_for_snippets.yml +517 -502
 - data/test/vcr_cassettes/primo/search_smoke_test.yml +1 -1
 - data/test/vcr_cassettes/worldcat_sru_dc/smoke_test.yml +628 -0
 - metadata +40 -4
 
| 
         @@ -61,10 +61,10 @@ require 'httpclient' 
     | 
|
| 
       61 
61 
     | 
    
         
             
            #  Hard to find docs page on embedding EBSCO limiters (like peer reviewed only "RV Y") in search query: 
         
     | 
| 
       62 
62 
     | 
    
         
             
            #     http://eit.ebscohost.com/Pages/MethodDescription.aspx?service=~/Services/SearchService.asmx&method=Info
         
     | 
| 
       63 
63 
     | 
    
         
             
            #
         
     | 
| 
       64 
     | 
    
         
            -
            # 
         
     | 
| 
       65 
     | 
    
         
            -
            #
         
     | 
| 
       66 
     | 
    
         
            -
            #  
     | 
| 
       67 
     | 
    
         
            -
            #  
     | 
| 
      
 64 
     | 
    
         
            +
            # == Limitations
         
     | 
| 
      
 65 
     | 
    
         
            +
            # We do set language of ResultItems based on what ebsco tells us, but ebsco
         
     | 
| 
      
 66 
     | 
    
         
            +
            # seems to tell us 'english' for everything (maybe cause abstract is in
         
     | 
| 
      
 67 
     | 
    
         
            +
            # English?). Config variable to tell us to ignore language?
         
     | 
| 
       68 
68 
     | 
    
         
             
            class BentoSearch::EbscoHostEngine
         
     | 
| 
       69 
69 
     | 
    
         
             
              include BentoSearch::SearchEngine
         
     | 
| 
       70 
70 
     | 
    
         | 
| 
         @@ -85,7 +85,7 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       85 
85 
     | 
    
         | 
| 
       86 
86 
     | 
    
         
             
                results = BentoSearch::Results.new
         
     | 
| 
       87 
87 
     | 
    
         
             
                xml, response, exception = nil, nil, nil
         
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
      
 88 
     | 
    
         
            +
             
     | 
| 
       89 
89 
     | 
    
         
             
                begin
         
     | 
| 
       90 
90 
     | 
    
         
             
                  response = http_client.get(url)
         
     | 
| 
       91 
91 
     | 
    
         
             
                  xml = Nokogiri::XML(response.body)
         
     | 
| 
         @@ -144,12 +144,13 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       144 
144 
     | 
    
         
             
              def sniff_format(xml_node)
         
     | 
| 
       145 
145 
     | 
    
         
             
                return nil if xml_node.nil?
         
     | 
| 
       146 
146 
     | 
    
         | 
| 
       147 
     | 
    
         
            -
             
     | 
| 
      
 147 
     | 
    
         
            +
             
     | 
| 
      
 148 
     | 
    
         
            +
                if xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
         
     | 
| 
      
 149 
     | 
    
         
            +
                  "Article"
         
     | 
| 
      
 150 
     | 
    
         
            +
                elsif xml_node.at_xpath("./bkinfo/*")
         
     | 
| 
       148 
151 
     | 
    
         
             
                  "Book"
         
     | 
| 
       149 
152 
     | 
    
         
             
                elsif xml_node.at_xpath("./dissinfo/*")
         
     | 
| 
       150 
153 
     | 
    
         
             
                  :dissertation
         
     | 
| 
       151 
     | 
    
         
            -
                elsif xml_node.at_xpath("./jinfo/*") && xml_node.at_xpath("./artinfo/*")
         
     | 
| 
       152 
     | 
    
         
            -
                  "Article"
         
     | 
| 
       153 
154 
     | 
    
         
             
                elsif xml_node.at_xpath("./jinfo/*")
         
     | 
| 
       154 
155 
     | 
    
         
             
                  :serial
         
     | 
| 
       155 
156 
     | 
    
         
             
                else
         
     | 
| 
         @@ -172,12 +173,19 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       172 
173 
     | 
    
         | 
| 
       173 
174 
     | 
    
         
             
                components = components.collect {|a| a.titlecase if a}
         
     | 
| 
       174 
175 
     | 
    
         
             
                components.uniq! # no need to have the same thing twice
         
     | 
| 
       175 
     | 
    
         
            -
             
     | 
| 
       176 
     | 
    
         
            -
                # some hard-coded cases for better user-displayable string
         
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
      
 177 
     | 
    
         
            +
                # some hard-coded cases for better user-displayable string, and other
         
     | 
| 
      
 178 
     | 
    
         
            +
                # normalization. 
         
     | 
| 
       177 
179 
     | 
    
         
             
                if ["Academic Journal", "Journal"].include?(components.first) && ["Article", "Journal Article"].include?(components.last)
         
     | 
| 
       178 
180 
     | 
    
         
             
                  return "Journal Article"
         
     | 
| 
       179 
181 
     | 
    
         
             
                elsif components.first == "Periodical" && components.length > 1
         
     | 
| 
       180 
182 
     | 
    
         
             
                  return components.last
         
     | 
| 
      
 183 
     | 
    
         
            +
                elsif components.size == 2 && components.first.include?(components.last)
         
     | 
| 
      
 184 
     | 
    
         
            +
                  # last is strict substring, don't need it
         
     | 
| 
      
 185 
     | 
    
         
            +
                  return components.first
         
     | 
| 
      
 186 
     | 
    
         
            +
                elsif components.size == 2 && components.last.include?(components.first)
         
     | 
| 
      
 187 
     | 
    
         
            +
                  # first is strict substring, don't need it
         
     | 
| 
      
 188 
     | 
    
         
            +
                  return components.last
         
     | 
| 
       181 
189 
     | 
    
         
             
                end
         
     | 
| 
       182 
190 
     | 
    
         | 
| 
       183 
191 
     | 
    
         | 
| 
         @@ -191,11 +199,15 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       191 
199 
     | 
    
         
             
              end
         
     | 
| 
       192 
200 
     | 
    
         | 
| 
       193 
201 
     | 
    
         | 
| 
       194 
     | 
    
         
            -
              #  
     | 
| 
       195 
     | 
    
         
            -
              # or what the special chars are. But we know parens are special, can't
         
     | 
| 
       196 
     | 
    
         
            -
              # escape em, we'll just remove em (should not effect search). 
         
     | 
| 
      
 202 
     | 
    
         
            +
              # escape or replace special chars to ebsco 
         
     | 
| 
       197 
203 
     | 
    
         
             
              def ebsco_query_escape(txt)
         
     | 
| 
       198 
     | 
    
         
            -
                 
     | 
| 
      
 204 
     | 
    
         
            +
                # it's unclear if ebsco API actually allows escaping of special chars,
         
     | 
| 
      
 205 
     | 
    
         
            +
                # or what the special chars are. But we know parens are special, can't
         
     | 
| 
      
 206 
     | 
    
         
            +
                # escape em, we'll just remove em (should not affect search).
         
     | 
| 
      
 207 
     | 
    
         
            +
                
         
     | 
| 
      
 208 
     | 
    
         
            +
                # undocumented but question mark seems to cause a problem for ebsco,
         
     | 
| 
      
 209 
     | 
    
         
            +
                # even inside quoted phrases, not sure why. 
         
     | 
| 
      
 210 
     | 
    
         
            +
                txt.gsub(/[)(\?]/, ' ')
         
     | 
| 
       199 
211 
     | 
    
         
             
              end
         
     | 
| 
       200 
212 
     | 
    
         | 
| 
       201 
213 
     | 
    
         
             
              # Actually turn the user's query into an EBSCO "AND" boolean query,
         
     | 
| 
         @@ -208,7 +220,7 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       208 
220 
     | 
    
         | 
| 
       209 
221 
     | 
    
         
             
                # Remove parens in non-phrase-quoted terms
         
     | 
| 
       210 
222 
     | 
    
         
             
                terms = terms.collect do |t| 
         
     | 
| 
       211 
     | 
    
         
            -
                   
     | 
| 
      
 223 
     | 
    
         
            +
                  ebsco_query_escape(t)      
         
     | 
| 
       212 
224 
     | 
    
         
             
                end
         
     | 
| 
       213 
225 
     | 
    
         | 
| 
       214 
226 
     | 
    
         
             
                # Remove boolean operators if they are bare not in a phrase, they'll
         
     | 
| 
         @@ -233,6 +245,7 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       233 
245 
     | 
    
         | 
| 
       234 
246 
     | 
    
         
             
                query = ebsco_query_prepare  args[:query]  
         
     | 
| 
       235 
247 
     | 
    
         | 
| 
      
 248 
     | 
    
         
            +
                
         
     | 
| 
       236 
249 
     | 
    
         
             
                # wrap in (FI $query) if fielded search
         
     | 
| 
       237 
250 
     | 
    
         
             
                if args[:search_field]
         
     | 
| 
       238 
251 
     | 
    
         
             
                  query = "(#{args[:search_field]} #{query})"
         
     | 
| 
         @@ -253,7 +266,7 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       253 
266 
     | 
    
         
             
                configuration.databases.each do |db|
         
     | 
| 
       254 
267 
     | 
    
         
             
                  url += "&db=#{db}"
         
     | 
| 
       255 
268 
     | 
    
         
             
                end    
         
     | 
| 
       256 
     | 
    
         
            -
             
     | 
| 
      
 269 
     | 
    
         
            +
             
     | 
| 
       257 
270 
     | 
    
         
             
                return url
         
     | 
| 
       258 
271 
     | 
    
         
             
              end
         
     | 
| 
       259 
272 
     | 
    
         | 
| 
         @@ -267,8 +280,16 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       267 
280 
     | 
    
         
             
                item.link           = get_link(xml_rec)
         
     | 
| 
       268 
281 
     | 
    
         | 
| 
       269 
282 
     | 
    
         
             
                item.issn           = text_if_present info.at_xpath("./jinfo/issn")
         
     | 
| 
       270 
     | 
    
         
            -
             
     | 
| 
      
 283 
     | 
    
         
            +
             
     | 
| 
      
 284 
     | 
    
         
            +
                item.journal_title  = text_if_present(info.at_xpath("./jinfo/jtl"))
         
     | 
| 
       271 
285 
     | 
    
         
             
                item.publisher      = text_if_present info.at_xpath("./pubinfo/pub")
         
     | 
| 
      
 286 
     | 
    
         
            +
                # if no publisher, but a dissertation institution, use that
         
     | 
| 
      
 287 
     | 
    
         
            +
                # as publisher. 
         
     | 
| 
      
 288 
     | 
    
         
            +
                unless item.publisher
         
     | 
| 
      
 289 
     | 
    
         
            +
                  item.publisher    = text_if_present info.at_xpath("./dissinfo/dissinst")
         
     | 
| 
      
 290 
     | 
    
         
            +
                end
         
     | 
| 
      
 291 
     | 
    
         
            +
                
         
     | 
| 
      
 292 
     | 
    
         
            +
                
         
     | 
| 
       272 
293 
     | 
    
         
             
                # Might have multiple ISBN's in record, just take first for now
         
     | 
| 
       273 
294 
     | 
    
         
             
                item.isbn           = text_if_present info.at_xpath("./bkinfo/isbn")
         
     | 
| 
       274 
295 
     | 
    
         | 
| 
         @@ -298,6 +319,10 @@ class BentoSearch::EbscoHostEngine 
     | 
|
| 
       298 
319 
     | 
    
         
             
                item.format         = sniff_format info
         
     | 
| 
       299 
320 
     | 
    
         
             
                item.format_str     = sniff_format_str info
         
     | 
| 
       300 
321 
     | 
    
         | 
| 
      
 322 
     | 
    
         
            +
                # Totally unreliable, seems to report english for everything? Maybe
         
     | 
| 
      
 323 
     | 
    
         
            +
                # because abstracts are in english? Nevertheless we include for now.
         
     | 
| 
      
 324 
     | 
    
         
            +
                item.language_code   = text_if_present info.at_xpath("./language/@code")
         
     | 
| 
      
 325 
     | 
    
         
            +
                
         
     | 
| 
       301 
326 
     | 
    
         | 
| 
       302 
327 
     | 
    
         
             
                return item
         
     | 
| 
       303 
328 
     | 
    
         
             
              end
         
     | 
| 
         @@ -0,0 +1,177 @@ 
     | 
|
| 
      
 1 
     | 
    
         
            +
            require 'cgi'
         
     | 
| 
      
 2 
     | 
    
         
            +
            require 'multi_json'
         
     | 
| 
      
 3 
     | 
    
         
            +
             
     | 
| 
      
 4 
     | 
    
         
            +
            require 'http_client_patch/include_client'
         
     | 
| 
      
 5 
     | 
    
         
            +
            require 'httpclient'
         
     | 
| 
      
 6 
     | 
    
         
            +
             
     | 
| 
      
 7 
     | 
    
         
            +
            #
         
     | 
| 
      
 8 
     | 
    
         
            +
            # An adapter for Google Site Search/Google Custom Search 
         
     | 
| 
      
 9 
     | 
    
         
            +
            #
         
     | 
| 
      
 10 
     | 
    
         
            +
            # I think those are the same thing now, but may get different names
         
     | 
| 
      
 11 
     | 
    
         
            +
            # depending on whether you are paying or getting it for free. The free
         
     | 
| 
      
 12 
     | 
    
         
            +
            # version only gives you 100 requests/day courtesy limit for testing. 
         
     | 
| 
      
 13 
     | 
    
         
            +
            #
         
     | 
| 
      
 14 
     | 
    
         
            +
            # Create a custom/site search: http://www.google.com/cse
         
     | 
| 
      
 15 
     | 
    
         
            +
            # API docs: https://developers.google.com/custom-search/v1/overview
         
     | 
| 
      
 16 
     | 
    
         
            +
            # API console to get API key? https://code.google.com/apis/console/?pli=1#project:183362013039
         
     | 
| 
      
 17 
     | 
    
         
            +
            #
         
     | 
| 
      
 18 
     | 
    
         
            +
            # == Limitations
         
     | 
| 
      
 19 
     | 
    
         
            +
            #
         
     | 
| 
      
 20 
     | 
    
         
            +
            # * per-page is max 10, which makes it not too too useful. If you ask for more, you'll get an exception.
         
     | 
| 
      
 21 
     | 
    
         
            +
            # * Google only lets you look at first 10 pages. If you ask for more, it won't raise,
         
     | 
| 
      
 22 
     | 
    
         
            +
            #   it'll just give you the last page google will let you have. pagination object
         
     | 
| 
      
 23 
     | 
    
         
            +
            #   in result set will be appropriate for page you actually got though. 
         
     | 
| 
      
 24 
     | 
    
         
            +
            # * 'abstract' field always filled out with relevant snippets from google api.  
         
     | 
| 
      
 25 
     | 
    
         
            +
            # * Google API supports custom 'structured data' in your web pages (from microdata and meta tags?)
         
     | 
| 
      
 26 
     | 
    
         
            +
            #   for custom sorting and limiting and maybe field searching -- but this code
         
     | 
| 
      
 27 
     | 
    
         
            +
            #   does not currently support that. it could be added as custom config in some way. 
         
     | 
| 
      
 28 
     | 
    
         
            +
            # * The URL in display form is put in ResultItem#source_title
         
     | 
| 
      
 29 
     | 
    
         
            +
            #   That should result in it rendering in a reasonable place with standard display
         
     | 
| 
      
 30 
     | 
    
         
            +
            #   templates. 
         
     | 
| 
      
 31 
     | 
    
         
            +
            # * Sort: only relevance, date_desc and date_asc. Custom sorts based on structured data not supported.
         
     | 
| 
      
 32 
     | 
    
         
            +
            # * no search fields supported at present. may possibly add later after more
         
     | 
| 
      
 33 
     | 
    
         
            +
            #   investigation, google api may support both standard intitle etc, as well
         
     | 
| 
      
 34 
     | 
    
         
            +
            #   as custom attributes added in microdata to your pages. 
         
     | 
| 
      
 35 
     | 
    
         
            +
            # * ResultItem's will be set to have no OpenURLs, since no useful ones can be constructed. 
         
     | 
| 
      
 36 
     | 
    
         
            +
            #
         
     | 
| 
      
 37 
     | 
    
         
            +
            # == Required config params
         
     | 
| 
      
 38 
     | 
    
         
            +
            # [:api_key]  api_key from google, get from Google API Console
         
     | 
| 
      
 39 
     | 
    
         
            +
            # [:cx]       identifier for specific google CSE, get from "Search engine unique ID" in CSE "Control Panel"
         
     | 
| 
      
 40 
     | 
    
         
            +
            #
         
     | 
| 
      
 41 
     | 
    
         
            +
            # == Optional config params
         
     | 
| 
      
 42 
     | 
    
         
            +
            #
         
     | 
| 
      
 43 
     | 
    
         
            +
            # [:highlighting]  default true. if true, then title, display url, and snippets will
         
     | 
| 
      
 44 
     | 
    
         
            +
            #                  have HTML <b> tags in them, and be html_safe. If false, plain
         
     | 
| 
      
 45 
     | 
    
         
            +
            #                  ascii, but you'll still get snippets. 
         
     | 
| 
      
 46 
     | 
    
         
            +
            class BentoSearch::GoogleSiteSearchEngine
         
     | 
| 
      
 47 
     | 
    
         
            +
              include BentoSearch::SearchEngine
         
     | 
| 
      
 48 
     | 
    
         
            +
              
         
     | 
| 
      
 49 
     | 
    
         
            +
              extend HTTPClientPatch::IncludeClient
         
     | 
| 
      
 50 
     | 
    
         
            +
              include_http_client
         
     | 
| 
      
 51 
     | 
    
         
            +
                
         
     | 
| 
      
 52 
     | 
    
         
            +
              def search_implementation(args)
         
     | 
| 
      
 53 
     | 
    
         
            +
                results = BentoSearch::Results.new
         
     | 
| 
      
 54 
     | 
    
         
            +
                
         
     | 
| 
      
 55 
     | 
    
         
            +
                url = construct_query(args)
         
     | 
| 
      
 56 
     | 
    
         
            +
                
         
     | 
| 
      
 57 
     | 
    
         
            +
                response = http_client.get(url)
         
     | 
| 
      
 58 
     | 
    
         
            +
                
         
     | 
| 
      
 59 
     | 
    
         
            +
                if response.status != 200
         
     | 
| 
      
 60 
     | 
    
         
            +
                  results.error ||= {}
         
     | 
| 
      
 61 
     | 
    
         
            +
                  results.error[:status] = response.status
         
     | 
| 
      
 62 
     | 
    
         
            +
                  results.error[:response] = response.body
         
     | 
| 
      
 63 
     | 
    
         
            +
                  return results
         
     | 
| 
      
 64 
     | 
    
         
            +
                end
         
     | 
| 
      
 65 
     | 
    
         
            +
                
         
     | 
| 
      
 66 
     | 
    
         
            +
                json = MultiJson.load(response.body)
         
     | 
| 
      
 67 
     | 
    
         
            +
                
         
     | 
| 
      
 68 
     | 
    
         
            +
                results.total_items =  json["searchInformation"]["totalResults"].to_i
         
     | 
| 
      
 69 
     | 
    
         
            +
                
         
     | 
| 
      
 70 
     | 
    
         
            +
                (json["items"] || []).each do |json_item|
         
     | 
| 
      
 71 
     | 
    
         
            +
                  item = BentoSearch::ResultItem.new
         
     | 
| 
      
 72 
     | 
    
         
            +
                  
         
     | 
| 
      
 73 
     | 
    
         
            +
                  if configuration.highlighting
         
     | 
| 
      
 74 
     | 
    
         
            +
                    item.title          = highlight_normalize json_item["htmlTitle"]
         
     | 
| 
      
 75 
     | 
    
         
            +
                    item.abstract       = highlight_normalize json_item["htmlSnippet"]
         
     | 
| 
      
 76 
     | 
    
         
            +
                    item.source_title  = highlight_normalize json_item["htmlFormattedUrl"]
         
     | 
| 
      
 77 
     | 
    
         
            +
                  else
         
     | 
| 
      
 78 
     | 
    
         
            +
                    item.title          = json_item["title"]
         
     | 
| 
      
 79 
     | 
    
         
            +
                    item.abstract       = json_item["snippet"]
         
     | 
| 
      
 80 
     | 
    
         
            +
                    item.source_title  = json_item["formattedUrl"]
         
     | 
| 
      
 81 
     | 
    
         
            +
                  end
         
     | 
| 
      
 82 
     | 
    
         
            +
                  
         
     | 
| 
      
 83 
     | 
    
         
            +
                  item.link             = json_item["link"]
         
     | 
| 
      
 84 
     | 
    
         
            +
                  
         
     | 
| 
      
 85 
     | 
    
         
            +
                  # we won't bother generating openurls for google hits, not useful
         
     | 
| 
      
 86 
     | 
    
         
            +
                  item.openurl_disabled = true
         
     | 
| 
      
 87 
     | 
    
         
            +
                  
         
     | 
| 
      
 88 
     | 
    
         
            +
                  results << item
         
     | 
| 
      
 89 
     | 
    
         
            +
                end
         
     | 
| 
      
 90 
     | 
    
         
            +
                
         
     | 
| 
      
 91 
     | 
    
         
            +
                return results
         
     | 
| 
      
 92 
     | 
    
         
            +
              end
         
     | 
| 
      
 93 
     | 
    
         
            +
              
         
     | 
| 
      
 94 
     | 
    
         
            +
              # yep, google gives us a 10 max per page. 
         
     | 
| 
      
 95 
     | 
    
         
            +
              # also only lets us look at first 10 pages, sorry. 
         
     | 
| 
      
 96 
     | 
    
         
            +
              def max_per_page
         
     | 
| 
      
 97 
     | 
    
         
            +
                10
         
     | 
| 
      
 98 
     | 
    
         
            +
              end
         
     | 
| 
      
 99 
     | 
    
         
            +
              
         
     | 
| 
      
 100 
     | 
    
         
            +
              def self.required_configuation
         
     | 
| 
      
 101 
     | 
    
         
            +
                [:api_key, :cx]
         
     | 
| 
      
 102 
     | 
    
         
            +
              end
         
     | 
| 
      
 103 
     | 
    
         
            +
              
         
     | 
| 
      
 104 
     | 
    
         
            +
              def self.default_configuration
         
     | 
| 
      
 105 
     | 
    
         
            +
                { 
         
     | 
| 
      
 106 
     | 
    
         
            +
                  :base_url => 'https://www.googleapis.com/customsearch/v1?',
         
     | 
| 
      
 107 
     | 
    
         
            +
                  :highlighting => true    
         
     | 
| 
      
 108 
     | 
    
         
            +
                }
         
     | 
| 
      
 109 
     | 
    
         
            +
              end
         
     | 
| 
      
 110 
     | 
    
         
            +
              
         
     | 
| 
      
 111 
     | 
    
         
            +
              # Google supports relevance, and date sorting. Other kinds of
         
     | 
| 
      
 112 
     | 
    
         
            +
              # sorts not generally present. Can be with custom structured data,
         
     | 
| 
      
 113 
     | 
    
         
            +
              # but we don't support that. We currently do date sorts as hard sorts,
         
     | 
| 
      
 114 
     | 
    
         
            +
              # but could be changed to be biases instead. See:
         
     | 
| 
      
 115 
     | 
    
         
            +
              # https://developers.google.com/custom-search/docs/structured_data#page_dates
         
     | 
| 
      
 116 
     | 
    
         
            +
              def sort_definitions
                # Keyed by sort name; :implementation holds the value that
                # construct_query sends as the `sort` URL parameter. Relevance
                # carries no implementation, so no sort param is sent for it.
                date_sorts = {
                  "date_desc" => { :implementation => "date" },
                  "date_asc"  => { :implementation => "date:a" }
                }

                { "relevance" => {} }.merge(date_sorts)
              end
         
     | 
| 
      
 123 
     | 
    
         
            +
              
         
     | 
| 
      
 124 
     | 
    
         
            +
              protected
         
     | 
| 
      
 125 
     | 
    
         
            +
              
         
     | 
| 
      
 126 
     | 
    
         
            +
              # create the URL to the google API based on normalized search args
         
     | 
| 
      
 127 
     | 
    
         
            +
              #
         
     | 
| 
      
 128 
     | 
    
         
            +
              # If you ask for pagination beyond what google will provide, it
         
     | 
| 
      
 129 
     | 
    
         
            +
              # will give you the last page google will allow AND mutate the
         
     | 
| 
      
 130 
     | 
    
         
            +
              # args hash passed in to match what you actually got!
         
     | 
| 
      
 131 
     | 
    
         
            +
              def construct_query(args)
                # Credentials and the user's query, each URL-escaped.
                params = [
                  "key=#{CGI.escape configuration.api_key}",
                  "cx=#{CGI.escape configuration.cx}",
                  "q=#{CGI.escape args[:query]}"
                ]

                params << "num=#{args[:per_page]}" if args[:per_page]

                # Google's 'start' is 1-based and may not exceed (101 - num).
                # Instead of raising we clamp to the last legal offset and
                # rewrite args[:start]/args[:page] to reflect what is requested.
                if args[:start]
                  page_size    = args[:per_page] || 10
                  highest      = 101 - page_size
                  google_start = args[:start] + 1

                  if google_start > highest
                    google_start = highest
                    args[:start] = google_start - 1 # ours is zero based
                    args[:page]  = (args[:start] / page_size) + 1
                  end

                  params << "start=#{google_start}"
                end

                # Only sorts that define an :implementation produce a sort param.
                sort_def = args[:sort] && sort_definitions[args[:sort]]
                sort_key = sort_def && sort_def[:implementation]
                params << "sort=#{CGI.escape sort_key}" if sort_key

                "#{configuration.base_url}#{params.join('&')}"
              end
         
     | 
| 
      
 162 
     | 
    
         
            +
              
         
     | 
| 
      
 163 
     | 
    
         
            +
              # normalization for strings returned by google as 'html' with query
         
     | 
| 
      
 164 
     | 
    
         
            +
              # in context highlighting. 
         
     | 
| 
      
 165 
     | 
    
         
            +
              #
         
     | 
| 
      
 166 
     | 
    
         
            +
              # * change straight <b></b> tags given by google for highlighting
         
     | 
| 
      
 167 
     | 
    
         
            +
              # to <b class="bento_search_highlight">.
         
     | 
| 
      
 168 
     | 
    
         
            +
              # * remove <br> tags that google annoyingly puts in; we'll handle
         
     | 
| 
      
 169 
     | 
    
         
            +
              #   line wrapping ourselves thanks. 
         
     | 
| 
      
 170 
     | 
    
         
            +
              # * and mark html_safe
         
     | 
| 
      
 171 
     | 
    
         
            +
              def highlight_normalize(str)
                # Tag Google's bare <b> highlight markers with our CSS class,
                # drop the <br> tags it inserts, then mark the result html_safe.
                with_class     = str.gsub("<b>", '<b class="bento_search_highlight">')
                without_breaks = with_class.gsub("<br>", "")
                without_breaks.html_safe
              end
         
     | 
| 
      
 176 
     | 
    
         
            +
              
         
     | 
| 
      
 177 
     | 
    
         
            +
            end
         
     | 
| 
         @@ -15,7 +15,12 @@ 
     | 
|
| 
       15 
15 
     | 
    
         
             
            class BentoSearch::MockEngine
         
     | 
| 
       16 
16 
     | 
    
         
             
                include BentoSearch::SearchEngine
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
      
 18 
     | 
    
         
            +
                # used for testing what the engine received as args
         
     | 
| 
      
 19 
     | 
    
         
            +
                attr_accessor :last_args
         
     | 
| 
      
 20 
     | 
    
         
            +
                
         
     | 
| 
       18 
21 
     | 
    
         
             
                def search_implementation(args)
         
     | 
| 
      
 22 
     | 
    
         
            +
                  self.last_args = args
         
     | 
| 
      
 23 
     | 
    
         
            +
                  
         
     | 
| 
       19 
24 
     | 
    
         
             
                  results = BentoSearch::Results.new
         
     | 
| 
       20 
25 
     | 
    
         | 
| 
       21 
26 
     | 
    
         
             
                  if configuration.error
         
     | 
| 
         @@ -53,7 +53,10 @@ require 'httpclient' 
     | 
|
| 
       53 
53 
     | 
    
         
             
            # == Vendor docs
         
     | 
| 
       54 
54 
     | 
    
         
             
            #
         
     | 
| 
       55 
55 
     | 
    
         
             
            # http://www.exlibrisgroup.org/display/PrimoOI/Brief+Search
         
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
      
 56 
     | 
    
         
            +
            #
         
     | 
| 
      
 57 
     | 
    
         
            +
            # == Notes
         
     | 
| 
      
 58 
     | 
    
         
            +
            #
         
     | 
| 
      
 59 
     | 
    
         
            +
            # Some but not all hits have language_codes provided by api. 
         
     | 
| 
       57 
60 
     | 
    
         
             
            class BentoSearch::PrimoEngine
         
     | 
| 
       58 
61 
     | 
    
         
             
              include BentoSearch::SearchEngine
         
     | 
| 
       59 
62 
     | 
    
         | 
| 
         @@ -66,13 +69,29 @@ class BentoSearch::PrimoEngine 
     | 
|
| 
       66 
69 
     | 
    
         
             
              def search_implementation(args)
         
     | 
| 
       67 
70 
     | 
    
         | 
| 
       68 
71 
     | 
    
         
             
                url = construct_query(args)
         
     | 
| 
      
 72 
     | 
    
         
            +
                        
         
     | 
| 
      
 73 
     | 
    
         
            +
                results = BentoSearch::Results.new
         
     | 
| 
       69 
74 
     | 
    
         | 
| 
       70 
75 
     | 
    
         
             
                response = http_client.get(url)
         
     | 
| 
      
 76 
     | 
    
         
            +
                if response.status != 200
         
     | 
| 
      
 77 
     | 
    
         
            +
                  results.error ||= {}
         
     | 
| 
      
 78 
     | 
    
         
            +
                  results.error[:status] = response.status
         
     | 
| 
      
 79 
     | 
    
         
            +
                  results.error[:body] = response.body
         
     | 
| 
      
 80 
     | 
    
         
            +
                  return results
         
     | 
| 
      
 81 
     | 
    
         
            +
                end
         
     | 
| 
      
 82 
     | 
    
         
            +
                  
         
     | 
| 
      
 83 
     | 
    
         
            +
                
         
     | 
| 
       71 
84 
     | 
    
         
             
                response_xml = Nokogiri::XML response.body
         
     | 
| 
       72 
85 
     | 
    
         
             
                # namespaces really do nobody any good
         
     | 
| 
       73 
86 
     | 
    
         
             
                response_xml.remove_namespaces!
         
     | 
| 
       74 
87 
     | 
    
         | 
| 
       75 
     | 
    
         
            -
                 
     | 
| 
      
 88 
     | 
    
         
            +
                
         
     | 
| 
      
 89 
     | 
    
         
            +
                if error = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/ERROR")
         
     | 
| 
      
 90 
     | 
    
         
            +
                  results.error ||= {}
         
     | 
| 
      
 91 
     | 
    
         
            +
                  results.error[:code]    = error["CODE"]
         
     | 
| 
      
 92 
     | 
    
         
            +
                  results.error[:message] = error["MESSAGE"]
         
     | 
| 
      
 93 
     | 
    
         
            +
                  return results
         
     | 
| 
      
 94 
     | 
    
         
            +
                end
         
     | 
| 
       76 
95 
     | 
    
         | 
| 
       77 
96 
     | 
    
         
             
                results.total_items = response_xml.at_xpath("./SEGMENTS/JAGROOT/RESULT/DOCSET")["TOTALHITS"].to_i
         
     | 
| 
       78 
97 
     | 
    
         | 
| 
         @@ -119,6 +138,8 @@ class BentoSearch::PrimoEngine 
     | 
|
| 
       119 
138 
     | 
    
         
             
                  item.issn           = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/issn"
         
     | 
| 
       120 
139 
     | 
    
         
             
                  item.isbn           = text_at_xpath doc_xml, "./PrimoNMBib/record/addata/isbn"
         
     | 
| 
       121 
140 
     | 
    
         | 
| 
      
 141 
     | 
    
         
            +
                  item.language_code  = text_at_xpath doc_xml, "./PrimoNMBib/record/display/language"
         
     | 
| 
      
 142 
     | 
    
         
            +
                  
         
     | 
| 
       122 
143 
     | 
    
         
             
                  if (date = text_at_xpath doc_xml, "./PrimoNMBib/record/search/creationdate")
         
     | 
| 
       123 
144 
     | 
    
         
             
                    item.year = date[0,4] # first four chars
         
     | 
| 
       124 
145 
     | 
    
         
             
                  end
         
     | 
| 
         @@ -42,6 +42,8 @@ module BentoSearch 
     | 
|
| 
       42 
42 
     | 
    
         
             
              # TODO: Mention to Scopus: Only one author?
         
     | 
| 
       43 
43 
     | 
    
         
             
              # Paging of 50 gets an error, but docs say I should be able to request 200.
         
     | 
| 
       44 
44 
     | 
    
         
             
              # 
         
     | 
| 
      
 45 
     | 
    
         
            +
              # Scopus response does not seem to include language of hit, even though
         
     | 
| 
      
 46 
     | 
    
         
            +
              # api allows you to restrict by language. ask scopus if we're missing something?
         
     | 
| 
       45 
47 
     | 
    
         
             
              class ScopusEngine
         
     | 
| 
       46 
48 
     | 
    
         
             
                include BentoSearch::SearchEngine
         
     | 
| 
       47 
49 
     | 
    
         | 
| 
         @@ -62,6 +64,7 @@ module BentoSearch 
     | 
|
| 
       62 
64 
     | 
    
         
             
                      "X-ELS-ResourceVersion" => "XOCS",
         
     | 
| 
       63 
65 
     | 
    
         
             
                      "Accept" => "application/atom+xml"}
         
     | 
| 
       64 
66 
     | 
    
         
             
                    )
         
     | 
| 
      
 67 
     | 
    
         
            +
             
     | 
| 
       65 
68 
     | 
    
         
             
                    xml = Nokogiri::XML(response.body)
         
     | 
| 
       66 
69 
     | 
    
         
             
                  rescue TimeoutError, HTTPClient::ConfigurationError, HTTPClient::BadResponseError, Nokogiri::SyntaxError  => e
         
     | 
| 
       67 
70 
     | 
    
         
             
                    exception = e        
         
     | 
| 
         @@ -81,7 +84,7 @@ module BentoSearch 
     | 
|
| 
       81 
84 
     | 
    
         
             
                        xml &&
         
     | 
| 
       82 
85 
     | 
    
         
             
                        (error_xml = xml.at_xpath("./service-error/status")) &&
         
     | 
| 
       83 
86 
     | 
    
         
             
                        (node_text(error_xml.at_xpath("./statusCode")) == "INVALID_INPUT") &&
         
     | 
| 
       84 
     | 
    
         
            -
                        (node_text(error_xml.at_xpath("./statusText"))  
     | 
| 
      
 87 
     | 
    
         
            +
                        (node_text(error_xml.at_xpath("./statusText")).starts_with? "Result set was empty")
         
     | 
| 
       85 
88 
     | 
    
         
             
                      )
         
     | 
| 
       86 
89 
     | 
    
         
             
                      # PROBABLY 0 hit count, although could be something else I'm afraid. 
         
     | 
| 
       87 
90 
     | 
    
         
             
                      results.total_items = 0
         
     | 
| 
         @@ -79,6 +79,8 @@ require 'summon/transport/headers' 
     | 
|
| 
       79 
79 
     | 
    
         
             
            # headers how summon wants it, see class at
         
     | 
| 
       80 
80 
     | 
    
         
             
            # https://github.com/summon/summon.rb/blob/master/lib/summon/transport/headers.rb
         
     | 
| 
       81 
81 
     | 
    
         
             
            #
         
     | 
| 
      
 82 
     | 
    
         
            +
            # Language provided only in language_str not language_code, all that API gives
         
     | 
| 
      
 83 
     | 
    
         
            +
            # us. We could try to reverse lookup from ISO code labels later if we want. 
         
     | 
| 
       82 
84 
     | 
    
         
             
            class BentoSearch::SummonEngine
         
     | 
| 
       83 
85 
     | 
    
         
             
              include BentoSearch::SearchEngine
         
     | 
| 
       84 
86 
     | 
    
         | 
| 
         @@ -169,6 +171,8 @@ class BentoSearch::SummonEngine 
     | 
|
| 
       169 
171 
     | 
    
         
             
                    item.format_str     = doc_hash["ContentType"].join(", ")
         
     | 
| 
       170 
172 
     | 
    
         
             
                  end
         
     | 
| 
       171 
173 
     | 
    
         | 
| 
      
 174 
     | 
    
         
            +
                  item.language_str   = first_if_present doc_hash["Language"]
         
     | 
| 
      
 175 
     | 
    
         
            +
                  
         
     | 
| 
       172 
176 
     | 
    
         
             
                  if ( configuration.highlighting && configuration.snippets_as_abstract &&
         
     | 
| 
       173 
177 
     | 
    
         
             
                    doc_hash["Snippet"] && doc_hash["Snippet"].length > 0 )
         
     | 
| 
       174 
178 
     | 
    
         | 
| 
         @@ -177,8 +181,6 @@ class BentoSearch::SummonEngine 
     | 
|
| 
       177 
181 
     | 
    
         
             
                    item.abstract       = first_if_present doc_hash["Abstract"]
         
     | 
| 
       178 
182 
     | 
    
         
             
                  end
         
     | 
| 
       179 
183 
     | 
    
         | 
| 
       180 
     | 
    
         
            -
                  item.extend( SummonOpenurlOverride )
         
     | 
| 
       181 
     | 
    
         
            -
                  
         
     | 
| 
       182 
184 
     | 
    
         
             
                  results << item
         
     | 
| 
       183 
185 
     | 
    
         
             
                end
         
     | 
| 
       184 
186 
     | 
    
         | 
| 
         @@ -381,18 +383,6 @@ class BentoSearch::SummonEngine 
     | 
|
| 
       381 
383 
     | 
    
         
             
                  }
         
     | 
| 
       382 
384 
     | 
    
         
             
              end
         
     | 
| 
       383 
385 
     | 
    
         | 
| 
       384 
     | 
    
         
            -
              # Module that we extend our ResultItems with, to over-ride 
         
     | 
| 
       385 
     | 
    
         
            -
              # to_openurl to use a dup of ourselves with title/subtitle
         
     | 
| 
       386 
     | 
    
         
            -
              # set to raw ones without highlighting markup. 
         
     | 
| 
       387 
     | 
    
         
            -
              module SummonOpenurlOverride
         
     | 
| 
       388 
     | 
    
         
            -
                def to_openurl
         
     | 
| 
       389 
     | 
    
         
            -
                  dup = self.dup
         
     | 
| 
       390 
     | 
    
         
            -
                  dup.title = self.custom_data["raw_title"]
         
     | 
| 
       391 
     | 
    
         
            -
                  dup.subtitle = self.custom_data["raw_subtitle"]
         
     | 
| 
       392 
     | 
    
         
            -
                  
         
     | 
| 
       393 
     | 
    
         
            -
                  dup.to_openurl
         
     | 
| 
       394 
     | 
    
         
            -
                end      
         
     | 
| 
       395 
     | 
    
         
            -
              end
         
     | 
| 
       396 
386 
     | 
    
         | 
| 
       397 
387 
     | 
    
         | 
| 
       398 
388 
     | 
    
         
             
            end
         
     |