RubyGems - umlaut - Versions diffs - 3.0.0alpha1 - Mend

umlaut 3.0.0alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (293) hide show

data/LICENSE +7 -0
data/README.md +49 -0
data/Rakefile +37 -0
data/app/assets/images/error.gif +0 -0
data/app/assets/images/export_bg_bot.gif +0 -0
data/app/assets/images/export_bg_mid.gif +0 -0
data/app/assets/images/export_bg_top.gif +0 -0
data/app/assets/images/famfamfam/book_open.png +0 -0
data/app/assets/images/famfamfam/cross.png +0 -0
data/app/assets/images/famfamfam/page_sound.gif +0 -0
data/app/assets/images/famfamfam/page_text.gif +0 -0
data/app/assets/images/famfamfam/page_up.gif +0 -0
data/app/assets/images/famfamfam/page_white.png +0 -0
data/app/assets/images/famfamfam/readme.html +1495 -0
data/app/assets/images/famfamfam/tiny_cross.png +0 -0
data/app/assets/images/frame_remove.gif +0 -0
data/app/assets/images/ico_go.gif +0 -0
data/app/assets/images/jhu_findit.gif +0 -0
data/app/assets/images/list_closed.png +0 -0
data/app/assets/images/list_open.png +0 -0
data/app/assets/images/more_info.gif +0 -0
data/app/assets/images/rails.png +0 -0
data/app/assets/images/request.gif +0 -0
data/app/assets/images/spinner.gif +0 -0
data/app/assets/javascripts/umlaut/ajax_windows.js +35 -0
data/app/assets/javascripts/umlaut/ensure_window_size.js.erb +34 -0
data/app/assets/javascripts/umlaut/expand_contract_toggle.js +25 -0
data/app/assets/javascripts/umlaut/search_autocomplete.js +46 -0
data/app/assets/javascripts/umlaut/simple_visible_toggle.js +8 -0
data/app/assets/javascripts/umlaut/update_html.js +152 -0
data/app/assets/javascripts/umlaut.js +17 -0
data/app/assets/stylesheets/umlaut.css +857 -0
data/app/controllers/application_controller.rb +14 -0
data/app/controllers/export_email_controller.rb +123 -0
data/app/controllers/js_helper_controller.rb +10 -0
data/app/controllers/link_router_controller.rb +87 -0
data/app/controllers/open_search_controller.rb +9 -0
data/app/controllers/resolve_controller.rb +288 -0
data/app/controllers/resource_controller.rb +83 -0
data/app/controllers/search_controller.rb +328 -0
data/app/controllers/search_methods/sfx3.rb +148 -0
data/app/controllers/search_methods/sfx4.rb +257 -0
data/app/controllers/search_methods/sfx_api.rb +47 -0
data/app/controllers/store_controller.rb +64 -0
data/app/controllers/umlaut/controller_behavior.rb +20 -0
data/app/controllers/umlaut/controller_logic.rb +96 -0
data/app/controllers/umlaut/error_handling.rb +48 -0
data/app/controllers/umlaut_controller.rb +112 -0
data/app/helpers/application_helper.rb +4 -0
data/app/helpers/emailer_helper.rb +43 -0
data/app/helpers/export_email_helper.rb +34 -0
data/app/helpers/open_search_helper.rb +7 -0
data/app/helpers/resolve_helper.rb +225 -0
data/app/helpers/search_helper.rb +50 -0
data/app/helpers/umlaut/footer_helper.rb +64 -0
data/app/helpers/umlaut/helper.rb +62 -0
data/app/helpers/umlaut/html_head_helper.rb +37 -0
data/app/helpers/umlaut/url_generation.rb +77 -0
data/app/mailers/emailer.rb +48 -0
data/app/models/clickthrough.rb +2 -0
data/app/models/collection.rb +259 -0
data/app/models/crossref_lookup.rb +2 -0
data/app/models/dispatched_service.rb +58 -0
data/app/models/permalink.rb +29 -0
data/app/models/referent.rb +473 -0
data/app/models/referent_value.rb +14 -0
data/app/models/request.rb +449 -0
data/app/models/service_response.rb +179 -0
data/app/models/service_store.rb +59 -0
data/app/models/service_type_value.rb +58 -0
data/app/models/service_wave.rb +150 -0
data/app/models/sfx_db/az_additional_title.rb +11 -0
data/app/models/sfx_db/az_letter_group.rb +11 -0
data/app/models/sfx_db/az_title.rb +38 -0
data/app/models/sfx_db/az_title_v2.rb +34 -0
data/app/models/sfx_db/isbn.rb +12 -0
data/app/models/sfx_db/issn.rb +12 -0
data/app/models/sfx_db/object.rb +35 -0
data/app/models/sfx_db/object_portfolio.rb +6 -0
data/app/models/sfx_db/publisher.rb +10 -0
data/app/models/sfx_db/sfx_db_base.rb +54 -0
data/app/models/sfx_db/target.rb +9 -0
data/app/models/sfx_db/target_service.rb +10 -0
data/app/models/sfx_db/title.rb +10 -0
data/app/models/sfx_db.rb +10 -0
data/app/models/sfx_url.rb +35 -0
data/app/views/emailer/citation.text.erb +28 -0
data/app/views/emailer/short_citation.text.erb +8 -0
data/app/views/export_email/_email.html.erb +25 -0
data/app/views/export_email/_send_email.html.erb +3 -0
data/app/views/export_email/_send_txt.html.erb +3 -0
data/app/views/export_email/_txt.html.erb +62 -0
data/app/views/export_email/email.html.erb +3 -0
data/app/views/export_email/send_email.html.erb +1 -0
data/app/views/export_email/send_txt.html.erb +1 -0
data/app/views/export_email/txt.html.erb +3 -0
data/app/views/js_helper/loader.erb.js +13 -0
data/app/views/layouts/umlaut.html.erb +52 -0
data/app/views/open_search/index.html.erb +9 -0
data/app/views/resolve/_api_in_progress.xml.erb +21 -0
data/app/views/resolve/_background_progress.html.erb +51 -0
data/app/views/resolve/_background_updater.html.erb +38 -0
data/app/views/resolve/_citation.html.erb +87 -0
data/app/views/resolve/_coins.html.erb +1 -0
data/app/views/resolve/_compact_citation.html.erb +33 -0
data/app/views/resolve/_cover_image.html.erb +35 -0
data/app/views/resolve/_fulltext.html.erb +55 -0
data/app/views/resolve/_help.html.erb +17 -0
data/app/views/resolve/_holding.html.erb +91 -0
data/app/views/resolve/_related_items.html.erb +35 -0
data/app/views/resolve/_search_inside.html.erb +62 -0
data/app/views/resolve/_section_display.html.erb +49 -0
data/app/views/resolve/_service_errors.html.erb +29 -0
data/app/views/resolve/_standard_response_item.html.erb +89 -0
data/app/views/resolve/api.xml.builder +72 -0
data/app/views/resolve/background_status.html.erb +26 -0
data/app/views/resolve/index.html.erb +73 -0
data/app/views/resolve/partial_html_sections.xml.erb +30 -0
data/app/views/search/_a_to_z.html.erb +6 -0
data/app/views/search/_citation.html.erb +94 -0
data/app/views/search/_pager.html.erb +60 -0
data/app/views/search/books.html.erb +103 -0
data/app/views/search/journal_search.html.erb +90 -0
data/app/views/search/journals.html.erb +167 -0
data/app/views/search/opensearch_description.rxml +10 -0
data/app/views/testing/index.html.erb +1 -0
data/app/views/umlaut/README +5 -0
data/app/views/umlaut/error.html.erb +45 -0
data/db/migrate/01_umlaut_init.rb +113 -0
data/db/orig_fixed_data/service_type_values.yml +120 -0
data/db/seeds.rb +7 -0
data/lib/CronTab.rb +192 -0
data/lib/aws_product_sign.rb +146 -0
data/lib/exlibris/aleph/patron.rb +64 -0
data/lib/exlibris/aleph/record.rb +54 -0
data/lib/exlibris/aleph/rest_api.rb +29 -0
data/lib/exlibris/primo/holding.rb +192 -0
data/lib/exlibris/primo/rsrc.rb +17 -0
data/lib/exlibris/primo/searcher.rb +276 -0
data/lib/exlibris/primo/source/aleph.rb +46 -0
data/lib/exlibris/primo/source/distribution/nyu_aleph.rb +323 -0
data/lib/exlibris/primo/toc.rb +17 -0
data/lib/exlibris/primo_ws.rb +140 -0
data/lib/generators/templates/umlaut_services.yml +237 -0
data/lib/generators/umlaut/asset_hooks_generator.rb +44 -0
data/lib/generators/umlaut/install_generator.rb +110 -0
data/lib/hip3/bib.rb +291 -0
data/lib/hip3/bib_searcher.rb +302 -0
data/lib/hip3/custom_field_lookup.rb +44 -0
data/lib/hip3/holding.rb +50 -0
data/lib/hip3/item.rb +65 -0
data/lib/hip3/receipt.rb +7 -0
data/lib/hip3/serial_copy.rb +82 -0
data/lib/holding.rb +32 -0
data/lib/marc_helper.rb +254 -0
data/lib/metadata_helper.rb +312 -0
data/lib/opensearch_feed.rb +398 -0
data/lib/opensearch_query.rb +98 -0
data/lib/referent_filter.rb +16 -0
data/lib/referent_filters/dissertation_catch.rb +45 -0
data/lib/section_renderer.rb +503 -0
data/lib/service.rb +336 -0
data/lib/service_adaptors/ajax_export.rb +37 -0
data/lib/service_adaptors/amazon.rb +412 -0
data/lib/service_adaptors/blacklight.rb +327 -0
data/lib/service_adaptors/book_finder.rb +40 -0
data/lib/service_adaptors/bx.rb +51 -0
data/lib/service_adaptors/cover_thing.rb +73 -0
data/lib/service_adaptors/elsevier_cover.rb +57 -0
data/lib/service_adaptors/email_export.rb +10 -0
data/lib/service_adaptors/ezproxy.rb +171 -0
data/lib/service_adaptors/google_book_search.rb +442 -0
data/lib/service_adaptors/gpo.rb +124 -0
data/lib/service_adaptors/hathi_trust.rb +308 -0
data/lib/service_adaptors/hip3_service.rb +150 -0
data/lib/service_adaptors/hip_holding_search.rb +237 -0
data/lib/service_adaptors/internet_archive.rb +488 -0
data/lib/service_adaptors/isbn_db.rb +86 -0
data/lib/service_adaptors/isi.rb +258 -0
data/lib/service_adaptors/jcr.rb +146 -0
data/lib/service_adaptors/opac.rb +351 -0
data/lib/service_adaptors/open_library.rb +316 -0
data/lib/service_adaptors/open_library_cover.rb +73 -0
data/lib/service_adaptors/primo_service.rb +392 -0
data/lib/service_adaptors/primo_source.rb +78 -0
data/lib/service_adaptors/pubmed.rb +133 -0
data/lib/service_adaptors/request_to_fixture.rb +68 -0
data/lib/service_adaptors/scopus.rb +295 -0
data/lib/service_adaptors/sfx-new.rb +557 -0
data/lib/service_adaptors/sfx.rb +566 -0
data/lib/service_adaptors/sfx_backchannel_record.rb +69 -0
data/lib/service_adaptors/txt_holding_export.rb +32 -0
data/lib/service_adaptors/ulrichs_cover.rb +57 -0
data/lib/service_adaptors/ulrichs_link.rb +47 -0
data/lib/service_adaptors/worldcat.rb +116 -0
data/lib/service_adaptors/worldcat_identities.rb +591 -0
data/lib/tasks/umlaut.rake +134 -0
data/lib/umlaut/default_configuration.rb +5 -0
data/lib/umlaut/routes.rb +136 -0
data/lib/umlaut/version.rb +3 -0
data/lib/umlaut.rb +37 -0
data/lib/umlaut_configurable.rb +343 -0
data/lib/umlaut_http.rb +100 -0
data/lib/xml_schema_helper.rb +109 -0
data/test/dummy/Rakefile +7 -0
data/test/dummy/app/assets/javascripts/application.js +13 -0
data/test/dummy/app/assets/stylesheets/application.css +15 -0
data/test/dummy/app/controllers/application_controller.rb +3 -0
data/test/dummy/app/controllers/umlaut_controller.rb +112 -0
data/test/dummy/app/helpers/application_helper.rb +2 -0
data/test/dummy/app/views/layouts/application.html.erb +14 -0
data/test/dummy/config/application.rb +45 -0
data/test/dummy/config/boot.rb +10 -0
data/test/dummy/config/database-jhu.yml +44 -0
data/test/dummy/config/database.yml +25 -0
data/test/dummy/config/environment.rb +5 -0
data/test/dummy/config/environments/development.rb +34 -0
data/test/dummy/config/environments/production.rb +60 -0
data/test/dummy/config/environments/test.rb +39 -0
data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/test/dummy/config/initializers/inflections.rb +10 -0
data/test/dummy/config/initializers/mime_types.rb +5 -0
data/test/dummy/config/initializers/secret_token.rb +7 -0
data/test/dummy/config/initializers/session_store.rb +8 -0
data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
data/test/dummy/config/locales/en.yml +5 -0
data/test/dummy/config/routes.rb +61 -0
data/test/dummy/config/umlaut_services.yml +237 -0
data/test/dummy/config.ru +4 -0
data/test/dummy/db/migrate/20111228211210_umlaut_init.rb +113 -0
data/test/dummy/db/schema.rb +124 -0
data/test/dummy/log/development.log +12981 -0
data/test/dummy/log/production.log +0 -0
data/test/dummy/public/404.html +26 -0
data/test/dummy/public/422.html +26 -0
data/test/dummy/public/500.html +26 -0
data/test/dummy/public/favicon.ico +0 -0
data/test/dummy/script/rails +6 -0
data/test/dummy/tmp/cache/assets/C5F/340/sprockets%2F99692920160b7a279b86a80415b79db7 +0 -0
data/test/dummy/tmp/cache/assets/C70/4D0/sprockets%2F034ad2036e623081bd352800786dfe80 +0 -0
data/test/dummy/tmp/cache/assets/C73/920/sprockets%2Fd371318f22900492fd180f17c5e2a504 +9268 -0
data/test/dummy/tmp/cache/assets/C80/980/sprockets%2Fc94807409c1523d43e18d25f35d93c41 +0 -0
data/test/dummy/tmp/cache/assets/C8F/780/sprockets%2Fe47e28558116fb5f8038754e60d1961d +11769 -0
data/test/dummy/tmp/cache/assets/CAA/EB0/sprockets%2F1d179210e8b76f1ea63c802688a015e4 +9271 -0
data/test/dummy/tmp/cache/assets/CBB/9C0/sprockets%2F706f28923fb754cad04b9107c89986a1 +0 -0
data/test/dummy/tmp/cache/assets/CBF/B60/sprockets%2F08ca89671549936265dcb673bf02e36f +0 -0
data/test/dummy/tmp/cache/assets/CC9/9F0/sprockets%2F306166316e2cafd13c15e62b51a2339d +0 -0
data/test/dummy/tmp/cache/assets/CF6/F20/sprockets%2F5b2ffa1103079dfd555197838f87a99f +0 -0
data/test/dummy/tmp/cache/assets/CF7/2B0/sprockets%2F25a7c73655bd3598173b39d9f98bcd46 +862 -0
data/test/dummy/tmp/cache/assets/CFE/080/sprockets%2F37fe9f4255baddbd549a659914929398 +0 -0
data/test/dummy/tmp/cache/assets/D22/060/sprockets%2F9aec77b768e91a802d284271c58e2f7e +21357 -0
data/test/dummy/tmp/cache/assets/D32/A10/sprockets%2F13fe41fee1fe35b49d145bcc06610705 +0 -0
data/test/dummy/tmp/cache/assets/D33/6D0/sprockets%2F500129c57f1146e556ec3aacd6cd38c1 +0 -0
data/test/dummy/tmp/cache/assets/D33/FD0/sprockets%2F2ba0b4e6334a77b923e5f770381bb2bf +0 -0
data/test/dummy/tmp/cache/assets/D42/C20/sprockets%2Fbcf14e437b1582bf93b77670acf8e090 +21353 -0
data/test/dummy/tmp/cache/assets/D50/A30/sprockets%2F7d8b294ac433db5d056538f8cf7c66b9 +0 -0
data/test/dummy/tmp/cache/assets/D54/ED0/sprockets%2F71c9fa01091d432b131da3bb73faf3d4 +872 -0
data/test/dummy/tmp/cache/assets/D65/590/sprockets%2Fc1bb92fc3406a126b7dd302edc96d629 +0 -0
data/test/dummy/tmp/cache/assets/D71/6B0/sprockets%2Fde558b71b494cf09b1bf055c8dff0353 +0 -0
data/test/dummy/tmp/cache/assets/D72/610/sprockets%2Fa8c708eeb30ef93de34d755d4f45d023 +859 -0
data/test/dummy/tmp/cache/assets/D76/AD0/sprockets%2Fe2158cde93188cf5ab6457bc6d6602ec +0 -0
data/test/dummy/tmp/cache/assets/D7A/E40/sprockets%2F9622ffcc499a57627cd1bb18fe31b8e4 +11772 -0
data/test/dummy/tmp/cache/assets/D84/210/sprockets%2Fabd0103ccec2b428ac62c94e4c40b384 +0 -0
data/test/dummy/tmp/cache/assets/D9B/770/sprockets%2F8aacf02eb7dbb0949704b28f27b87e0b +0 -0
data/test/dummy/tmp/cache/assets/DA6/A80/sprockets%2F92e26d8e58d5bcc8b8f6c25d1b05b9c1 +0 -0
data/test/dummy/tmp/cache/assets/DE8/790/sprockets%2Fd1333bde2b9aafcc712d11dd09ab35d8 +0 -0
data/test/dummy/tmp/cache/assets/DF7/F30/sprockets%2F7bc16c4109b17fabe29f8ddbbf732d1c +374 -0
data/test/dummy/tmp/cache/assets/E03/570/sprockets%2F493bdc0ac14cd4f57fdfe4253f992bde +0 -0
data/test/dummy/tmp/cache/assets/E04/890/sprockets%2F2f5173deea6c795b8fdde723bb4b63af +0 -0
data/test/dummy/tmp/cache/assets/E0B/4B0/sprockets%2F7988df51a61c81ce6ede4a2d4c8cce4f +377 -0
data/test/dummy/tmp/cache/assets/E5F/960/sprockets%2Fdc007b6cad5c7ef08e33ec28cfff0ef6 +0 -0
data/test/fixtures/dispatched_services.yml +5 -0
data/test/fixtures/permalinks.yml +5 -0
data/test/fixtures/referent_values.yml +1734 -0
data/test/fixtures/referents.yml +156 -0
data/test/fixtures/requests.yml +284 -0
data/test/fixtures/service_responses.yml +5 -0
data/test/fixtures/sfx_urls.yml +4 -0
data/test/performance/browsing_test.rb +9 -0
data/test/test_helper.rb +10 -0
data/test/umlaut_test.rb +7 -0
data/test/unit/aleph_patron_test.rb +39 -0
data/test/unit/aleph_record_benchmarks.rb +28 -0
data/test/unit/aleph_record_test.rb +30 -0
data/test/unit/aws_product_sign_test.rb +93 -0
data/test/unit/collection_test.rb +76 -0
data/test/unit/google_book_search_test.rb +101 -0
data/test/unit/primo_searcher_test.rb +403 -0
data/test/unit/primo_service_test.rb +939 -0
data/test/unit/primo_ws_test.rb +131 -0
data/test/unit/service_response_test.rb +9 -0
data/test/unit/service_test.rb +33 -0
metadata +580 -0

data/lib/service_adaptors/hip_holding_search.rb ADDED Viewed

@@ -0,0 +1,237 @@
+class HipHoldingSearch < Hip3Service
+  required_config_params :base_path, :display_name
+  attr_reader :base_path
+  include MarcHelper
+  def initialize(config)
+    # Default preemption by any holding
+    @bib_limit = 4
+    @preempted_by = { "existing_type" => "holding" }
+    @keyword_exact_match = true
+    # If you are sending an OpenURL from a library service, you may
+    # have the HIP bibnum, and include it in the OpenURL as, eg.
+    # rft_id=http://catalog.library.jhu.edu/bib/343434 (except URL-encoded)
+    # Then you'd set rft_id_bibnum_prefix to http://catalog.library.jhu.edu/bib/
+    @rft_id_bibnum_prefix = nil
+    @profile = "general"
+    super(config)
+    # Trim question-mark from base_url, if given
+    @base_path.chop! if (@base_path.rindex('?') ==  @base_path.length)
+  end
+  def service_types_generated
+    # Add one more to whatever the Hip3Service does.
+    return super.push(ServiceTypeValue['holding_search'])
+  end
+  def handle(request)
+    # Only do anything if we have no holdings results from someone else.
+    holdings = request.service_types.find(:all, :conditions=>["service_type_value_name = ?", "holding"])
+    if (holdings.length > 0)
+      return request.dispatched(self, true)
+    end
+    ref_metadata = request.referent.metadata
+    bib_searcher = Hip3::BibSearcher.new(@base_path)
+    search_hash = {}
+    if ( request.referent.format != "book" &&
+        (! ref_metadata['jtitle'].blank?) &&
+        ref_metadata['bititle'].blank? )
+      hip_title_index = Hip3::BibSearcher::SERIAL_TITLE_KW_INDEX
+    else
+      hip_title_index = Hip3::BibSearcher::TITLE_KW_INDEX
+    end
+    title = ref_metadata['jtitle']
+    title = ref_metadata['btitle'] if title.blank?
+    title = ref_metadata['title'] if title.blank?
+    #title_terms = search_terms_for_title_tokenized(title)
+    # tokenized was too much recall, not enough precision. Try phrase
+    # search.
+    title_terms = search_terms_for_title_phrase(title)
+    unless ( title_terms )
+      Rails.logger.debug("#{self.service_id} is missing title, can not search.")
+      return request.dispatched(self, true)
+    end
+    search_hash[hip_title_index] = title_terms
+    # Do we have the bibnum?
+    bibnum = get_bibnum(request.referent)
+    bib_searcher.bibnum = bibnum if bibnum
+    # If it's a non-journal thing, add the author if we have an aulast (preferred) or au.
+    # But wait--if it's a book _part_, don't include the author name, since
+    # it _might_ just be the author of the part, not of the book.
+    unless (request.referent.format == "journal" ||
+              ( request.referent.format == "book" &&  ! ref_metadata['atitle'].blank?))
+      # prefer aulast
+      if (! ref_metadata['aulast'].blank?)
+        search_hash[ Hip3::BibSearcher::AUTHOR_KW_INDEX ] = [ref_metadata['aulast']]
+      elsif (! ref_metadata['au'].blank?)
+        search_hash[ Hip3::BibSearcher::AUTHOR_KW_INDEX ] = [ref_metadata['au']]
+      end
+    end
+    bib_searcher.search_hash = search_hash
+    unless bib_searcher.insufficient_query
+      timing_debug("start search")
+      bibs = bib_searcher.search
+      timing_debug("bib searching")
+      # Ssee if any our matches are exact title matches. 'exact' after normalizing a bit, including removing subtitles.
+      matches = [];
+      # Various variant normalized forms of the title from the OpenURL
+      # request. #compact removes nil values.
+      request_titles = [title,
+                       normalize_title( title ),
+                       normalize_title( title, :remove_subtitle => true)   ].compact
+      if ( @keyword_exact_match )
+        bibs.each do |bib|
+          # various variant normalized forms of the title from the bib
+          # #compact removes nil values.
+          bib_titles = [ bib.title,
+                         normalize_title(bib.title, :remove_subtitle => true),
+                         normalize_title(bib.title) ].compact
+          # Do any of the various forms match? Set intersection on our
+          # two sets.
+          if ( bib_titles & request_titles ).length > 0
+            matches.push( bib )
+          end
+        end
+      end
+      responses_added = Hash.new
+      timing_debug("Finding matches")
+      if (matches.length > 0 )
+        # process as exact matches with method from Hip3Service
+        # Add copies
+        # Add 856 urls.
+        responses_added = {}
+        unless preempted_by(request, "fulltext")
+          # Let's do some analysis of our results. If it's got a matching
+          # bibnum, then include it as an EXACT match.
+          req_bibnum = get_bibnum(request.referent)
+          if ( req_bibnum )
+            matches.each do |bib|
+              if (req_bibnum == bib.bibNum)
+                responses_added.merge!( add_856_links(request, [bib.marc_xml])  )
+                responses_added.merge!( add_copies( request, [bib] ))
+                matches.delete(bib)
+              end
+            end
+          end
+          timing_debug("Identified matches")
+          # Otherwise, sort records with matching dates FIRST.
+          # Some link generators use an illegal 'year' parameter, bah.
+          if ( date = (request.referent['date'] || request.referent['year']))
+            req_year = date[0,4]
+            matches = matches.partition {|bib| get_years(bib.marc_xml).include?( req_year )}.flatten
+          end
+          timing_debug("Date sorted")
+          responses_added.merge!( add_856_links(request, matches.collect{|b| b.marc_xml}, :match_reliability => ServiceResponse::MatchUnsure ) )
+          timing_debug("added 856's")
+        end
+        responses_added.merge!(  add_copies(request, matches, :match_reliability => ServiceResponse::MatchUnsure ) )
+        timing_debug("added copies")
+      end
+      if (bibs.length > 0 && (! responses_added['holding']))
+        # process as holdings_search
+        request.add_service_response(
+          :service => self,
+          :source_name => @display_name,
+          :count => bibs.length,
+          :display_text => "#{bibs.length} possible #{case; when bibs.length > 1 ; 'matches' ; else; 'match' ; end} in #{display_name}",
+          :url => bib_searcher.search_url,
+          :service_type_value => :holding_search)
+      end
+    end
+    return request.dispatched(self, true)
+  end
+  # One algorithm for turning a title into HIP search terms.
+  # Tokenizes the title into individual words, eliminates stop-words,
+  # and combines each word with 'AND'. We started with this for maximum
+  # recall, but after some experimentation seems to have too low precision
+  # without sufficient enough increase in recall.
+  # Returns an array of keywords.
+  def search_terms_for_title_tokenized(title)
+    title_cleaned = normalize_title(title)
+    if title_cleaned.blank?
+      # Not enough metadata to search.
+      return nil
+    end
+    # plus remove some obvious stop words, cause HIP is going to choke on em
+    title_cleaned.gsub!(/\bthe\b|\band\b|\bor\b|\bof\b|\ba\b/i,'')
+    title_kws = title_cleaned.split
+    # limit to 12 keywords
+    title_kws = title_kws.slice( (0..11) )
+    return title_kws
+  end
+  # Another algorithm for turning a title into HIP search terms.
+  # This one doesn't tokenize, but keeps the whole title as a phrase
+  # search. Does eliminate punctuation. Does not remove things that
+  # look like a sub-title.
+  # Returns an array with one item.
+  def search_terms_for_title_phrase(title)
+    title_cleaned = normalize_title(title)
+    if title_cleaned.blank?
+      # Not enough metadata to search.
+      return nil
+    end
+    return [title_cleaned]
+  end
+  def timing_debug(waypoint = "Waypoint")
+    @last_timed ||= Time.now
+    before = @last_timed
+    @last_timed = Time.now
+    interval = @last_timed - before
+    Rails.logger.debug("#{service_id}: #{waypoint}: #{interval}")
+  end
+end

data/lib/service_adaptors/internet_archive.rb ADDED Viewed

@@ -0,0 +1,488 @@
+# This service searches the Internet Archive (archive.org) by title
+# and, if present, creator. Results are broken down by mediatypes. Which
+# mediatypes are searched can be configured via umlaut_config/services.yml.
+# Also an optional link to a full search in the native interface can be
+# presented to the user.
+# Property settings can be set in services.yml
+# url:
+# num_results: a number. This is the number of results returned for each
+#   mediatype within the main section of the view
+# mediatypes: an array of the mediatypes searched. Ensure each one is an
+#   appropriate mediatype as defined by IA. Searching by mediatype searches
+#   across collections.
+#   The following link will (currently) show the possible mediatypes:
+#   http://homeserver7.us.archive.org:8983/solr/select?q=[*+TO+*]&fl=identifier&wt=json&rows=0&indent=yes&facet=true&facet.field=mediatype
+# show_web_link: boolean. If set to true, if there are more results than
+#   num_results a link to those further results will display
+#   with highlighted_links
+# display_name: defaults to "Internet Archive"
+class InternetArchive < Service
+  require 'open-uri' #
+  require 'cgi'
+  require 'multi_json' #we ask IA for json
+  require 'timeout' # used to timeout our requests
+  include MetadataHelper
+  # No parameters are required, we have working defaults for them all.
+  attr_reader :url, :num_results, :mediatypes
+  # maps the IA mediatype to Umlaut service type
+  SERVICE_TYPE_MAP = {
+    "texts" => :fulltext,
+    "audio" => :audio
+  }
+  def service_types_generated
+    types = [
+      ServiceTypeValue[:fulltext],
+      ServiceTypeValue[:audio],
+      ServiceTypeValue[:'highlighted_link']
+      ]
+    types << ServiceTypeValue[:search_inside] if @include_search_inside
+    return types
+  end
+  def initialize(config)
+    # Default base URL for IA advanced search. We use this base link rather than
+    # the this rather than the IA Solr index directly because IA suggests that
+    # the Solr home may change over time.
+    @url = 'http://www.archive.org/advancedsearch.php?'
+    # default number of results to return
+    @num_results = 1
+    # default IA mediatypes to search
+    @mediatypes = ["texts", "audio"]
+    # Should the web link to further results be shown? default to true
+    @show_web_link = true
+    @display_name = "the Internet Archive"
+    @http_timeout = 5.seconds
+    @include_search_inside = false
+    @credits = {
+      "The Internet Archive" => "http://archive.org/"
+    }
+    super(config)
+    @num_results_for_types ||= {}
+    @mediatypes.each do |type|
+      @num_results_for_types[type] ||= @num_results
+    end
+  end
+  def handle(request)
+    begin
+      do_query(request)
+    rescue Timeout::Error => e
+      return request.dispatched(self, false, e)
+    end
+    return request.dispatched(self, true)
+  end
+  # Searches IA for the request's title and creator and adds service
+  # responses for the hits, grouped by configured mediatype.
+  #
+  # Returns nil without searching when title or creator is blank, or when
+  # the referent has atitle, issue or volume metadata. Any exception raised
+  # while fetching the results is logged and re-raised.
+  def do_query(request)
+    # get the search terms for use in both fulltext search and highlighted_link
+    # IA does index apostrophes, although not generally other punctuation. Need to keep em.
+    search_terms = {:title => get_search_title(request.referent ,:keep_apostrophes=>true),
+    :creator => get_search_creator(request.referent)}
+    # We need both title and author to continue
+    return nil if (search_terms[:title].blank? || search_terms[:creator].blank?)
+    # Return if this is a journal article link, an IA search can do nothing
+    # for us except waste CPU cycles for us and IA.
+    metadata = request.referent.metadata
+    return nil unless metadata["atitle"].blank? &&
+                      metadata["issue"].blank? &&
+                      metadata["volume"].blank?
+    # create one link that searches all configured mediatypes
+    link = @url + ia_params(search_terms)
+    # open-uri conveniently follows the redirect for us. Alas, it
+    # doesn't give us access to the IA http status code response though.
+    begin
+      # Call Timeout.timeout and URI#open explicitly rather than the bare
+      # Kernel-level timeout()/open(url) helpers: those are deprecated in
+      # later Ruby versions, while these forms behave the same and never
+      # fall through to opening a local file or pipe.
+      response = Timeout.timeout(@http_timeout.to_i) {
+        URI.parse(link).open.read
+      }
+    rescue Exception => e
+      # Log more info for exception, and then just forward exception on,
+      # we don't have any way to handle it.
+      Rails.logger.error("InternetArchive exception, for url[[#{link}]] , Exception #{e.class}")
+      raise e
+    end
+    if response.blank?
+      Rails.logger.warn("InternetArchive returned empty response for #{link}")
+      return nil
+    end
+    doc = MultiJson.decode(response)
+    results = doc['response']['docs']
+    @mediatypes.each do |type|
+      type_results = get_results_by_type(results, type)
+      # if we have more results than we want to show in the main view
+      # we can create a link (highlighted_link) to the search in the sidebar
+      num_found = type_results.length #doc['response']['numFound']
+      if (@show_web_link and not type_results.empty? and @num_results_for_types[type] < num_found )
+        do_web_link(request, search_terms, type, num_found)
+      end
+      # Check for search inside only for first result of type 'texts'
+      if (@include_search_inside &&
+          type == 'texts' &&
+          (first_hit = type_results[0]) &&
+          (identifier = first_hit["identifier"])
+          )
+        direct_url = URI.parse("http://www.archive.org/stream/" + identifier)
+        # Head request, if we get a 200, we think it means we have page
+        # turner with search.
+        # NOTE(review): this HEAD request is not covered by @http_timeout.
+        req = Net::HTTP.new(direct_url.host, direct_url.port)
+        head_response = req.request_head(direct_url.path)
+        if head_response.code == "200"
+          # search inside!
+          request.add_service_response(
+            :service => self,
+            :display_text=> @display_name,
+            :url => direct_url.to_s,
+            :service_type_value => :search_inside
+          )
+        end
+      end
+      # add a service response for each result for this mediatype
+      type_results.each_with_index do |result, index|
+        break if index == @num_results_for_types[type]
+        display_name = @display_name
+        if ( result["collection"] && COLLECTION_LABELS[result["collection"][0]])
+          display_name += ": " + COLLECTION_LABELS[result["collection"][0]]
+        elsif ( result["collection"])
+          display_name += ": " + result["collection"][0].titlecase
+        end
+        #note = result['title']
+        #note << " by " << result['creator'].join(', ') if result['creator']
+        service_type = SERVICE_TYPE_MAP[type]
+        request.add_service_response(
+            :service=>self,
+            :display_text=>display_name,
+            :url=>create_result_url(result),
+            :match_reliability => ServiceResponse::MatchUnsure,
+            :edition_str => edition_str(result),
+            :service_type_value => service_type )
+      end
+    end
+  end
+  # Here we create params in the format that the IA advanced search needs.
+  # These are solr-like params.
+  def ia_params(search_terms)
+    return nil if search_terms[:title].nil?
+    params = 'fl%5B%5D=*&fmt=json&xmlsearch=Search' #&indent=yes
+    params << "&rows=999&q=" #is 999 too many or even too few?
+    params << create_query_params(search_terms)
+  end
+  def create_result_url(result)
+    'http://archive.org/details/' + result['identifier']
+  end
+  # displaying the num_found relies on the number of results from ia_params being
+  # enough to capture all results for a mediatype. If there are more potential
+  # results then num_found will not be accurate. But good enough.
+  def do_web_link(request, search_terms, type, num_found)
+    display_text = "#{num_found} digital #{type.singularize} " + (num_found > 1 ? "files" : "file")
+    url = create_web_link_url(search_terms, type)
+    request.add_service_response(
+        :service=>self,
+        :url=>url,
+        :display_text=>display_text,
+        :service_type_value => :highlighted_link
+     )
+  end
+  def create_web_link_url(search_terms, type)
+    'http://www.archive.org/search.php?query=' << create_query_params(search_terms, type)
+    #url << CGI.escape('mediatype:' << type << ' AND ')
+  end
+  # if given a type it will only search for one mediatype. otherwise it
+  # does an OR search for all configured mediatypes
+  def create_query_params(search_terms, type=nil)
+    # Downcase params to avoid weird misconfiguration in IA's SOLR
+    # installation, where it's interpreting uppercase words as
+    # commands even within quotes. Also take out any parens in input.
+    # Also IA does not semi-colons in input?!?
+    title = safe_argument(search_terms[:title])
+    params = 'title:' << CGI.escape('"' << title << '"')
+    if (! search_terms[:creator].blank?)
+      creator = safe_argument(search_terms[:creator])
+      params << '+AND+creator:' << CGI.escape('(' << creator << ')')
+    end
+    mt = []
+    params <<  '+AND+('
+    if type
+      params << 'mediatype:' << type
+    else
+      @mediatypes.each do |t|
+        mt << ('mediatype:' << t)
+      end
+      params << mt.join('+OR+')
+    end
+    params << ')' #closing the mediatypes with a paren
+  end
+  # used on what will be values stuck into a URL as search terms,
+  # does NOT cgi escape, but does safe-ify them in other ways for IA.
+  def safe_argument(string)
+    # Downcase params to avoid weird misconfiguration in IA's SOLR
+    # installation, where it's interpreting uppercase words as
+    # commands even within quotes.
+    output = string.downcase
+    # Remove parens, semi-colons, and brackets -- they all mess
+    # up IA, which thinks they are special chars. Remove double quote,
+    # special char, which sometimes we want to use ourselves. Replace
+    # all with spaces to avoid accidentally conjoining words.
+    # (could be
+    # escaping instead? Not worth it, we don't want to search
+    # on these anyway. Remove ALL punctuation? Not sure.)
+    output.gsub!(/[)(\]\[;"\=]/, ' ')
+    return output
+  end
+  def get_results_by_type(results, type)
+    results.map{|doc| doc if doc["mediatype"] == type}.compact
+  end
+  def edition_str(result)
+    parts = []
+    parts.push( result['title']) unless result['title'].blank?
+    parts.push( result['publisher'] ) unless result['publisher'].blank?
+    parts.push( result['year']) unless result['year'].blank?
+    edition_str = parts.join(', ')
+    edition_str = nil if edition_str.blank?
+    return edition_str
+  end
+  # catch and redirect response_url for search_inside
+  def response_url(service_type, submitted_params)
+    if ( ! (service_type.service_type_value.name == "search_inside" ))
+      return super(service_type, submitted_params)
+    else
+      base = service_type.service_response[:url]
+      query = CGI.escape(submitted_params["query"] || "")
+      url = base + "#search/#{query}"
+      return url
+    end
+  end
+  ## Collection labels
+  # A list of collection labels can be found here:
+  # http://www.archive.org/advancedsearch.php?q=mediatype%3Acollection&fl[]=collection&fl[]=identifier&fl[]=title&sort[]=&sort[]=&sort[]=&rows=9999&indent=yes&fmt=json&xmlsearch=Search
+  # FIXME: either fetch these dynamically at intervals or add a fuller set
+  #   below. When this was written there were over 4300 collections.
+  # Since this is done as a static hash, it is kept in a constant. The hash
+  # currently contains a small selection of collections which include the
+  # 'audio' mediatype and all that contain the 'texts' mediatype.
+  COLLECTION_LABELS = {
+    "CaliforniaFishandGame"=>"California Fish and Game",
+    "ol_data"=>"Open Library Data",
+    "worldhealthorganization"=>"World Health Organization",
+    "opensource_movies"=>"Open Source Movies",
+    "clairetcarneylibrary"=>
+      "Claire T. Carney Library, University of Massachusetts Dartmouth",
+    "university_of_illinois_urbana-champaign"=>
+      "University of Illinois Urbana-Champaign",
+    "smithsonian_books"=>"Smithsonian",
+    "nhml_london"=>"Natural History Museum Library, London",
+    "animationandcartoons"=>"Animation & Cartoons",
+    "university_of_toronto_regis"=>"Regis College Library",
+    "vlogs"=>"Vlogs",
+    "opensource"=>"Open Source Books",
+    "USGovernmentDocuments"=>"US Government Documents",
+    "danceman"=>"Dance Manuals",
+    "additional_collections"=>"Additional Collections",
+    "internet_archive_books"=>"Internet Archive Books",
+    "sloan"=>"Sloan Foundation",
+    "iacl"=>"Children's Library",
+    "audio_religion"=>"Spirituality & Religion",
+    "microfilm"=>"Books from Microfilm",
+    "toronto"=>"Canadian Libraries",
+    "prelinger"=>"Prelinger Archives",
+    "bostonpubliclibrary"=>"Boston Public Library",
+    "sports"=>"Sports Videos",
+    "universallibrary"=>"Universal Library",
+    "sfpl"=>"The San Francisco Public Library",
+    "university_of_toronto_knox"=>"Caven Library, Knox College",
+    "memorial_university"=>"Memorial University of Newfoundland & Labrador",
+    "MBLWHOI"=>"MBLWHOI Library",
+    "oreilly_books"=>"O'Reilly",
+    "burstein"=>"The Burstein Alice in Wonderland Collection",
+    "ucroho"=>"Regional Oral History Office",
+    "Brandeis_University"=>"Brandeis University Libraries",
+    "birney_anti_slavery_collection"=>"Birney Anti-Slavery Collection",
+    "Johns_Hopkins_University"=>"The Johns Hopkins University Sheridan Libraries",
+    "culturalandacademicfilms"=>"Cultural & Academic Films",
+    "Harvard_University"=>"Harvard University",
+    "montana_state_publications"=>"Montana State Government Publications",
+    "national_institute_for_newman_studies"=>
+      "National Institute for Newman Studies",
+    "buddha"=>"Buddha Books",
+    "university_of_toronto_fisher"=>"Thomas Fisher Rare Book Library",
+    "ryerson_university"=>"Ryerson University",
+    "university_of_toronto_emmanuel"=>
+      "Emmanuel College Library, Victoria University",
+    "unica"=>"Unica: Rare Books from UIUC",
+    "mugar"=>"The Mugar Memorial Library, Boston University",
+    "havergal"=>"Havergal College",
+    "university_of_toronto_gerstein"=>
+      "University of Toronto - Gerstein Science Information Centre",
+    "NY_Botanical_Garden"=>"The New York Botanical Garden",
+    "calacademy"=>"California Academy of Sciences",
+    "chm_fiche"=>"Computer History Museum",
+    "university_of_toronto_crrs"=>
+      "Centre for Reformation and Renaissance Studies Library",
+    "djo"=>"Dickens Journals Online",
+    "unclibraries"=>"University of North Carolina at Chapel Hill",
+    "university_of_toronto_oise"=>"OISE/UT Library",
+    "newsandpublicaffairs"=>"News & Public Affairs",
+    "biodiversity"=>"Biodiversity Heritage Library",
+    "university_of_ottawa"=>"University of Ottawa",
+    "Wellesley_College_Library"=>"Wellesley College Library",
+    "audio_foreign"=>"Non-English Audio",
+    "national_library_of_australia"=>"National Library of Australia",
+    "datadumps"=>"Open Library Data",
+    "microfilmreel"=>"Reels of Microfilm",
+    "saint_marys_college"=>"Saint Mary's College of California",
+    "university_of_toronto_pratt"=>"E.J. Pratt Library",
+    "Boston_College_Library"=>"Boston College Library",
+    "uchicago"=>"University of Chicago",
+    "audio_podcast"=>"Podcasts",
+    "tufts"=>"Tufts University",
+    "opensource_audio"=>"Open Source Audio",
+    "university_of_toronto_trinity"=>"John W. Graham Library, Trinity College",
+    "audio_tech"=>"Computers & Technology",
+    "moviesandfilms"=>"Movies",
+    "etree"=>"Live Music Archive",
+    "marcuslucero"=>"the Marucs Lucero",
+    "opencontentalliance"=>"Open Content Alliance",
+    "radioprograms"=>"Radio Programs",
+    "university_of_toronto_pims"=>"PIMS - University of Toronto",
+    "newspapers"=>"Newspapers",
+    "university_of_california_libraries"=>"University of California Libraries",
+    "millionbooks"=>"Million Book Project",
+    "university_of_toronto_robarts"=>"University of Toronto - Robarts Library",
+    "university_of_toronto"=>"University of Toronto",
+    "montana_state_library"=>"Montana State Library",
+    "bancroft_library"=>"The Bancroft Library",
+    "prelinger_library"=>"Prelinger Library",
+    "libraryofcongress"=>"The Library of Congress",
+    "richtest"=>"Test books from California",
+    "mobot"=>"Missouri Botanical Garden",
+    "gamevideos"=>"Video Games",
+    "blc"=>"The Boston Library Consortium",
+    "cdl"=>"California Digital Library",
+    "Princeton"=>"Princeton Theological Seminary",
+    "mcmaster_university"=>"McMaster University",
+    "sanfranciscopubliclibrary"=>"San Francisco Public Library",
+    "spanish_texts"=>"The Spanish Language Library",
+    "boston_college_libraries"=>"The Boston College Libraries",
+    "gutenberg"=>"Project Gutenberg",
+    "Music_UniversityofToronto"=>"Music - University of Toronto",
+    "msn_books"=>"Microsoft",
+    "youth_media"=>"Youth Media",
+    "independent"=>"independent texts",
+    "carletonlibrary"=>"Carleton University Library",
+    "arpanet"=>"Arpanet",
+    "yahoo_books"=>"Yahoo!",
+    "johnadamsBPL"=>"The John Adams Library at the Boston Public Library",
+    "library_of_congress"=>"The Library of Congress",
+    "ColumbiaUniversityLibraries"=>"Columbia University Libraries",
+    "university_of_guelph"=>"University of Guelph",
+    "GratefulDead"=>"Grateful Dead",
+    "audio_bookspoetry"=>"Audio Books & Poetry",
+    "ncsulibraries"=>"North Carolina State University Libraries",
+    "brown_university_library"=>"Brown University Library",
+    "Allen_County_Public_Library"=>"Allen County Public Library",
+    "yrlsc"=>"The Charles E. Young Research Library Special Collections",
+    "torontotest"=>"Test books from Canada",
+    "americana"=>"American Libraries",
+    "librivoxaudio"=>"LibriVox",
+    "audio_music"=>"Music & Arts",
+    "toronto_public_library"=>"Toronto Public Library",
+    "getty"=>"Research Library, Getty Research Institute",
+    "ontla"=>"The Legislative Assembly of Ontario Collection",
+    "TheChristianRadical"=>"The Christian Radical",
+    "netlabels"=>"Netlabels",
+    "newyorkpubliclibrary"=>"New York Public Library",
+    "University_of_New_Hampshire_Library"=>"University of New Hampshire Library",
+    "cbk"=>"Cook Books and Home Economics",
+    "audio_news"=>"News & Public Affairs",
+    "ant_texts"=>"Ant Texts",
+    "computersandtechvideos"=>"Computers & Technology",
+    "the_beat_within"=>"The Beat Within Magazine",
+    "university_of_toronto_kelly"=>"University of Toronto - John M Kelly Library",
+    "library_and_archives_canada"=>"Library and Archives Canada",
+    "ephemera"=>"Ephemeral Films",
+    "OXFAM"=>"Oxfam",
+    "foreignlanguagevideos"=>"Non-English Videos",
+    "MontanaStateLibrary"=>"Montana State Library",
+    "EarthSciences_UniversityofToronto"=>"Earth Sciences University of Toronto",
+    "octavo"=>"Octavo",
+    "artsandmusicvideos"=>"Arts & Music"
+  }
+end
+# Test URLs using defaults
+# Shows texts and audio under fulltext, but only a see also for texts
+# http://localhost:3000/resolve?&rft.title=Fairy+Tales&rft.aulast=Andersen&ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
+#
+# Shows texts and audio, but only see also for audio
+# http://localhost:3000/resolve?&rft.title=Frankenstein&rft.aulast=Shelley&ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Abook
+#
+# WorldCat links
+# If you have OpenURL Referrer or another Firefox add-on configured to
+# turn COinS into an OpenURL to localhost:3000, these links have hits in IA.
+# Frankenstein: http://www.worldcat.org/oclc/33045872
+# Alice in Wonderland: http://www.worldcat.org/oclc/221499
+# Fairy Tales by Andersen: http://www.worldcat.org/oclc/68711386
+# Adventures of Huckleberry Finn: http://www.worldcat.org/oclc/2985768
+# Gift of the Magi: http://www.worldcat.org/oclc/9065223
+# Heart of the West: http://www.worldcat.org/oclc/49293242
+# Little Women; or, Meg, Jo, Beth, and Amy: http://www.worldcat.org/oclc/1157
+#   FIXME should we remove everything after ; as well?
+# Letters from a Cat: http://www.worldcat.org/oclc/13529549
+# Uncle Tom's Cabin: http://www.worldcat.org/oclc/7945691
+#   needed apostrophe to succeed
+# Goody Two-Shoes: http://www.worldcat.org/oclc/32678428
+# The Snow-Image: http://www.worldcat.org/oclc/5020610
+# Les Canadiens-Français: http://www.worldcat.org/oclc/186641188
+#   FIXME should match 1 record and doesn't. character encoding problems?
+# John L. Stoddard's Lectures: http://www.worldcat.org/oclc/2181690