RubyGems - umlaut - Versions diffs - 3.0.0alpha1 - Mend

umlaut 3.0.0alpha1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (293) hide show

data/LICENSE +7 -0
data/README.md +49 -0
data/Rakefile +37 -0
data/app/assets/images/error.gif +0 -0
data/app/assets/images/export_bg_bot.gif +0 -0
data/app/assets/images/export_bg_mid.gif +0 -0
data/app/assets/images/export_bg_top.gif +0 -0
data/app/assets/images/famfamfam/book_open.png +0 -0
data/app/assets/images/famfamfam/cross.png +0 -0
data/app/assets/images/famfamfam/page_sound.gif +0 -0
data/app/assets/images/famfamfam/page_text.gif +0 -0
data/app/assets/images/famfamfam/page_up.gif +0 -0
data/app/assets/images/famfamfam/page_white.png +0 -0
data/app/assets/images/famfamfam/readme.html +1495 -0
data/app/assets/images/famfamfam/tiny_cross.png +0 -0
data/app/assets/images/frame_remove.gif +0 -0
data/app/assets/images/ico_go.gif +0 -0
data/app/assets/images/jhu_findit.gif +0 -0
data/app/assets/images/list_closed.png +0 -0
data/app/assets/images/list_open.png +0 -0
data/app/assets/images/more_info.gif +0 -0
data/app/assets/images/rails.png +0 -0
data/app/assets/images/request.gif +0 -0
data/app/assets/images/spinner.gif +0 -0
data/app/assets/javascripts/umlaut/ajax_windows.js +35 -0
data/app/assets/javascripts/umlaut/ensure_window_size.js.erb +34 -0
data/app/assets/javascripts/umlaut/expand_contract_toggle.js +25 -0
data/app/assets/javascripts/umlaut/search_autocomplete.js +46 -0
data/app/assets/javascripts/umlaut/simple_visible_toggle.js +8 -0
data/app/assets/javascripts/umlaut/update_html.js +152 -0
data/app/assets/javascripts/umlaut.js +17 -0
data/app/assets/stylesheets/umlaut.css +857 -0
data/app/controllers/application_controller.rb +14 -0
data/app/controllers/export_email_controller.rb +123 -0
data/app/controllers/js_helper_controller.rb +10 -0
data/app/controllers/link_router_controller.rb +87 -0
data/app/controllers/open_search_controller.rb +9 -0
data/app/controllers/resolve_controller.rb +288 -0
data/app/controllers/resource_controller.rb +83 -0
data/app/controllers/search_controller.rb +328 -0
data/app/controllers/search_methods/sfx3.rb +148 -0
data/app/controllers/search_methods/sfx4.rb +257 -0
data/app/controllers/search_methods/sfx_api.rb +47 -0
data/app/controllers/store_controller.rb +64 -0
data/app/controllers/umlaut/controller_behavior.rb +20 -0
data/app/controllers/umlaut/controller_logic.rb +96 -0
data/app/controllers/umlaut/error_handling.rb +48 -0
data/app/controllers/umlaut_controller.rb +112 -0
data/app/helpers/application_helper.rb +4 -0
data/app/helpers/emailer_helper.rb +43 -0
data/app/helpers/export_email_helper.rb +34 -0
data/app/helpers/open_search_helper.rb +7 -0
data/app/helpers/resolve_helper.rb +225 -0
data/app/helpers/search_helper.rb +50 -0
data/app/helpers/umlaut/footer_helper.rb +64 -0
data/app/helpers/umlaut/helper.rb +62 -0
data/app/helpers/umlaut/html_head_helper.rb +37 -0
data/app/helpers/umlaut/url_generation.rb +77 -0
data/app/mailers/emailer.rb +48 -0
data/app/models/clickthrough.rb +2 -0
data/app/models/collection.rb +259 -0
data/app/models/crossref_lookup.rb +2 -0
data/app/models/dispatched_service.rb +58 -0
data/app/models/permalink.rb +29 -0
data/app/models/referent.rb +473 -0
data/app/models/referent_value.rb +14 -0
data/app/models/request.rb +449 -0
data/app/models/service_response.rb +179 -0
data/app/models/service_store.rb +59 -0
data/app/models/service_type_value.rb +58 -0
data/app/models/service_wave.rb +150 -0
data/app/models/sfx_db/az_additional_title.rb +11 -0
data/app/models/sfx_db/az_letter_group.rb +11 -0
data/app/models/sfx_db/az_title.rb +38 -0
data/app/models/sfx_db/az_title_v2.rb +34 -0
data/app/models/sfx_db/isbn.rb +12 -0
data/app/models/sfx_db/issn.rb +12 -0
data/app/models/sfx_db/object.rb +35 -0
data/app/models/sfx_db/object_portfolio.rb +6 -0
data/app/models/sfx_db/publisher.rb +10 -0
data/app/models/sfx_db/sfx_db_base.rb +54 -0
data/app/models/sfx_db/target.rb +9 -0
data/app/models/sfx_db/target_service.rb +10 -0
data/app/models/sfx_db/title.rb +10 -0
data/app/models/sfx_db.rb +10 -0
data/app/models/sfx_url.rb +35 -0
data/app/views/emailer/citation.text.erb +28 -0
data/app/views/emailer/short_citation.text.erb +8 -0
data/app/views/export_email/_email.html.erb +25 -0
data/app/views/export_email/_send_email.html.erb +3 -0
data/app/views/export_email/_send_txt.html.erb +3 -0
data/app/views/export_email/_txt.html.erb +62 -0
data/app/views/export_email/email.html.erb +3 -0
data/app/views/export_email/send_email.html.erb +1 -0
data/app/views/export_email/send_txt.html.erb +1 -0
data/app/views/export_email/txt.html.erb +3 -0
data/app/views/js_helper/loader.erb.js +13 -0
data/app/views/layouts/umlaut.html.erb +52 -0
data/app/views/open_search/index.html.erb +9 -0
data/app/views/resolve/_api_in_progress.xml.erb +21 -0
data/app/views/resolve/_background_progress.html.erb +51 -0
data/app/views/resolve/_background_updater.html.erb +38 -0
data/app/views/resolve/_citation.html.erb +87 -0
data/app/views/resolve/_coins.html.erb +1 -0
data/app/views/resolve/_compact_citation.html.erb +33 -0
data/app/views/resolve/_cover_image.html.erb +35 -0
data/app/views/resolve/_fulltext.html.erb +55 -0
data/app/views/resolve/_help.html.erb +17 -0
data/app/views/resolve/_holding.html.erb +91 -0
data/app/views/resolve/_related_items.html.erb +35 -0
data/app/views/resolve/_search_inside.html.erb +62 -0
data/app/views/resolve/_section_display.html.erb +49 -0
data/app/views/resolve/_service_errors.html.erb +29 -0
data/app/views/resolve/_standard_response_item.html.erb +89 -0
data/app/views/resolve/api.xml.builder +72 -0
data/app/views/resolve/background_status.html.erb +26 -0
data/app/views/resolve/index.html.erb +73 -0
data/app/views/resolve/partial_html_sections.xml.erb +30 -0
data/app/views/search/_a_to_z.html.erb +6 -0
data/app/views/search/_citation.html.erb +94 -0
data/app/views/search/_pager.html.erb +60 -0
data/app/views/search/books.html.erb +103 -0
data/app/views/search/journal_search.html.erb +90 -0
data/app/views/search/journals.html.erb +167 -0
data/app/views/search/opensearch_description.rxml +10 -0
data/app/views/testing/index.html.erb +1 -0
data/app/views/umlaut/README +5 -0
data/app/views/umlaut/error.html.erb +45 -0
data/db/migrate/01_umlaut_init.rb +113 -0
data/db/orig_fixed_data/service_type_values.yml +120 -0
data/db/seeds.rb +7 -0
data/lib/CronTab.rb +192 -0
data/lib/aws_product_sign.rb +146 -0
data/lib/exlibris/aleph/patron.rb +64 -0
data/lib/exlibris/aleph/record.rb +54 -0
data/lib/exlibris/aleph/rest_api.rb +29 -0
data/lib/exlibris/primo/holding.rb +192 -0
data/lib/exlibris/primo/rsrc.rb +17 -0
data/lib/exlibris/primo/searcher.rb +276 -0
data/lib/exlibris/primo/source/aleph.rb +46 -0
data/lib/exlibris/primo/source/distribution/nyu_aleph.rb +323 -0
data/lib/exlibris/primo/toc.rb +17 -0
data/lib/exlibris/primo_ws.rb +140 -0
data/lib/generators/templates/umlaut_services.yml +237 -0
data/lib/generators/umlaut/asset_hooks_generator.rb +44 -0
data/lib/generators/umlaut/install_generator.rb +110 -0
data/lib/hip3/bib.rb +291 -0
data/lib/hip3/bib_searcher.rb +302 -0
data/lib/hip3/custom_field_lookup.rb +44 -0
data/lib/hip3/holding.rb +50 -0
data/lib/hip3/item.rb +65 -0
data/lib/hip3/receipt.rb +7 -0
data/lib/hip3/serial_copy.rb +82 -0
data/lib/holding.rb +32 -0
data/lib/marc_helper.rb +254 -0
data/lib/metadata_helper.rb +312 -0
data/lib/opensearch_feed.rb +398 -0
data/lib/opensearch_query.rb +98 -0
data/lib/referent_filter.rb +16 -0
data/lib/referent_filters/dissertation_catch.rb +45 -0
data/lib/section_renderer.rb +503 -0
data/lib/service.rb +336 -0
data/lib/service_adaptors/ajax_export.rb +37 -0
data/lib/service_adaptors/amazon.rb +412 -0
data/lib/service_adaptors/blacklight.rb +327 -0
data/lib/service_adaptors/book_finder.rb +40 -0
data/lib/service_adaptors/bx.rb +51 -0
data/lib/service_adaptors/cover_thing.rb +73 -0
data/lib/service_adaptors/elsevier_cover.rb +57 -0
data/lib/service_adaptors/email_export.rb +10 -0
data/lib/service_adaptors/ezproxy.rb +171 -0
data/lib/service_adaptors/google_book_search.rb +442 -0
data/lib/service_adaptors/gpo.rb +124 -0
data/lib/service_adaptors/hathi_trust.rb +308 -0
data/lib/service_adaptors/hip3_service.rb +150 -0
data/lib/service_adaptors/hip_holding_search.rb +237 -0
data/lib/service_adaptors/internet_archive.rb +488 -0
data/lib/service_adaptors/isbn_db.rb +86 -0
data/lib/service_adaptors/isi.rb +258 -0
data/lib/service_adaptors/jcr.rb +146 -0
data/lib/service_adaptors/opac.rb +351 -0
data/lib/service_adaptors/open_library.rb +316 -0
data/lib/service_adaptors/open_library_cover.rb +73 -0
data/lib/service_adaptors/primo_service.rb +392 -0
data/lib/service_adaptors/primo_source.rb +78 -0
data/lib/service_adaptors/pubmed.rb +133 -0
data/lib/service_adaptors/request_to_fixture.rb +68 -0
data/lib/service_adaptors/scopus.rb +295 -0
data/lib/service_adaptors/sfx-new.rb +557 -0
data/lib/service_adaptors/sfx.rb +566 -0
data/lib/service_adaptors/sfx_backchannel_record.rb +69 -0
data/lib/service_adaptors/txt_holding_export.rb +32 -0
data/lib/service_adaptors/ulrichs_cover.rb +57 -0
data/lib/service_adaptors/ulrichs_link.rb +47 -0
data/lib/service_adaptors/worldcat.rb +116 -0
data/lib/service_adaptors/worldcat_identities.rb +591 -0
data/lib/tasks/umlaut.rake +134 -0
data/lib/umlaut/default_configuration.rb +5 -0
data/lib/umlaut/routes.rb +136 -0
data/lib/umlaut/version.rb +3 -0
data/lib/umlaut.rb +37 -0
data/lib/umlaut_configurable.rb +343 -0
data/lib/umlaut_http.rb +100 -0
data/lib/xml_schema_helper.rb +109 -0
data/test/dummy/Rakefile +7 -0
data/test/dummy/app/assets/javascripts/application.js +13 -0
data/test/dummy/app/assets/stylesheets/application.css +15 -0
data/test/dummy/app/controllers/application_controller.rb +3 -0
data/test/dummy/app/controllers/umlaut_controller.rb +112 -0
data/test/dummy/app/helpers/application_helper.rb +2 -0
data/test/dummy/app/views/layouts/application.html.erb +14 -0
data/test/dummy/config/application.rb +45 -0
data/test/dummy/config/boot.rb +10 -0
data/test/dummy/config/database-jhu.yml +44 -0
data/test/dummy/config/database.yml +25 -0
data/test/dummy/config/environment.rb +5 -0
data/test/dummy/config/environments/development.rb +34 -0
data/test/dummy/config/environments/production.rb +60 -0
data/test/dummy/config/environments/test.rb +39 -0
data/test/dummy/config/initializers/backtrace_silencers.rb +7 -0
data/test/dummy/config/initializers/inflections.rb +10 -0
data/test/dummy/config/initializers/mime_types.rb +5 -0
data/test/dummy/config/initializers/secret_token.rb +7 -0
data/test/dummy/config/initializers/session_store.rb +8 -0
data/test/dummy/config/initializers/wrap_parameters.rb +14 -0
data/test/dummy/config/locales/en.yml +5 -0
data/test/dummy/config/routes.rb +61 -0
data/test/dummy/config/umlaut_services.yml +237 -0
data/test/dummy/config.ru +4 -0
data/test/dummy/db/migrate/20111228211210_umlaut_init.rb +113 -0
data/test/dummy/db/schema.rb +124 -0
data/test/dummy/log/development.log +12981 -0
data/test/dummy/log/production.log +0 -0
data/test/dummy/public/404.html +26 -0
data/test/dummy/public/422.html +26 -0
data/test/dummy/public/500.html +26 -0
data/test/dummy/public/favicon.ico +0 -0
data/test/dummy/script/rails +6 -0
data/test/dummy/tmp/cache/assets/C5F/340/sprockets%2F99692920160b7a279b86a80415b79db7 +0 -0
data/test/dummy/tmp/cache/assets/C70/4D0/sprockets%2F034ad2036e623081bd352800786dfe80 +0 -0
data/test/dummy/tmp/cache/assets/C73/920/sprockets%2Fd371318f22900492fd180f17c5e2a504 +9268 -0
data/test/dummy/tmp/cache/assets/C80/980/sprockets%2Fc94807409c1523d43e18d25f35d93c41 +0 -0
data/test/dummy/tmp/cache/assets/C8F/780/sprockets%2Fe47e28558116fb5f8038754e60d1961d +11769 -0
data/test/dummy/tmp/cache/assets/CAA/EB0/sprockets%2F1d179210e8b76f1ea63c802688a015e4 +9271 -0
data/test/dummy/tmp/cache/assets/CBB/9C0/sprockets%2F706f28923fb754cad04b9107c89986a1 +0 -0
data/test/dummy/tmp/cache/assets/CBF/B60/sprockets%2F08ca89671549936265dcb673bf02e36f +0 -0
data/test/dummy/tmp/cache/assets/CC9/9F0/sprockets%2F306166316e2cafd13c15e62b51a2339d +0 -0
data/test/dummy/tmp/cache/assets/CF6/F20/sprockets%2F5b2ffa1103079dfd555197838f87a99f +0 -0
data/test/dummy/tmp/cache/assets/CF7/2B0/sprockets%2F25a7c73655bd3598173b39d9f98bcd46 +862 -0
data/test/dummy/tmp/cache/assets/CFE/080/sprockets%2F37fe9f4255baddbd549a659914929398 +0 -0
data/test/dummy/tmp/cache/assets/D22/060/sprockets%2F9aec77b768e91a802d284271c58e2f7e +21357 -0
data/test/dummy/tmp/cache/assets/D32/A10/sprockets%2F13fe41fee1fe35b49d145bcc06610705 +0 -0
data/test/dummy/tmp/cache/assets/D33/6D0/sprockets%2F500129c57f1146e556ec3aacd6cd38c1 +0 -0
data/test/dummy/tmp/cache/assets/D33/FD0/sprockets%2F2ba0b4e6334a77b923e5f770381bb2bf +0 -0
data/test/dummy/tmp/cache/assets/D42/C20/sprockets%2Fbcf14e437b1582bf93b77670acf8e090 +21353 -0
data/test/dummy/tmp/cache/assets/D50/A30/sprockets%2F7d8b294ac433db5d056538f8cf7c66b9 +0 -0
data/test/dummy/tmp/cache/assets/D54/ED0/sprockets%2F71c9fa01091d432b131da3bb73faf3d4 +872 -0
data/test/dummy/tmp/cache/assets/D65/590/sprockets%2Fc1bb92fc3406a126b7dd302edc96d629 +0 -0
data/test/dummy/tmp/cache/assets/D71/6B0/sprockets%2Fde558b71b494cf09b1bf055c8dff0353 +0 -0
data/test/dummy/tmp/cache/assets/D72/610/sprockets%2Fa8c708eeb30ef93de34d755d4f45d023 +859 -0
data/test/dummy/tmp/cache/assets/D76/AD0/sprockets%2Fe2158cde93188cf5ab6457bc6d6602ec +0 -0
data/test/dummy/tmp/cache/assets/D7A/E40/sprockets%2F9622ffcc499a57627cd1bb18fe31b8e4 +11772 -0
data/test/dummy/tmp/cache/assets/D84/210/sprockets%2Fabd0103ccec2b428ac62c94e4c40b384 +0 -0
data/test/dummy/tmp/cache/assets/D9B/770/sprockets%2F8aacf02eb7dbb0949704b28f27b87e0b +0 -0
data/test/dummy/tmp/cache/assets/DA6/A80/sprockets%2F92e26d8e58d5bcc8b8f6c25d1b05b9c1 +0 -0
data/test/dummy/tmp/cache/assets/DE8/790/sprockets%2Fd1333bde2b9aafcc712d11dd09ab35d8 +0 -0
data/test/dummy/tmp/cache/assets/DF7/F30/sprockets%2F7bc16c4109b17fabe29f8ddbbf732d1c +374 -0
data/test/dummy/tmp/cache/assets/E03/570/sprockets%2F493bdc0ac14cd4f57fdfe4253f992bde +0 -0
data/test/dummy/tmp/cache/assets/E04/890/sprockets%2F2f5173deea6c795b8fdde723bb4b63af +0 -0
data/test/dummy/tmp/cache/assets/E0B/4B0/sprockets%2F7988df51a61c81ce6ede4a2d4c8cce4f +377 -0
data/test/dummy/tmp/cache/assets/E5F/960/sprockets%2Fdc007b6cad5c7ef08e33ec28cfff0ef6 +0 -0
data/test/fixtures/dispatched_services.yml +5 -0
data/test/fixtures/permalinks.yml +5 -0
data/test/fixtures/referent_values.yml +1734 -0
data/test/fixtures/referents.yml +156 -0
data/test/fixtures/requests.yml +284 -0
data/test/fixtures/service_responses.yml +5 -0
data/test/fixtures/sfx_urls.yml +4 -0
data/test/performance/browsing_test.rb +9 -0
data/test/test_helper.rb +10 -0
data/test/umlaut_test.rb +7 -0
data/test/unit/aleph_patron_test.rb +39 -0
data/test/unit/aleph_record_benchmarks.rb +28 -0
data/test/unit/aleph_record_test.rb +30 -0
data/test/unit/aws_product_sign_test.rb +93 -0
data/test/unit/collection_test.rb +76 -0
data/test/unit/google_book_search_test.rb +101 -0
data/test/unit/primo_searcher_test.rb +403 -0
data/test/unit/primo_service_test.rb +939 -0
data/test/unit/primo_ws_test.rb +131 -0
data/test/unit/service_response_test.rb +9 -0
data/test/unit/service_test.rb +33 -0
metadata +580 -0

data/lib/hip3/receipt.rb ADDED Viewed

@@ -0,0 +1,7 @@
+module Hip3
+  # Receipt carries no state and a single method in this file.
+  # NOTE(review): looks like a development stub -- confirm before relying on it.
+  class Receipt
+    # Always raises a RuntimeError with the message "foo".
+    def foo
+      raise(RuntimeError, "foo")
+    end
+  end
+end

data/lib/hip3/serial_copy.rb ADDED Viewed

@@ -0,0 +1,82 @@
+module Hip3
+  # A serial copy holding. Keeps a reference to its bib; when it needs its
+  # item data it asks the bib to load everything, and the bib loads the data
+  # for all copies at once, in one fetch.
+  class SerialCopy < Holding
+    # Display labels used to look up the copy-level values by name.
+    @@Field_labels = {:location => 'Location', :collection => 'Collection', :call_no => 'Call No.', :copy_str => 'Copy No.', :status => 'Status', :notes => 'Notes'}
+    attr_writer :items # array of items; the reader is defined explicitly below
+    attr_accessor :items_loaded
+    attr_accessor :runs # array of run hashes with :label, :statement and :note
+    # argBibObj:: the bib this copy belongs to.
+    # serialXmlElement:: optional XML element; when given, the copy is
+    #                    populated from it straight away.
+    def initialize(argBibObj, serialXmlElement=nil)
+      self.bib = argBibObj
+      self.items_loaded = false
+      loadFromSerialElement(serialXmlElement) if serialXmlElement
+    end
+    # Items registered on this copy. Asks the bib to load items from its
+    # store first if they are not flagged as loaded. Always returns an array.
+    def items
+      bib.load_items_from_store unless items_loaded?
+      @items || []
+    end
+    # True only when items_loaded has been set to exactly true.
+    def items_loaded?
+      items_loaded == true
+    end
+    # Populates id, the labelled copy-level fields and the runs from a
+    # serial XML element.
+    def loadFromSerialElement( serialElement )
+      self.location_str = serialElement.at('/location').inner_text
+      self.id = serialElement.at('/copykey').inner_text
+      # Okay, this part is potentially fragile, we have to pull out based on
+      # order in the XML, not sure if that can change. Sorry, that's HIP for you.
+      copyElements = serialElement.search('/copy/cell/data/text').collect {|e| e.inner_text}
+      lookup = bib.copy_field_lookup
+      # The looked-up location replaces the raw /location text assigned above.
+      self.location_str = lookup.text_value_for(copyElements, @@Field_labels[:location])
+      self.collection_str = lookup.text_value_for(copyElements, @@Field_labels[:collection])
+      self.call_no = lookup.text_value_for(copyElements, @@Field_labels[:call_no])
+      self.copy_str = lookup.text_value_for(copyElements, @@Field_labels[:copy_str])
+      self.status_str = lookup.text_value_for(copyElements, @@Field_labels[:status])
+      self.notes = lookup.text_value_for(copyElements, @@Field_labels[:notes])
+      # Okay, got to get the 'runs' for summary holdings info.
+      self.runs ||= []
+      serialElement.search('/runlist/run').each do |run_element|
+        label = run_element.at('/runlabel').inner_text
+        run_element.search('/data/rundata').each do |rundata|
+          # Built in its own local so the XML element being iterated is
+          # never overwritten part-way through the loop.
+          run_info = {:label => label, :statement => textValue(rundata.at('/text'))}
+          run_info[:note] = textValue(rundata.at('/note'))
+          self.runs.push(run_info)
+        end
+      end
+    end
+    # Not too useful, use coverage_str_to_a instead usually.
+    def coverage_str
+      runs.to_s
+    end
+    # Over-riding. One display string per run: the label (unless blank or
+    # "Main run"), the statement, then the note if there is one. Returns an
+    # empty array when no runs have been loaded.
+    def coverage_str_to_a
+      (runs || []).collect do |r|
+        s = ''
+        (s << r[:label] << ": ") if (! r[:label].blank?) && r[:label] != "Main run"
+        s << r[:statement]
+        s << '-- ' << r[:note] if r[:note]
+        s
+      end
+    end
+    # Adds item to this copy unless it is already registered. Works on
+    # @items directly: a bare `items ||= []` would only create a local
+    # variable, and going through #items could trigger a load from the bib.
+    def register_item(item)
+      @items ||= []
+      @items.push(item) unless @items.include?(item)
+    end
+  end
+end

data/lib/holding.rb ADDED Viewed

@@ -0,0 +1,32 @@
+# A holding: an identifier plus a list of locations, each of which
+# carries its own items.
+class Holding
+  attr_accessor :locations, :identifier
+  def initialize
+    @locations = []
+  end
+  # Returns the first location whose name equals +location+, or nil.
+  def find_location(location)
+    @locations.find { |loc| loc.name == location }
+  end
+  # Returns the first item, across all locations, whose instance variable
+  # named +key+ equals +value+, or nil when nothing matches.
+  # +key+ is given without the leading "@" and may be a String or a Symbol.
+  def find_item_by_attribute(key, value)
+    ivar = "@#{key}"
+    @locations.each do |loc|
+      match = loc.items.find { |item| item.instance_variable_get(ivar) == value }
+      # Hand back the matching item itself; a bare `return` would yield nil.
+      return match if match
+    end
+    nil
+  end
+end
+# One location within a holding: a name, a code and the items kept there.
+class HoldingLocation
+  attr_accessor :name
+  attr_accessor :code
+  attr_accessor :items
+  # Starts with an empty item list; name and code stay nil until assigned.
+  def initialize
+    self.items = []
+  end
+end
+# Plain record for a single item; every attribute is a read/write accessor
+# and is nil until assigned.
+class HoldingItem
+  attr_accessor :identifier, :call_number, :enumeration, :chron, :year
+  attr_accessor :status, :status_code, :status_date
+end

data/lib/marc_helper.rb ADDED Viewed

@@ -0,0 +1,254 @@
+module MarcHelper
+  # Takes an array of ruby MARC objects and adds a ServiceResponse to the
+  # request for each usable 856 $u link they contain.
+  #
+  # options:
+  # [:default_service_type] handed on to service_type_for_856 as the
+  #                         fallback type; defaults to "fulltext".
+  # [:match_reliability]    defaults to ServiceResponse::MatchExact. Any other
+  #                         value is copied onto each response together with
+  #                         an edition statement for the record.
+  #
+  # Returns a hash of arrays of ServiceResponse objects added, keyed
+  # by service type value string.
+  def add_856_links(request, marc_records, options = {})
+    # Work on a copy so the caller's hash is left untouched.
+    options = options.dup
+    options[:default_service_type] ||= "fulltext"
+    options[:match_reliability] ||= ServiceResponse::MatchExact
+    responses_added = {}
+    # Keep track of urls to avoid putting the exact same url in twice.
+    urls_seen = {}
+    marc_records.each do |marc_xml|
+      marc_xml.find_all { |f| '856' === f.tag }.each do |field|
+        # Might have more than one $u, in which case we want to
+        # possibly add each of them. Might have 0 $u in which case
+        # we skip.
+        field.subfields.find_all { |sf| sf.code == 'u' }.each do |sf|
+          url = sf.value
+          # Already got it from another catalog record?
+          next if urls_seen.key?(url)
+          # Trying to avoid duplicates with SFX/link resolver.
+          next if should_skip_856_link?(request, marc_xml, url)
+          urls_seen[url] = true
+          # Label preference: $y, then the url's host, then the whole url.
+          display_name = field['y']
+          unless display_name
+            begin
+              display_name = URI::parse(url).host
+            rescue StandardError
+              # Can't parse out a domain; the whole url is used below.
+              # Only StandardError is rescued so signals and exits propagate.
+            end
+            display_name = url if display_name.nil?
+          end
+          # But if we've got a $3, the closest MARC comes to a field
+          # that explains what this actually IS, use that too please.
+          display_name = field['3'] + ' from ' + display_name if field['3']
+          # Build the response.
+          response_params = {:service => self, :display_text => display_name, :url => url}
+          # Get all those $z subfields and put em in notes.
+          response_params[:notes] =
+            field.subfields.find_all { |f| f.code == 'z' }.collect { |f| f.value }.compact.join('; ')
+          # Subfield 3 is in fact some kind of coverage note, usually; a
+          # journal link without one gets an explicit warning instead.
+          is_journal = (marc_xml.leader[7,1] == 's')
+          if is_journal && !field['3']
+            response_params[:notes] += "; " unless response_params[:notes].blank?
+            response_params[:notes] += "Dates of coverage unknown."
+          end
+          unless options[:match_reliability] == ServiceResponse::MatchExact
+            response_params[:match_reliability] = options[:match_reliability]
+            response_params[:edition_str] = edition_statement(marc_xml)
+          end
+          # Figure out the right service type value for this, fulltext, ToC,
+          # whatever.
+          response_params[:service_type_value] = service_type_for_856(field, options)
+          # Links taken from MARC are never marked as coverage-checked or as
+          # able to link to the article.
+          response_params[:coverage_checked] = false
+          response_params[:can_link_to_article] = false
+          # Some debugging info, add the 001 bibID if we have one.
+          response_params[:debug_info] = "BibID: #{marc_xml['001'].value}" if marc_xml['001']
+          # Add the response. The key is read back from response_params after
+          # the call, in case add_service_response adjusts it.
+          response = request.add_service_response(response_params)
+          (responses_added[response_params[:service_type_value]] ||= []).push(response)
+        end
+      end
+    end
+    responses_added
+  end
+  # Used by #add_856_links. Decides whether a catalog URL should be left
+  # out because it would duplicate what SFX already covers.
+  #
+  # Why not just show every catalog link when SFX has no targets? Because
+  # SFX may have offered nothing precisely since it _knew_ the _particular
+  # date_ requested is unavailable. The catalog has no such knowledge, and
+  # we don't want to present a link SFX already knew was a dead end.
+  #
+  # The rules, in order:
+  #
+  # * Not a journal (MARC leader byte 7 is not 's'): never skip. It is
+  #   probably an e-book that is not in SFX, even if the vendor is.
+  # * A journal whose URL our SfxUrl finder does not recognise as SFX
+  #   controlled: don't skip.
+  # * A journal with an SFX controlled URL: skip, unless this is a
+  #   title-level citation and no full text has been provided yet.
+  def should_skip_856_link?(request, marc_record, url)
+    return false unless marc_record.leader[7,1] == 's'
+    sfx_controlled = SfxUrl.sfx_controls_url?(url)
+    return sfx_controlled unless sfx_controlled
+    title_level_without_fulltext = request.title_level_citation? &&
+                                   request.get_service_type("fulltext").length == 0
+    !title_level_without_fulltext
+  end
+  # Maps a ruby Marc Field object for an 856 to an umlaut service type
+  # value: fulltext, ToC, and so on. This is necessarily a heuristic guess;
+  # MARC doesn't have enough granularity to let us know for sure.
+  #
+  # Falls back to options[:default_service_type], which is set to
+  # "fulltext_title_level" on the passed-in hash when absent.
+  def service_type_for_856(field, options)
+    options[:default_service_type] ||= "fulltext_title_level"
+    materials = field['3']
+    materials = materials.downcase if materials
+    # LC records here at hopkins have "Table of contents only" in the 856$3.
+    # Think that's a convention from LC?
+    return "table_of_contents" if materials && materials =~ /table of contents( only)?/
+    # If it contains the word 'description', it's probably an abstract.
+    # That's the best we can do, sadly.
+    return "abstract" if materials && materials =~ /description/
+    # LC records often include these links.
+    return "excerpts" if materials == 'sample text'
+    # Any other loc.gov link, we know it's not full text, don't put
+    # it in full text field, put it as "see also".
+    return "highlighted_link" if field['u'] =~ /www\.loc\.gov/
+    options[:default_service_type]
+  end
+  # A MARC record has two dates in it, date1 and date2. Exactly
+  # what they represent is something of an esoteric mystery.
+  # Returns whichever of the two are non-blank, date1 first, in an array;
+  # the array is empty when the record has no 008 field.
+  def get_years(marc)
+    # no marc 008? Weird, but okay.
+    return [] unless marc['008']
+    # date1 sits at offset 7 of the 008 value, date2 at offset 11;
+    # both are four characters wide.
+    years = [7, 11].collect do |offset|
+      year = marc['008'].value[offset, 4]
+      year.strip! if year
+      year
+    end
+    years.reject { |year| year.blank? }
+  end
+  # Take the title out of a marc record: the 245 $a, $b and $k values
+  # joined with single spaces, with one trailing punctuation mark
+  # (; : / . ,) removed. Returns nil when the record has no 245 field.
+  def get_title(marc)
+    title_field = marc['245']
+    return nil unless title_field
+    # Subfields are read through #value, as everywhere else in this module.
+    pieces = title_field.find_all { |sf| %w[a b k].include?(sf.code) }.collect { |sf| sf.value }
+    # String#sub needs an explicit replacement; with a pattern alone and no
+    # block it raises ArgumentError.
+    pieces.join(" ").sub(/\s*[;:\/.,]\s*$/, '')
+  end
+  # From a marc record, get a string useful to display for identifying
+  # which edition/version of a work this represents. Built from the 245$h
+  # GMD, the 250 $a/$b, the 260 publisher (or place, when the publisher is
+  # "s.n.") and date, and optionally the 533 reproduction note.
+  #
+  # options:
+  # [:include_repro_info] include the 533 field; defaults to true, and an
+  #                       explicit false is honoured.
+  #
+  # Returns "" when marc is nil, nil when nothing usable was found, and
+  # otherwise the parts joined with spaces. The 533$a part is wrapped
+  # in <em> tags.
+  def edition_statement(marc, options = {})
+    return "" unless marc
+    # `||= true` would turn an explicit false back into true, so only a
+    # missing (nil) setting falls back to the default.
+    include_repro_info = options[:include_repro_info]
+    include_repro_info = true if include_repro_info.nil?
+    # 533 subfields that are left out of the statement.
+    exclude_533_fields = ['7', 'f', 'b', 'e']
+    parts = []
+    # 245$h GMD
+    unless marc['245'].blank? || marc['245']['h'].blank?
+      parts.push('(' + marc['245']['h'].gsub(/[^\w\s]/, '').strip.titlecase + ')')
+    end
+    # 250
+    if marc['250']
+      parts.push(marc['250']['a']) unless marc['250']['a'].blank?
+      parts.push(marc['250']['b']) unless marc['250']['b'].blank?
+    end
+    # 260
+    if marc['260']
+      if marc['260']['b'] =~ /s\.n\./
+        # Publisher is "s.n." (unnamed), so use the place instead.
+        parts.push(marc['260']['a']) unless marc['260']['a'].blank?
+      else
+        parts.push(marc['260']['b']) unless marc['260']['b'].blank?
+      end
+      parts.push(marc['260']['c']) unless marc['260']['c'].blank?
+    end
+    # 533
+    if include_repro_info && marc['533']
+      marc['533'].subfields.each do |s|
+        if s.code == 'a'
+          parts.push('<em>' + s.value.gsub(/[^\w\s]/, '') + '</em>:')
+        elsif !exclude_533_fields.include?(s.code)
+          parts.push(s.value)
+        end
+      end
+    end
+    return nil if parts.empty?
+    parts.join(' ')
+  end
+  # AACR2 "General Material Designation" values. While these are (I think?)
+  # controlled, it's actually really hard to find the list. Maybe they're
+  # only semi-controlled.
+  # ONE list can be found here: http://www.oclc.org/bibformats/en/onlinecataloging/default.shtm#BCGFECEG
+  #
+  # Returns a freshly built array of lower-case strings on every call.
+  def gmd_values
+    [
+      'activity card', 'art original', 'art reproduction', 'braille',
+      'chart', 'diorama', 'electronic resource',
+      # 'computer file' is an old one that may still be found in data.
+      'computer file',
+      'filmstrip', 'flash card', 'game', 'globe', 'kit', 'manuscript',
+      'map', 'microform', 'microscope slides', 'model', 'motion picture',
+      'music', 'picture', 'realia', 'slide', 'sound recording',
+      'technical drawing', 'text', 'toy', 'transparency', 'videorecording'
+    ]
+  end
+  # Removes something that looks like an AACR2 GMD in square brackets from
+  # the string, e.g. "[microform]" or "[text (large print)]". Pretty kludgey.
+  # For each known GMD value only its first occurrence is replaced.
+  #
+  # options:
+  # [:replacement] text put in place of the bracketed GMD; defaults to ':'.
+  #
+  # Returns a new string; arg_string itself is not modified.
+  def strip_gmd(arg_string, options = {})
+    replacement = options[:replacement] || ':'
+    gmd_values.each do |gmd_val|
+      # "braill?e" accepts the correct spelling as well as the misspelt
+      # "braile" this pattern has historically matched.
+      pattern = /\[#{Regexp.escape(gmd_val)}( \((tactile|braill?e|large print)\))?\]/
+      arg_string = arg_string.sub(pattern, replacement)
+    end
+    arg_string
+  end
+end

data/lib/metadata_helper.rb ADDED Viewed

@@ -0,0 +1,312 @@
+# Helper class to get keyword searchable terms from OpenURL author and title
+#
+# OpenURLs have some commonly agreed upon metadata elements. This module is
+# meant to help simplify things by sorting through the metadata and extracting
+# what we need in a simpler interface. These values are specifically constructed
+# from the citation to work well as keyword searches in other services.
+#
+# Also includes some helpful methods for getting identifiers out in a convenient to work with way, regardless of non-standard ways they may have been stored.
+module MetadataHelper
+  include MarcHelper # for strip gmd functionality
+  # DEPRECATED, not flexible enough, you really need to custom fit
+  # for your given target.
+  # method that accepts a referent to return hash of common metadata elements
+  # choosing the available element for the format and the best available for
+  # searching. Wrapper around the other methods.
+  def get_search_terms(rft)
+    title = get_search_title(rft)
+    creator = get_search_creator(rft)
+    # returns a hash of values so that more keys can be added
+    # and not break services that use this module
+    return {:title => title, :creator => creator}
+  end
+  # A utility method to 'normalize' a title, for use when trying to match a
+  # title from one place with records in another database.
+  # Does lowercasing and removing punctuation, but also stripping out
+  # a bunch of other things that may result
+  # in false negatives. Exactly how you want to do for best results depends
+  # on the particular data you are working with, you need to experiment to see.
+  # MANY options are offered, although defaults are somewhat sensible.
+  # Much of this stuff especially takes account of titles that may have
+  # been generated from MARC.
+  # Will never return the empty string, will sometimes return nil.
+  def normalize_title(arg_title, options = {})
+    # default options
+    options[:rstrip_parens] ||= true
+    options[:remove_all_parens] ||= true
+    options[:strip_gmd] ||= true
+    options[:subtitle_on_semicolon] ||=false
+    options[:remove_subtitle] ||= false
+    options[:normalize_ampersand] ||= true
+    options[:remove_punctuation] ||= true
+    # Even if you're removing other punctuation, keep the apostrophes?
+    options[:keep_apostrophes] ||=false
+    return nil if arg_title.nil?
+    title = arg_title.clone
+    return nil if title.blank?
+    # Sometimes titles given in the OpenURL have some additional stuff
+    # in parens at the end, that messes up the search and isn't really
+    # part of the title. Eliminate!
+    title.gsub!(/\([^)]*\)\s*$/, '') if options[:rstrip_parens]
+    # Or, not even just at the end, but anywhere!
+    title.gsub!(/\([^)]*\)/, '') if options[:remove_all_parens]
+    # Remove things in brackets, part of an AACR2 GMD that's made it in.
+    # replace with ':' so we can keep track of the fact that everything
+    # that came afterwards was a sub-title like thing.
+    title = strip_gmd(title) if options[:strip_gmd]
+    # There seems to be some catoging/metadata disagreement about when to
+    # use ';' for a subtitle instead of ':'. Normalize to ':'.
+    title.sub!(/[\;]/, ':') if options[:subtitle_on_semicolon]
+    title.sub!(/\:(.*)$/, '') if options[:remove_subtitle]
+    # Change ampersands to 'and' for consistency, we see it both ways.
+    title.gsub!(/\&/, ' and ') if options[:normalize_ampersand]
+    # remove non-alphanumeric, excluding apostrophe
+    title.gsub!(/[^\w\s\']/, ' ') if options[:remove_punctuation]
+    # apostrophe not to space, just eat it.
+    title.gsub!(/[\']/, '') if options[:remove_punctuation] && ! options[:keep_apostrophes]
+    # compress whitespace
+    title.strip!
+    title.gsub!(/\s+/, ' ')
+    title.downcase!
+    title = nil if title.blank?
+    return title
+  end
+  # pick title out of OpenURL referent from best element available,
+  # no normalization.
+  def raw_search_title(rft)
+    # Just make one call to create metadata hash
+    metadata = rft.metadata
+    title = nil
+    if rft.format == 'journal' && metadata['atitle']
+      title = metadata['atitle']
+    elsif rft.format == 'book'
+      title = metadata['btitle'] unless metadata['btitle'].blank?
+      title = metadata['title'] if title.blank?
+    # Well, if we don't know the format and we do have a title use that.
+    # This might happen if we only have an ISBN to start and then enhance.
+    # So should services like Amazon also enhance with a format, should
+    # we simplify this method to not worry about format so much, or do we
+    # keep this as is?
+    elsif metadata['btitle']
+      title = metadata['btitle']
+    elsif metadata['title']
+      title = metadata['title']
+    elsif metadata['jtitle']
+      title = metadata['jtitle']
+    end
+    return title
+  end
+  # chooses the best available title for the format, normalizes
+  def get_search_title(rft, options = {})
+    #defaults
+    options = {:remove_all_parens => true,
+               :subtitle_on_semicolon => true,
+               :remove_subtitle => true,
+               :remove_punctuation => true}.merge(options)
+    title = raw_search_title(rft)
+    return normalize_title(title, options)
+  end
+  # chooses the best available creator for the format
+  def get_search_creator(rft)
+    # Just make one call to create metadata hash
+    metadata = rft.metadata
+    # Identify dc.creator query. Prefer aulast alone if available.
+    creator = nil
+    creator = metadata['aulast'] unless metadata['aulast'].blank?
+    creator = metadata['au'] if creator.blank?
+    # FIXME if capital letters are next to each other should we insert a space?
+    #   Should we assume capitals next to each other are initials?
+    #   Maybe only if we use au?
+    #   Logic like this makes refactoring to use Referent.to_citation less useful.
+    # FIXME strip out commas from creator if we use au?
+    return nil if creator.blank?
+    return creator
+  end
+  def get_top_level_creator(rft)
+     # If it's a non-journal thing, add the author if we have an aulast (preferred) or au.
+    # But wait--if it's a book _part_, don't include the author name, since
+    # it _might_ just be the author of the part, not of the book.
+    unless (rft.format == "journal" ||
+              ( rft.format == "book" &&  ! rft.metadata['atitle'].blank?))
+       return get_search_creator(rft)
+    end
+    return nil
+  end
+  # oclcnum and lccn are both _supposed_ to be stored as identifiers
+  # with an info: uri: info:oclcnum/#, info:lccn/#. But SFX sometimes stores
+  # them in the referent metadata instead: rft.lccn, rft.oclcnum.
+  #
+  # On the other hand, isbn and issn can legitimately be included in referent
+  # metadata or as a urn.
+  #
+  # This method will find you an identifier across multiple places.
+  #
+  # type:  :urn or :info
+  # sub_scheme: "lccn", "oclcnum", "isbn", "issn", or anything else that could be found in either a urn, an info uri, or the referent metadata.
+  # referent: an umlaut Referent object
+  #
+  # returns nil if no identifier found, otherwise the bare identifier (not formatted into a urn/uri right now; maybe an option for that should be added?)
+  def get_identifier(type, sub_scheme, referent, options = {} )
+    options[:multiple] ||= false
+    raise Exception.new("type must be :urn or :info") unless type == :urn or type == :info
+    prefix = case type
+               when :info then "info:#{sub_scheme}/"
+               when :urn  then "urn:#{sub_scheme}:"
+             end
+    bare_identifier = nil
+    identifiers = referent.identifiers.collect {|id| $1 if id =~ /^#{prefix}(.*)/}.compact
+    if ( identifiers.blank? &&  ['lccn', 'oclcnum', 'isbn', 'issn', 'doi', 'pmid'].include?(sub_scheme) )
+      # try the referent metadata
+      from_rft = referent.metadata[sub_scheme]
+      identifiers = [from_rft] unless from_rft.blank?
+    end
+    if ( options[:multiple])
+      return identifiers
+    elsif ( identifiers[0].blank? )
+      return nil
+    else
+      return identifiers[0]
+    end
+  end
+  # finds and normalizes an LCCN. If multiple LCCNs are in the record,
+  # returns the first one.
+  def get_lccn(rft)
+    lccn = get_identifier(:info, "lccn", rft)
+    lccn = normalize_lccn(lccn)
+    return lccn
+  end
+  # Gets an ISSN, makes sure it's a valid ISSN or else returns nil.
+  # So will return a valid ISSN (NOT empty string) or nil.
+  def get_issn(rft)
+    issn = rft.metadata['issn']
+    issn = nil unless issn =~ /\d{4}(-)?\d{3}(\d|X)/
+    return issn
+  end
+  # Some normalization. See:
+  # http://info-uri.info/registry/OAIHandler?verb=GetRecord&metadataPrefix=reg&identifier=info:lccn/
+  # doesn't validate right now, only normalizes.
+  # tbd, raise exception if invalid string.
+  def normalize_lccn(lccn)
+    if ( lccn )
+      # remove whitespace
+      lccn = lccn.gsub(/\s/, '')
+      # remove any forward slashes and anything after them
+      lccn = lccn.sub(/\/.*$/, '')
+      # pad anything after a hyphen before removing hyphen, if neccesary
+      lccn = lccn.sub(/-(.*)/) do |match_str|
+        if $1.length < 6
+          ("0" * (6 - $1.length)) + $1
+        else
+          $1
+        end
+      end
+    end
+    return lccn
+  end
+  # Gets isbn, also removes any weird stuff on the end sometimes
+  # included as 'isbn', but not part of the isbn. Like (paperback)
+  # and such.
+  def get_isbn(rft)
+    isbn = get_identifier(:urn, "isbn", rft)
+    isbn = isbn.gsub(/[^\dX-]/, '') if isbn
+    return nil if isbn.blank?
+    return isbn
+  end
+  def get_oclcnum(rft)
+    return get_identifier(:info, "oclcnum", rft)
+  end
+  def get_doi(rft)
+    return get_identifier(:info, "doi", rft)
+  end
+  def get_pmid(rft)
+    return get_identifier(:info, "pmid", rft)
+  end
+  # Returns an array, possibly empty.
+  def get_gpo_item_nums(rft)
+    # In a technically illegal but used by OCLC info:gpo uri
+    ids = get_identifier(:info, "gpo", rft, :multiple => true)
+    # Remove the uri part.
+    return ids.collect {|id| id.sub(/^info:gpo\//, '')  }
+  end
+  def get_sudoc(rft)
+    # Don't forget to unescape the sudoc that was escaped to maek it a uri!
+    # Option 1: In a technically illegal but oh well info:sudoc uri
+    sudoc = get_identifier(:info, "sudoc", rft)
+    sudoc = CGI.unescape(sudoc) if sudoc
+    # Option 2: rsinger's purl for sudoc. http://dilettantes.code4lib.org/2009/03/a-uri-scheme-for-sudocs/
+    unless sudoc
+      sudoc = rft.identifiers.collect {|id| $1 if id =~ /^http:\/\/purl.org\/NET\/sudoc\/(.*)$/}.compact.slice(0)
+      sudoc = CGI.unescape(sudoc) if sudoc
+    end
+    return sudoc
+  end
+  def get_year(rft)
+    # Some link generators use an illegal 'year' parameter
+    if (date = (rft['date'] || rft['year']))
+      return date[0,4]
+    end
+    return nil
+  end
+  # Look at weird bad OpenURLs, use heuristics to see if the 'title' probably
+  # represents a journal rather than a book.
+  def title_is_serial?(rft)
+    (rft.format != "book" &&
+    ( ! rft.metadata['jtitle'].blank?) &&
+    rft.metadata['btitle'].blank?)
+  end
+end