discovery-indexer 0.3 → 0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/discovery-indexer.rb +0 -1
- data/lib/mapper/general_mapper.rb +162 -1
- data/lib/reader/purlxml_parser_strict.rb +0 -1
- data/lib/version.rb +1 -1
- data/lib/writer/solr_client.rb +5 -7
- data/lib/writer/solr_writer.rb +9 -9
- metadata +20 -21
- data/lib/mapper/index_mapper.rb +0 -179
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 7839ece7ccc93abe604fbc2e88c7735da73e3593
         | 
| 4 | 
            +
              data.tar.gz: 5713715eac6372293c4e7c50cf4169b3566c873e
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 980c089b86b8dc005f92e858e3a2eec5f64cbad358510432241928db201fd452511581f71aa57b42d2f124d143f9e43c344ce4072f49568629801a1da31e55d5
         | 
| 7 | 
            +
              data.tar.gz: ded188f8a0d2b06c3702c89070af6000c7d943fb98f3ce35d3cf4189a708acfcc5ce69de8fb213128490dd6927ed47589d5dc5203b6d2bf387425c701636af08
         | 
    
        data/lib/discovery-indexer.rb
    CHANGED
    
    
        data/lib/mapper/general_mapper.rb
    CHANGED
    
    
| @@ -2,6 +2,12 @@ module DiscoveryIndexer | |
| 2 2 | 
             
              module Mapper
         | 
| 3 3 | 
             
                class GeneralMapper
         | 
| 4 4 |  | 
| 5 | 
            +
                  # Initializes an instance from IndexMapper
         | 
| 6 | 
            +
                  # @param [String] druid e.g. ab123cd4567
         | 
| 7 | 
            +
                  # @param [Stanford::Mods::Record] modsxml represents the MODS xml for the druid
         | 
| 8 | 
            +
                  # @param [DiscoveryIndexer::Reader::PurlxmlModel] purlxml represents the purlxml model
         | 
| 9 | 
            +
                  # @param [Hash] collection_names represents a hash of collection_druid and 
         | 
| 10 | 
            +
                  #  collection_name !{"aa111aa1111"=>"First Collection", "bb123bb1234"=>"Second Collection"}
         | 
| 5 11 | 
             
                  def initialize(druid, modsxml, purlxml, collection_names={})
         | 
| 6 12 | 
             
                    @druid = druid
         | 
| 7 13 | 
             
                    @modsxml = modsxml
         | 
| @@ -9,9 +15,164 @@ module DiscoveryIndexer | |
| 9 15 | 
             
                    @collection_names = collection_names
         | 
| 10 16 | 
             
                  end
         | 
| 11 17 |  | 
| 18 | 
            +
                  # Create a Hash representing a Solr doc, with all MODS related fields populated.  
         | 
| 19 | 
            +
                  # @return [Hash] Hash representing the Solr document
         | 
| 12 20 | 
             
                  def map()
         | 
| 21 | 
            +
                    solr_doc = {}
         | 
| 22 | 
            +
                    solr_doc[:id] = @druid
         | 
| 23 | 
            +
                    solr_doc.update mods_to_title_fields
         | 
| 24 | 
            +
                    solr_doc.update mods_to_author_fields
         | 
| 25 | 
            +
                    solr_doc.update mods_to_subject_search_fields
         | 
| 26 | 
            +
                    solr_doc.update mods_to_publication_fields
         | 
| 27 | 
            +
                    solr_doc.update mods_to_pub_date
         | 
| 28 | 
            +
                    solr_doc.update mods_to_others
         | 
| 29 | 
            +
                    
         | 
| 30 | 
            +
                    solr_doc[:all_search] = @modsxml.text.gsub(/\s+/, ' ')
         | 
| 31 | 
            +
                    return solr_doc
         | 
| 13 32 | 
             
                  end
         | 
| 14 | 
            -
             | 
| 33 | 
            +
             | 
| 34 | 
            +
                  # @return [Hash] Hash representing the title fields
         | 
| 35 | 
            +
                  def mods_to_title_fields
         | 
| 36 | 
            +
                    # title fields
         | 
| 37 | 
            +
                    doc_hash = { 
         | 
| 38 | 
            +
                      :title_245a_search => @modsxml.sw_short_title,
         | 
| 39 | 
            +
                      :title_245_search => @modsxml.sw_full_title,
         | 
| 40 | 
            +
                      :title_variant_search => @modsxml.sw_addl_titles,
         | 
| 41 | 
            +
                      :title_sort => @modsxml.sw_sort_title,
         | 
| 42 | 
            +
                      :title_245a_display => @modsxml.sw_short_title,
         | 
| 43 | 
            +
                      :title_display => @modsxml.sw_title_display,
         | 
| 44 | 
            +
                      :title_full_display => @modsxml.sw_full_title,
         | 
| 45 | 
            +
                    }
         | 
| 46 | 
            +
                    doc_hash
         | 
| 47 | 
            +
                  end
         | 
| 48 | 
            +
                  
         | 
| 49 | 
            +
                  # @return [Hash] Hash representing the author fields
         | 
| 50 | 
            +
                  def mods_to_author_fields
         | 
| 51 | 
            +
                    doc_hash = { 
         | 
| 52 | 
            +
                      # author fields
         | 
| 53 | 
            +
                      :author_1xx_search => @modsxml.sw_main_author,
         | 
| 54 | 
            +
                      :author_7xx_search => @modsxml.sw_addl_authors,
         | 
| 55 | 
            +
                      :author_person_facet => @modsxml.sw_person_authors,
         | 
| 56 | 
            +
                      :author_other_facet => @modsxml.sw_impersonal_authors,
         | 
| 57 | 
            +
                      :author_sort => @modsxml.sw_sort_author[1..-1],
         | 
| 58 | 
            +
                      :author_corp_display => @modsxml.sw_corporate_authors,
         | 
| 59 | 
            +
                      :author_meeting_display => @modsxml.sw_meeting_authors,
         | 
| 60 | 
            +
                      :author_person_display => @modsxml.sw_person_authors,
         | 
| 61 | 
            +
                      :author_person_full_display => @modsxml.sw_person_authors,
         | 
| 62 | 
            +
                    }
         | 
| 63 | 
            +
                    doc_hash
         | 
| 64 | 
            +
                  end
         | 
| 65 | 
            +
                  
         | 
| 66 | 
            +
                  # @return [Hash] Hash representing the search fields
         | 
| 67 | 
            +
                  def mods_to_subject_search_fields
         | 
| 68 | 
            +
                    doc_hash = { 
         | 
| 69 | 
            +
                      # subject search fields
         | 
| 70 | 
            +
                      :topic_search => @modsxml.topic_search, 
         | 
| 71 | 
            +
                      :geographic_search => @modsxml.geographic_search,
         | 
| 72 | 
            +
                      :subject_other_search => @modsxml.subject_other_search, 
         | 
| 73 | 
            +
                      :subject_other_subvy_search => @modsxml.subject_other_subvy_search,
         | 
| 74 | 
            +
                      :subject_all_search => @modsxml.subject_all_search, 
         | 
| 75 | 
            +
                      :topic_facet => @modsxml.topic_facet,
         | 
| 76 | 
            +
                      :geographic_facet => @modsxml.geographic_facet,
         | 
| 77 | 
            +
                      :era_facet => @modsxml.era_facet,
         | 
| 78 | 
            +
                    }
         | 
| 79 | 
            +
                  end
         | 
| 80 | 
            +
                  
         | 
| 81 | 
            +
                  # @return [Hash] Hash representing the publication fields
         | 
| 82 | 
            +
                  def mods_to_publication_fields
         | 
| 83 | 
            +
                    doc_hash = { 
         | 
| 84 | 
            +
                      # publication fields
         | 
| 85 | 
            +
                      :pub_search =>  @modsxml.place,
         | 
| 86 | 
            +
                      :pub_date_sort =>  @modsxml.pub_date_sort,
         | 
| 87 | 
            +
                      :imprint_display =>  @modsxml.pub_date_display,
         | 
| 88 | 
            +
                      :pub_date =>  @modsxml.pub_date_facet,
         | 
| 89 | 
            +
                      :pub_date_display =>  @modsxml.pub_date_display, # pub_date_display may be deprecated
         | 
| 90 | 
            +
                    }
         | 
| 91 | 
            +
                  end
         | 
| 92 | 
            +
                  
         | 
| 93 | 
            +
                  # @return [Hash] Hash representing the pub date
         | 
| 94 | 
            +
                  def mods_to_pub_date
         | 
| 95 | 
            +
                    doc_hash = {}
         | 
| 96 | 
            +
                    pub_date_sort = @modsxml.pub_date_sort
         | 
| 97 | 
            +
                    if is_positive_int? pub_date_sort
         | 
| 98 | 
            +
                      doc_hash[:pub_year_tisim] =  pub_date_sort # for date slider
         | 
| 99 | 
            +
                      # put the displayable year in the correct field, :creation_year_isi for example
         | 
| 100 | 
            +
                      doc_hash[date_type_sym] =  @modsxml.pub_date_sort  if date_type_sym
         | 
| 101 | 
            +
                    end
         | 
| 102 | 
            +
                    return doc_hash
         | 
| 103 | 
            +
                  end    
         | 
| 104 | 
            +
                    
         | 
| 105 | 
            +
                  # @return [Hash] Hash representing some fields 
         | 
| 106 | 
            +
                  def mods_to_others
         | 
| 107 | 
            +
                    doc_hash = { 
         | 
| 108 | 
            +
                      :format_main_ssim => format_main_ssim,
         | 
| 109 | 
            +
                      :format => format, # for backwards compatibility
         | 
| 110 | 
            +
                      :language => @modsxml.sw_language_facet,
         | 
| 111 | 
            +
                      :physical =>  @modsxml.term_values([:physical_description, :extent]),
         | 
| 112 | 
            +
                      :summary_search => @modsxml.term_values(:abstract),
         | 
| 113 | 
            +
                      :toc_search => @modsxml.term_values(:tableOfContents),
         | 
| 114 | 
            +
                      :url_suppl => @modsxml.term_values([:related_item, :location, :url]),
         | 
| 115 | 
            +
                    }
         | 
| 116 | 
            +
                    return doc_hash
         | 
| 117 | 
            +
                  end
         | 
| 118 | 
            +
                
         | 
| 119 | 
            +
                  # select one or more format values from the controlled vocabulary here:
         | 
| 120 | 
            +
                  #   http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
         | 
| 121 | 
            +
                  # via stanford-mods gem
         | 
| 122 | 
            +
                  # @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
         | 
| 123 | 
            +
                  def format
         | 
| 124 | 
            +
                    vals = @modsxml.format
         | 
| 125 | 
            +
                    if vals.empty?
         | 
| 126 | 
            +
                      puts "#{@druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements"
         | 
| 127 | 
            +
                    end
         | 
| 128 | 
            +
                    vals
         | 
| 129 | 
            +
                  end
         | 
| 130 | 
            +
                  
         | 
| 131 | 
            +
                  # call stanford-mods format_main to get results
         | 
| 132 | 
            +
                  # @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
         | 
| 133 | 
            +
                  def format_main_ssim
         | 
| 134 | 
            +
                    vals = @modsxml.format_main
         | 
| 135 | 
            +
                    if vals.empty?
         | 
| 136 | 
            +
                      puts "#{@druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements"
         | 
| 137 | 
            +
                    end
         | 
| 138 | 
            +
                    vals
         | 
| 139 | 
            +
                  end
         | 
| 140 | 
            +
                
         | 
| 141 | 
            +
                  # call stanford-mods sw_genre to get results
         | 
| 142 | 
            +
                  # @return [Array<String>] value(s) 
         | 
| 143 | 
            +
                  def genre_ssim
         | 
| 144 | 
            +
                    @modsxml.sw_genre
         | 
| 145 | 
            +
                  end
         | 
| 146 | 
            +
                
         | 
| 147 | 
            +
                protected
         | 
| 148 | 
            +
                
         | 
| 149 | 
            +
                  # @return true if the string parses into an int, and if so, the int is >= 0
         | 
| 150 | 
            +
                  def is_positive_int? str
         | 
| 151 | 
            +
                    begin
         | 
| 152 | 
            +
                      if str.to_i >= 0
         | 
| 153 | 
            +
                        return true
         | 
| 154 | 
            +
                      else
         | 
| 155 | 
            +
                        return false
         | 
| 156 | 
            +
                      end
         | 
| 157 | 
            +
                    rescue
         | 
| 158 | 
            +
                    end
         | 
| 159 | 
            +
                    return false
         | 
| 160 | 
            +
                  end
         | 
| 161 | 
            +
                
         | 
| 162 | 
            +
                  # determines particular flavor of displayable publication year field 
         | 
| 163 | 
            +
                  # @return Solr field name as a symbol
         | 
| 164 | 
            +
                  def date_type_sym
         | 
| 165 | 
            +
                    vals = @modsxml.term_values([:origin_info,:dateIssued])
         | 
| 166 | 
            +
                    if vals and vals.length > 0
         | 
| 167 | 
            +
                      return :publication_year_isi
         | 
| 168 | 
            +
                    end
         | 
| 169 | 
            +
                    vals = @modsxml.term_values([:origin_info,:dateCreated])  
         | 
| 170 | 
            +
                    if vals and vals.length > 0
         | 
| 171 | 
            +
                      return :creation_year_isi
         | 
| 172 | 
            +
                    end
         | 
| 173 | 
            +
                    nil
         | 
| 174 | 
            +
                  end
         | 
| 175 | 
            +
                  
         | 
| 15 176 | 
             
                end
         | 
| 16 177 | 
             
              end
         | 
| 17 178 | 
             
            end
         | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        data/lib/writer/solr_client.rb
    CHANGED
    
    | @@ -11,8 +11,8 @@ module DiscoveryIndexer | |
| 11 11 | 
             
                  # @param [Hash] solr_doc a Hash representation of the solr document
         | 
| 12 12 | 
             
                  # @param [RSolr::Client] solr_connector is an open connection with the solr core
         | 
| 13 13 | 
             
                  # @param [Integer] max_retries the maximum number of tries before fail
         | 
| 14 | 
            -
                  def self.add(solr_doc, solr_connector, max_retries = 10)
         | 
| 15 | 
            -
                    process(solr_doc, solr_connector, max_retries, is_delete=false)  
         | 
| 14 | 
            +
                  def self.add(id, solr_doc, solr_connector, max_retries = 10)
         | 
| 15 | 
            +
                    process(id, solr_doc, solr_connector, max_retries, is_delete=false)  
         | 
| 16 16 | 
             
                  end
         | 
| 17 17 |  | 
| 18 18 | 
             
                  # Add the document to solr, retry if an error occurs.
         | 
| @@ -20,13 +20,11 @@ module DiscoveryIndexer | |
| 20 20 | 
             
                  # @param [Hash] solr_doc that has only the id !{:id=>"ab123cd4567"}
         | 
| 21 21 | 
             
                  # @param [RSolr::Client] solr_connector is an open connection with the solr core
         | 
| 22 22 | 
             
                  # @param [Integer] max_retries the maximum number of tries before fail
         | 
| 23 | 
            -
                  def self.delete(solr_doc, solr_connector, max_retries = 10)
         | 
| 24 | 
            -
                    process(solr_doc, solr_connector, max_retries, is_delete=true)
         | 
| 23 | 
            +
                  def self.delete(id, solr_doc, solr_connector, max_retries = 10)
         | 
| 24 | 
            +
                    process(id, solr_doc, solr_connector, max_retries, is_delete=true)
         | 
| 25 25 | 
             
                  end
         | 
| 26 26 |  | 
| 27 | 
            -
                  def self.process(solr_doc, solr_connector, max_retries, is_delete=false)
         | 
| 28 | 
            -
                    id = solr_doc[:id]
         | 
| 29 | 
            -
                    puts id
         | 
| 27 | 
            +
                  def self.process(id, solr_doc, solr_connector, max_retries, is_delete=false)
         | 
| 30 28 | 
             
                    handler = Proc.new do |exception, attempt_number, total_delay|
         | 
| 31 29 | 
             
                      DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
         | 
| 32 30 | 
             
                    end
         | 
    
        data/lib/writer/solr_writer.rb
    CHANGED
    
    | @@ -6,7 +6,7 @@ module DiscoveryIndexer | |
| 6 6 | 
             
                class SolrWriter
         | 
| 7 7 | 
             
                  include DiscoveryIndexer::Logging
         | 
| 8 8 |  | 
| 9 | 
            -
                  def process( | 
| 9 | 
            +
                  def process(id, index_doc, targets, solr_targets_configs)
         | 
| 10 10 | 
             
                    @solr_targets_configs = solr_targets_configs
         | 
| 11 11 | 
             
                    index_targets = []
         | 
| 12 12 | 
             
                    delete_targets = []
         | 
| @@ -20,29 +20,29 @@ module DiscoveryIndexer | |
| 20 20 | 
             
                    end
         | 
| 21 21 |  | 
| 22 22 | 
             
                    # get targets with true
         | 
| 23 | 
            -
                    solr_index_client(index_doc, index_targets)
         | 
| 23 | 
            +
                    solr_index_client(id, index_doc, index_targets)
         | 
| 24 24 | 
             
                    # get targets with false
         | 
| 25 | 
            -
                    solr_delete_client( | 
| 25 | 
            +
                    solr_delete_client(id, delete_targets)
         | 
| 26 26 | 
             
                  end
         | 
| 27 27 |  | 
| 28 | 
            -
                  def solr_delete_from_all( | 
| 28 | 
            +
                  def solr_delete_from_all(id, solr_targets_configs)
         | 
| 29 29 | 
             
                    # Get a list of all registered targets
         | 
| 30 30 | 
             
                    @solr_targets_configs=solr_targets_configs
         | 
| 31 31 | 
             
                    targets = @solr_targets_configs.keys()
         | 
| 32 | 
            -
                    solr_delete_client( | 
| 32 | 
            +
                    solr_delete_client(id, targets)
         | 
| 33 33 | 
             
                  end
         | 
| 34 34 |  | 
| 35 | 
            -
                  def solr_index_client(index_doc, targets)
         | 
| 35 | 
            +
                  def solr_index_client(id, index_doc, targets)
         | 
| 36 36 | 
             
                    targets.each do |solr_target|
         | 
| 37 37 | 
             
                      solr_connector = get_connector_for_target(solr_target)     
         | 
| 38 | 
            -
                       SolrClient.add(index_doc, solr_connector)
         | 
| 38 | 
            +
                       SolrClient.add(id, index_doc, solr_connector)
         | 
| 39 39 | 
             
                    end          
         | 
| 40 40 | 
             
                  end
         | 
| 41 41 |  | 
| 42 | 
            -
                  def solr_delete_client( | 
| 42 | 
            +
                  def solr_delete_client(id, targets)
         | 
| 43 43 | 
             
                    targets.each do |solr_target|
         | 
| 44 44 | 
             
                      solr_connector = get_connector_for_target(solr_target)     
         | 
| 45 | 
            -
                      SolrClient.delete({ | 
| 45 | 
            +
                      SolrClient.delete(id,{}, solr_connector)
         | 
| 46 46 | 
             
                    end         
         | 
| 47 47 | 
             
                  end
         | 
| 48 48 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,125 +1,125 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: discovery-indexer
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: '0. | 
| 4 | 
            +
              version: '0.4'
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Ahmed AlSum
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-03- | 
| 11 | 
            +
            date: 2015-03-25 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: nokogiri
         | 
| 15 15 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 16 16 | 
             
                requirements:
         | 
| 17 | 
            -
                - -  | 
| 17 | 
            +
                - - '>='
         | 
| 18 18 | 
             
                  - !ruby/object:Gem::Version
         | 
| 19 19 | 
             
                    version: '0'
         | 
| 20 20 | 
             
              type: :runtime
         | 
| 21 21 | 
             
              prerelease: false
         | 
| 22 22 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 23 23 | 
             
                requirements:
         | 
| 24 | 
            -
                - -  | 
| 24 | 
            +
                - - '>='
         | 
| 25 25 | 
             
                  - !ruby/object:Gem::Version
         | 
| 26 26 | 
             
                    version: '0'
         | 
| 27 27 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 28 28 | 
             
              name: stanford-mods
         | 
| 29 29 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 30 30 | 
             
                requirements:
         | 
| 31 | 
            -
                - -  | 
| 31 | 
            +
                - - '>='
         | 
| 32 32 | 
             
                  - !ruby/object:Gem::Version
         | 
| 33 33 | 
             
                    version: '0'
         | 
| 34 34 | 
             
              type: :runtime
         | 
| 35 35 | 
             
              prerelease: false
         | 
| 36 36 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 37 37 | 
             
                requirements:
         | 
| 38 | 
            -
                - -  | 
| 38 | 
            +
                - - '>='
         | 
| 39 39 | 
             
                  - !ruby/object:Gem::Version
         | 
| 40 40 | 
             
                    version: '0'
         | 
| 41 41 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 42 42 | 
             
              name: retries
         | 
| 43 43 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 44 44 | 
             
                requirements:
         | 
| 45 | 
            -
                - -  | 
| 45 | 
            +
                - - '>='
         | 
| 46 46 | 
             
                  - !ruby/object:Gem::Version
         | 
| 47 47 | 
             
                    version: '0'
         | 
| 48 48 | 
             
              type: :runtime
         | 
| 49 49 | 
             
              prerelease: false
         | 
| 50 50 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 51 51 | 
             
                requirements:
         | 
| 52 | 
            -
                - -  | 
| 52 | 
            +
                - - '>='
         | 
| 53 53 | 
             
                  - !ruby/object:Gem::Version
         | 
| 54 54 | 
             
                    version: '0'
         | 
| 55 55 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 56 56 | 
             
              name: rsolr
         | 
| 57 57 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 58 58 | 
             
                requirements:
         | 
| 59 | 
            -
                - -  | 
| 59 | 
            +
                - - '>='
         | 
| 60 60 | 
             
                  - !ruby/object:Gem::Version
         | 
| 61 61 | 
             
                    version: '0'
         | 
| 62 62 | 
             
              type: :runtime
         | 
| 63 63 | 
             
              prerelease: false
         | 
| 64 64 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 65 65 | 
             
                requirements:
         | 
| 66 | 
            -
                - -  | 
| 66 | 
            +
                - - '>='
         | 
| 67 67 | 
             
                  - !ruby/object:Gem::Version
         | 
| 68 68 | 
             
                    version: '0'
         | 
| 69 69 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 70 70 | 
             
              name: rspec
         | 
| 71 71 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 72 72 | 
             
                requirements:
         | 
| 73 | 
            -
                - -  | 
| 73 | 
            +
                - - '>='
         | 
| 74 74 | 
             
                  - !ruby/object:Gem::Version
         | 
| 75 75 | 
             
                    version: '0'
         | 
| 76 76 | 
             
              type: :development
         | 
| 77 77 | 
             
              prerelease: false
         | 
| 78 78 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 79 79 | 
             
                requirements:
         | 
| 80 | 
            -
                - -  | 
| 80 | 
            +
                - - '>='
         | 
| 81 81 | 
             
                  - !ruby/object:Gem::Version
         | 
| 82 82 | 
             
                    version: '0'
         | 
| 83 83 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 84 84 | 
             
              name: webmock
         | 
| 85 85 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 86 86 | 
             
                requirements:
         | 
| 87 | 
            -
                - -  | 
| 87 | 
            +
                - - '>='
         | 
| 88 88 | 
             
                  - !ruby/object:Gem::Version
         | 
| 89 89 | 
             
                    version: '0'
         | 
| 90 90 | 
             
              type: :development
         | 
| 91 91 | 
             
              prerelease: false
         | 
| 92 92 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 93 93 | 
             
                requirements:
         | 
| 94 | 
            -
                - -  | 
| 94 | 
            +
                - - '>='
         | 
| 95 95 | 
             
                  - !ruby/object:Gem::Version
         | 
| 96 96 | 
             
                    version: '0'
         | 
| 97 97 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 98 98 | 
             
              name: equivalent-xml
         | 
| 99 99 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 100 100 | 
             
                requirements:
         | 
| 101 | 
            -
                - -  | 
| 101 | 
            +
                - - '>='
         | 
| 102 102 | 
             
                  - !ruby/object:Gem::Version
         | 
| 103 103 | 
             
                    version: '0'
         | 
| 104 104 | 
             
              type: :development
         | 
| 105 105 | 
             
              prerelease: false
         | 
| 106 106 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 107 107 | 
             
                requirements:
         | 
| 108 | 
            -
                - -  | 
| 108 | 
            +
                - - '>='
         | 
| 109 109 | 
             
                  - !ruby/object:Gem::Version
         | 
| 110 110 | 
             
                    version: '0'
         | 
| 111 111 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 112 112 | 
             
              name: vcr
         | 
| 113 113 | 
             
              requirement: !ruby/object:Gem::Requirement
         | 
| 114 114 | 
             
                requirements:
         | 
| 115 | 
            -
                - -  | 
| 115 | 
            +
                - - '>='
         | 
| 116 116 | 
             
                  - !ruby/object:Gem::Version
         | 
| 117 117 | 
             
                    version: '0'
         | 
| 118 118 | 
             
              type: :development
         | 
| 119 119 | 
             
              prerelease: false
         | 
| 120 120 | 
             
              version_requirements: !ruby/object:Gem::Requirement
         | 
| 121 121 | 
             
                requirements:
         | 
| 122 | 
            -
                - -  | 
| 122 | 
            +
                - - '>='
         | 
| 123 123 | 
             
                  - !ruby/object:Gem::Version
         | 
| 124 124 | 
             
                    version: '0'
         | 
| 125 125 | 
             
            description: This library manages the core operations for the discovery indexing such
         | 
| @@ -133,7 +133,6 @@ files: | |
| 133 133 | 
             
            - lib/errors.rb
         | 
| 134 134 | 
             
            - lib/logging.rb
         | 
| 135 135 | 
             
            - lib/mapper/general_mapper.rb
         | 
| 136 | 
            -
            - lib/mapper/index_mapper.rb
         | 
| 137 136 | 
             
            - lib/reader/modsxml.rb
         | 
| 138 137 | 
             
            - lib/reader/modsxml_reader.rb
         | 
| 139 138 | 
             
            - lib/reader/purlxml.rb
         | 
| @@ -155,12 +154,12 @@ require_paths: | |
| 155 154 | 
             
            - lib
         | 
| 156 155 | 
             
            required_ruby_version: !ruby/object:Gem::Requirement
         | 
| 157 156 | 
             
              requirements:
         | 
| 158 | 
            -
              - -  | 
| 157 | 
            +
              - - '>='
         | 
| 159 158 | 
             
                - !ruby/object:Gem::Version
         | 
| 160 159 | 
             
                  version: '0'
         | 
| 161 160 | 
             
            required_rubygems_version: !ruby/object:Gem::Requirement
         | 
| 162 161 | 
             
              requirements:
         | 
| 163 | 
            -
              - -  | 
| 162 | 
            +
              - - '>='
         | 
| 164 163 | 
             
                - !ruby/object:Gem::Version
         | 
| 165 164 | 
             
                  version: '0'
         | 
| 166 165 | 
             
            requirements: []
         | 
    
        data/lib/mapper/index_mapper.rb
    DELETED
    
    | @@ -1,179 +0,0 @@ | |
| 1 | 
            -
            module DiscoveryIndexer
         | 
| 2 | 
            -
              module Mapper
         | 
| 3 | 
            -
                
         | 
| 4 | 
            -
                # This class is responsible for creating the solr_doc hash based on the input
         | 
| 5 | 
            -
                # of druid_id, modsxml, purlxml, and optional hash of collection_names
         | 
| 6 | 
            -
                class IndexMapper < GeneralMapper
         | 
| 7 | 
            -
                  
         | 
| 8 | 
            -
                  # Initializes an instance from IndexMapper
         | 
| 9 | 
            -
                  # @param [String] druid e.g. ab123cd4567
         | 
| 10 | 
            -
                  # @param [Stanford::Mods::Record] modsxml represents the MODS xml for the druid
         | 
| 11 | 
            -
                  # @param [DiscoveryIndexer::Reader::PurlxmlModel] purlxml represents the purlxml model
         | 
| 12 | 
            -
                  # @param [Hash] collection_names represents a hash of collection_druid and 
         | 
| 13 | 
            -
                  #  collection_name !{"aa111aa1111"=>"First Collection", "bb123bb1234"=>"Second Collection"}
         | 
| 14 | 
            -
                  def initialize(druid, modsxml, purlxml, collection_names={})
         | 
| 15 | 
            -
                    super druid, modsxml, purlxml, collection_names
         | 
| 16 | 
            -
                  end  
         | 
| 17 | 
            -
                  
         | 
| 18 | 
            -
                  # Create a Hash representing a Solr doc, with all MODS related fields populated.  
         | 
| 19 | 
            -
                  # @return [Hash] Hash representing the Solr document
         | 
| 20 | 
            -
                  def map()
         | 
| 21 | 
            -
                    solr_doc = {}
         | 
| 22 | 
            -
                    solr_doc[:id] = @druid
         | 
| 23 | 
            -
                    solr_doc.update mods_to_title_fields
         | 
| 24 | 
            -
                    solr_doc.update mods_to_author_fields
         | 
| 25 | 
            -
                    solr_doc.update mods_to_subject_search_fields
         | 
| 26 | 
            -
                    solr_doc.update mods_to_publication_fields
         | 
| 27 | 
            -
                    solr_doc.update mods_to_pub_date
         | 
| 28 | 
            -
                    solr_doc.update mods_to_others
         | 
| 29 | 
            -
                    
         | 
| 30 | 
            -
                    solr_doc[:all_search] = @modsxml.text.gsub(/\s+/, ' ')
         | 
| 31 | 
            -
                    return solr_doc
         | 
| 32 | 
            -
                  end
         | 
| 33 | 
            -
             | 
| 34 | 
            -
                  # @return [Hash] Hash representing the title fields
         | 
| 35 | 
            -
                  def mods_to_title_fields
         | 
| 36 | 
            -
                    # title fields
         | 
| 37 | 
            -
                    doc_hash = { 
         | 
| 38 | 
            -
                      :title_245a_search => @modsxml.sw_short_title,
         | 
| 39 | 
            -
                      :title_245_search => @modsxml.sw_full_title,
         | 
| 40 | 
            -
                      :title_variant_search => @modsxml.sw_addl_titles,
         | 
| 41 | 
            -
                      :title_sort => @modsxml.sw_sort_title,
         | 
| 42 | 
            -
                      :title_245a_display => @modsxml.sw_short_title,
         | 
| 43 | 
            -
                      :title_display => @modsxml.sw_title_display,
         | 
| 44 | 
            -
                      :title_full_display => @modsxml.sw_full_title,
         | 
| 45 | 
            -
                    }
         | 
| 46 | 
            -
                    doc_hash
         | 
| 47 | 
            -
                  end
         | 
| 48 | 
            -
                  
         | 
| 49 | 
            -
                  # @return [Hash] Hash representing the author fields
         | 
| 50 | 
            -
                  def mods_to_author_fields
         | 
| 51 | 
            -
                    doc_hash = { 
         | 
| 52 | 
            -
                      # author fields
         | 
| 53 | 
            -
                      :author_1xx_search => @modsxml.sw_main_author,
         | 
| 54 | 
            -
                      :author_7xx_search => @modsxml.sw_addl_authors,
         | 
| 55 | 
            -
                      :author_person_facet => @modsxml.sw_person_authors,
         | 
| 56 | 
            -
                      :author_other_facet => @modsxml.sw_impersonal_authors,
         | 
| 57 | 
            -
                      :author_sort => @modsxml.sw_sort_author[1..-1],
         | 
| 58 | 
            -
                      :author_corp_display => @modsxml.sw_corporate_authors,
         | 
| 59 | 
            -
                      :author_meeting_display => @modsxml.sw_meeting_authors,
         | 
| 60 | 
            -
                      :author_person_display => @modsxml.sw_person_authors,
         | 
| 61 | 
            -
                      :author_person_full_display => @modsxml.sw_person_authors,
         | 
| 62 | 
            -
                    }
         | 
| 63 | 
            -
                    doc_hash
         | 
| 64 | 
            -
                  end
         | 
| 65 | 
            -
                  
         | 
| 66 | 
            -
                  # @return [Hash] Hash representing the search fields
         | 
| 67 | 
            -
                  def mods_to_subject_search_fields
         | 
| 68 | 
            -
                    doc_hash = { 
         | 
| 69 | 
            -
                      # subject search fields
         | 
| 70 | 
            -
                      :topic_search => @modsxml.topic_search, 
         | 
| 71 | 
            -
                      :geographic_search => @modsxml.geographic_search,
         | 
| 72 | 
            -
                      :subject_other_search => @modsxml.subject_other_search, 
         | 
| 73 | 
            -
                      :subject_other_subvy_search => @modsxml.subject_other_subvy_search,
         | 
| 74 | 
            -
                      :subject_all_search => @modsxml.subject_all_search, 
         | 
| 75 | 
            -
                      :topic_facet => @modsxml.topic_facet,
         | 
| 76 | 
            -
                      :geographic_facet => @modsxml.geographic_facet,
         | 
| 77 | 
            -
                      :era_facet => @modsxml.era_facet,
         | 
| 78 | 
            -
                    }
         | 
| 79 | 
            -
                  end
         | 
| 80 | 
            -
                  
         | 
| 81 | 
            -
                  # @return [Hash] Hash representing the publication fields
         | 
| 82 | 
            -
                  def mods_to_publication_fields
         | 
| 83 | 
            -
                    doc_hash = { 
         | 
| 84 | 
            -
                      # publication fields
         | 
| 85 | 
            -
                      :pub_search =>  @modsxml.place,
         | 
| 86 | 
            -
                      :pub_date_sort =>  @modsxml.pub_date_sort,
         | 
| 87 | 
            -
                      :imprint_display =>  @modsxml.pub_date_display,
         | 
| 88 | 
            -
                      :pub_date =>  @modsxml.pub_date_facet,
         | 
| 89 | 
            -
                      :pub_date_display =>  @modsxml.pub_date_display, # pub_date_display may be deprecated
         | 
| 90 | 
            -
                    }
         | 
| 91 | 
            -
                  end
         | 
| 92 | 
            -
                  
         | 
| 93 | 
            -
                  # @return [Hash] Hash representing the pub date
         | 
| 94 | 
            -
                  def mods_to_pub_date
         | 
| 95 | 
            -
                    doc_hash = {}
         | 
| 96 | 
            -
                    pub_date_sort = @modsxml.pub_date_sort
         | 
| 97 | 
            -
                    if is_positive_int? pub_date_sort
         | 
| 98 | 
            -
                      doc_hash[:pub_year_tisim] =  pub_date_sort # for date slider
         | 
| 99 | 
            -
                      # put the displayable year in the correct field, :creation_year_isi for example
         | 
| 100 | 
            -
                      doc_hash[date_type_sym] =  @modsxml.pub_date_sort  if date_type_sym
         | 
| 101 | 
            -
                    end
         | 
| 102 | 
            -
                    return doc_hash
         | 
| 103 | 
            -
                  end    
         | 
| 104 | 
            -
                    
         | 
| 105 | 
            -
                  # @return [Hash] Hash representing some fields 
         | 
| 106 | 
            -
                  def mods_to_others
         | 
| 107 | 
            -
                    doc_hash = { 
         | 
| 108 | 
            -
                      :format_main_ssim => format_main_ssim,
         | 
| 109 | 
            -
                      :format => format, # for backwards compatibility
         | 
| 110 | 
            -
                      :language => @modsxml.sw_language_facet,
         | 
| 111 | 
            -
                      :physical =>  @modsxml.term_values([:physical_description, :extent]),
         | 
| 112 | 
            -
                      :summary_search => @modsxml.term_values(:abstract),
         | 
| 113 | 
            -
                      :toc_search => @modsxml.term_values(:tableOfContents),
         | 
| 114 | 
            -
                      :url_suppl => @modsxml.term_values([:related_item, :location, :url]),
         | 
| 115 | 
            -
                    }
         | 
| 116 | 
            -
                    return doc_hash
         | 
| 117 | 
            -
                  end
         | 
| 118 | 
            -
                
         | 
| 119 | 
            -
                  # select one or more format values from the controlled vocabulary here:
         | 
| 120 | 
            -
                  #   http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
         | 
| 121 | 
            -
                  # via stanford-mods gem
         | 
| 122 | 
            -
                  # @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
         | 
| 123 | 
            -
                  def format
         | 
| 124 | 
            -
                    vals = @modsxml.format
         | 
| 125 | 
            -
                    if vals.empty?
         | 
| 126 | 
            -
                      puts "#{@druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements"
         | 
| 127 | 
            -
                    end
         | 
| 128 | 
            -
                    vals
         | 
| 129 | 
            -
                  end
         | 
| 130 | 
            -
                  
         | 
| 131 | 
            -
                  # call stanford-mods format_main to get results
         | 
| 132 | 
            -
                  # @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
         | 
| 133 | 
            -
                  def format_main_ssim
         | 
| 134 | 
            -
                    vals = @modsxml.format_main
         | 
| 135 | 
            -
                    if vals.empty?
         | 
| 136 | 
            -
                      puts "#{@druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements"
         | 
| 137 | 
            -
                    end
         | 
| 138 | 
            -
                    vals
         | 
| 139 | 
            -
                  end
         | 
| 140 | 
            -
                
         | 
| 141 | 
            -
                  # call stanford-mods sw_genre to get results
         | 
| 142 | 
            -
                  # @return [Array<String>] value(s) 
         | 
| 143 | 
            -
                  def genre_ssim
         | 
| 144 | 
            -
                    @modsxml.sw_genre
         | 
| 145 | 
            -
                  end
         | 
| 146 | 
            -
                
         | 
| 147 | 
            -
                protected
         | 
| 148 | 
            -
                
         | 
| 149 | 
            -
                  # @return true if the string parses into an int, and if so, the int is >= 0
         | 
| 150 | 
            -
                  def is_positive_int? str
         | 
| 151 | 
            -
                    begin
         | 
| 152 | 
            -
                      if str.to_i >= 0
         | 
| 153 | 
            -
                        return true
         | 
| 154 | 
            -
                      else
         | 
| 155 | 
            -
                        return false
         | 
| 156 | 
            -
                      end
         | 
| 157 | 
            -
                    rescue
         | 
| 158 | 
            -
                    end
         | 
| 159 | 
            -
                    return false
         | 
| 160 | 
            -
                  end
         | 
| 161 | 
            -
                
         | 
| 162 | 
            -
                  # determines particular flavor of displayable publication year field 
         | 
| 163 | 
            -
                  # @return Solr field name as a symbol
         | 
| 164 | 
            -
                  def date_type_sym
         | 
| 165 | 
            -
                    vals = @modsxml.term_values([:origin_info,:dateIssued])
         | 
| 166 | 
            -
                    if vals and vals.length > 0
         | 
| 167 | 
            -
                      return :publication_year_isi
         | 
| 168 | 
            -
                    end
         | 
| 169 | 
            -
                    vals = @modsxml.term_values([:origin_info,:dateCreated])  
         | 
| 170 | 
            -
                    if vals and vals.length > 0
         | 
| 171 | 
            -
                      return :creation_year_isi
         | 
| 172 | 
            -
                    end
         | 
| 173 | 
            -
                    nil
         | 
| 174 | 
            -
                  end
         | 
| 175 | 
            -
                  
         | 
| 176 | 
            -
                end
         | 
| 177 | 
            -
              end
         | 
| 178 | 
            -
            end
         | 
| 179 | 
            -
              
         |