discovery-indexer 0.9.6 → 0.9.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
 - data/lib/discovery-indexer.rb +2 -3
 - data/lib/errors.rb +8 -8
 - data/lib/logging.rb +3 -6
 - data/lib/mapper/general_mapper.rb +4 -6
 - data/lib/reader/modsxml.rb +16 -20
 - data/lib/reader/modsxml_reader.rb +2 -5
 - data/lib/reader/purlxml.rb +16 -23
 - data/lib/reader/purlxml_model.rb +23 -31
 - data/lib/reader/purlxml_parser.rb +2 -3
 - data/lib/reader/purlxml_parser_strict.rb +99 -111
 - data/lib/reader/purlxml_reader.rb +2 -3
 - data/lib/version.rb +1 -1
 - data/lib/writer/solr_client.rb +30 -32
 - data/lib/writer/solr_writer.rb +15 -15
 - metadata +4 -3
 
    
        checksums.yaml
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            ---
         
     | 
| 
       2 
2 
     | 
    
         
             
            SHA1:
         
     | 
| 
       3 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       4 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 3 
     | 
    
         
            +
              metadata.gz: 9cddff2ef4144cc3d17af7bd71bdb6f68840d38c
         
     | 
| 
      
 4 
     | 
    
         
            +
              data.tar.gz: ca19a7acbb80341c1d6440567e305ef0bf170542
         
     | 
| 
       5 
5 
     | 
    
         
             
            SHA512:
         
     | 
| 
       6 
     | 
    
         
            -
              metadata.gz:  
     | 
| 
       7 
     | 
    
         
            -
              data.tar.gz:  
     | 
| 
      
 6 
     | 
    
         
            +
              metadata.gz: 65fa148806dcf8dc498b262cbc3acf811c89fcad0d81397099009e0e8eb368577477e50c9c5bbee7e0921e11b17542cf4857b2aff4e54a319f5f97ea22e1bb8f
         
     | 
| 
      
 7 
     | 
    
         
            +
              data.tar.gz: a28465c230937c24168114d91f41a58e59492a16b0b479ab8e4ae41429c4682dc7eec99a4b796ff93869d37b7e34ed78a6d7485ff1500ee2bc91e99e3a6e070f
         
     | 
    
        data/lib/discovery-indexer.rb
    CHANGED
    
    | 
         @@ -15,9 +15,8 @@ require 'mapper/general_mapper' 
     | 
|
| 
       15 
15 
     | 
    
         
             
            require 'writer/solr_client'
         
     | 
| 
       16 
16 
     | 
    
         
             
            require 'writer/solr_writer'
         
     | 
| 
       17 
17 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
            #require 'utilities/extract_sub_targets'
         
     | 
| 
       19 
     | 
    
         
            -
             
     | 
| 
      
 18 
     | 
    
         
            +
            # require 'utilities/extract_sub_targets'
         
     | 
| 
       20 
19 
     | 
    
         | 
| 
       21 
20 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       22 
21 
     | 
    
         
             
              PURL_DEFAULT = 'http://purl.stanford.edu'
         
     | 
| 
       23 
     | 
    
         
            -
            end
         
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/errors.rb
    CHANGED
    
    | 
         @@ -1,13 +1,13 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Errors
         
     | 
| 
       3 
     | 
    
         
            -
                MissingPurlPage = Class.new(StandardError) 
     | 
| 
       4 
     | 
    
         
            -
                MissingMods = Class.new(StandardError) 
     | 
| 
       5 
     | 
    
         
            -
                MissingPublicXml = Class.new(StandardError) 
     | 
| 
       6 
     | 
    
         
            -
                MissingContentMetadata = Class.new(StandardError) 
     | 
| 
       7 
     | 
    
         
            -
                MissingIdentityMetadata = Class.new(StandardError) 
     | 
| 
       8 
     | 
    
         
            -
                MissingRightsMetadata = Class.new(StandardError) 
     | 
| 
       9 
     | 
    
         
            -
                MissingRDF = Class.new(StandardError) 
     | 
| 
      
 3 
     | 
    
         
            +
                MissingPurlPage = Class.new(StandardError)
         
     | 
| 
      
 4 
     | 
    
         
            +
                MissingMods = Class.new(StandardError)
         
     | 
| 
      
 5 
     | 
    
         
            +
                MissingPublicXml = Class.new(StandardError)
         
     | 
| 
      
 6 
     | 
    
         
            +
                MissingContentMetadata = Class.new(StandardError)
         
     | 
| 
      
 7 
     | 
    
         
            +
                MissingIdentityMetadata = Class.new(StandardError)
         
     | 
| 
      
 8 
     | 
    
         
            +
                MissingRightsMetadata = Class.new(StandardError)
         
     | 
| 
      
 9 
     | 
    
         
            +
                MissingRDF = Class.new(StandardError)
         
     | 
| 
       10 
10 
     | 
    
         
             
                MissingDC = Class.new(StandardError)
         
     | 
| 
       11 
11 
     | 
    
         
             
                MissingModsPage = Class.new(StandardError)
         
     | 
| 
       12 
12 
     | 
    
         
             
              end
         
     | 
| 
       13 
     | 
    
         
            -
            end
         
     | 
| 
      
 13 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/logging.rb
    CHANGED
    
    
        data/lib/mapper/general_mapper.rb
    CHANGED
    
| 
         @@ -1,29 +1,27 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       2 
2 
     | 
    
         
             
              module Mapper
         
     | 
| 
       3 
3 
     | 
    
         
             
                class GeneralMapper
         
     | 
| 
       4 
     | 
    
         
            -
                  
         
     | 
| 
       5 
4 
     | 
    
         
             
                  # Initializes an instance from IndexMapper
         
     | 
| 
       6 
5 
     | 
    
         
             
                  # @param [String] druid e.g. ab123cd4567
         
     | 
| 
       7 
6 
     | 
    
         
             
                  # @param [Stanford::Mods::Record] modsxml represents the MODS xml for the druid
         
     | 
| 
       8 
7 
     | 
    
         
             
                  # @param [DiscoveryIndexer::Reader::PurlxmlModel] purlxml represents the purlxml model
         
     | 
| 
       9 
8 
     | 
    
         
             
                  # @param [Hash] collection_data represents a hash of collection_druid and catkey
         
     | 
| 
       10 
9 
     | 
    
         
             
                  # e.g. @collection_data = {'aa00bb0001'=>{:name=>'Test Collection Name',:ckey=>'000001'},'nt028fd5773'=>{:name=>'Revs Institute Archive',:ckey=>'000002'}}
         
     | 
| 
       11 
     | 
    
         
            -
                  def initialize(druid, modsxml, purlxml, collection_data={})
         
     | 
| 
      
 10 
     | 
    
         
            +
                  def initialize(druid, modsxml, purlxml, collection_data = {})
         
     | 
| 
       12 
11 
     | 
    
         
             
                    @druid = druid
         
     | 
| 
       13 
12 
     | 
    
         
             
                    @modsxml = modsxml
         
     | 
| 
       14 
13 
     | 
    
         
             
                    @purlxml = purlxml
         
     | 
| 
       15 
14 
     | 
    
         
             
                    @collection_data = collection_data
         
     | 
| 
       16 
15 
     | 
    
         
             
                  end
         
     | 
| 
       17 
16 
     | 
    
         | 
| 
       18 
     | 
    
         
            -
                  # Create a Hash representing a Solr doc, with all MODS related fields populated. 
     | 
| 
      
 17 
     | 
    
         
            +
                  # Create a Hash representing a Solr doc, with all MODS related fields populated.
         
     | 
| 
       19 
18 
     | 
    
         
             
                  # @return [Hash] Hash representing the Solr document
         
     | 
| 
       20 
     | 
    
         
            -
                  def convert_to_solr_doc 
     | 
| 
      
 19 
     | 
    
         
            +
                  def convert_to_solr_doc
         
     | 
| 
       21 
20 
     | 
    
         
             
                    solr_doc = {}
         
     | 
| 
       22 
21 
     | 
    
         
             
                    solr_doc[:id] = @druid
         
     | 
| 
       23 
22 
     | 
    
         
             
                    solr_doc[:title] = @modsxml.sw_full_title
         
     | 
| 
       24 
     | 
    
         
            -
                     
     | 
| 
      
 23 
     | 
    
         
            +
                    solr_doc
         
     | 
| 
       25 
24 
     | 
    
         
             
                  end
         
     | 
| 
       26 
25 
     | 
    
         
             
                end
         
     | 
| 
       27 
26 
     | 
    
         
             
              end
         
     | 
| 
       28 
27 
     | 
    
         
             
            end
         
     | 
| 
       29 
     | 
    
         
            -
              
         
     | 
    
        data/lib/reader/modsxml.rb
    CHANGED
    
    | 
         @@ -1,44 +1,40 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            require 'stanford-mods'
         
     | 
| 
       2 
2 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       3 
3 
     | 
    
         
             
              module InputXml
         
     | 
| 
       4 
     | 
    
         
            -
                
         
     | 
| 
       5 
     | 
    
         
            -
                # This class is the main class to access and parse the mods xml 
         
     | 
| 
      
 4 
     | 
    
         
            +
                # This class is the main class to access and parse the mods xml
         
     | 
| 
       6 
5 
     | 
    
         
             
                #    as retrieved from PURL server
         
     | 
| 
       7 
6 
     | 
    
         
             
                # @example to run the code
         
     | 
| 
       8 
7 
     | 
    
         
             
                #  druid = "aa111aa1111"
         
     | 
| 
       9 
8 
     | 
    
         
             
                #  p =  DiscoveryIndexer::InputXml::Modsxml.new(druid)
         
     | 
| 
       10 
9 
     | 
    
         
             
                #  model =  p.load()
         
     | 
| 
       11 
     | 
    
         
            -
                # 
     | 
| 
       12 
     | 
    
         
            -
                # 
     | 
| 
      
 10 
     | 
    
         
            +
                #
         
     | 
| 
      
 11 
     | 
    
         
            +
                #
         
     | 
| 
       13 
12 
     | 
    
         
             
                class Modsxml
         
     | 
| 
       14 
     | 
    
         
            -
                  # initializes a new object 
     | 
| 
      
 13 
     | 
    
         
            +
                  # initializes a new object
         
     | 
| 
       15 
14 
     | 
    
         
             
                  # @param druid [String] the druid object in the format "aa111aa1111"
         
     | 
| 
       16 
15 
     | 
    
         
             
                  def initialize(druid)
         
     | 
| 
       17 
16 
     | 
    
         
             
                    @druid = druid
         
     | 
| 
       18 
     | 
    
         
            -
                    @modsxml_ng_doc = nil 
     | 
| 
      
 17 
     | 
    
         
            +
                    @modsxml_ng_doc = nil
         
     | 
| 
       19 
18 
     | 
    
         
             
                  end
         
     | 
| 
       20 
19 
     | 
    
         | 
| 
       21 
20 
     | 
    
         
             
                  # loads the mods xml to stanford mods model for the fedora object defined in the druid,
         
     | 
| 
       22 
     | 
    
         
            -
                  # 
     | 
| 
       23 
     | 
    
         
            -
                  # @return [Stanford::Mods::Record] represents the mods xml 
     | 
| 
       24 
     | 
    
         
            -
                  def load 
     | 
| 
       25 
     | 
    
         
            -
                    if @modsxml_ng_doc.nil? 
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
       27 
     | 
    
         
            -
                    end
         
     | 
| 
       28 
     | 
    
         
            -
                    
         
     | 
| 
      
 21 
     | 
    
         
            +
                  # it reads the mods xml once from PURL server, and repeat the parsing with each call
         
     | 
| 
      
 22 
     | 
    
         
            +
                  # @return [Stanford::Mods::Record] represents the mods xml
         
     | 
| 
      
 23 
     | 
    
         
            +
                  def load
         
     | 
| 
      
 24 
     | 
    
         
            +
                    @modsxml_ng_doc = ModsxmlReader.read(@druid) if @modsxml_ng_doc.nil?
         
     | 
| 
      
 25 
     | 
    
         
            +
             
     | 
| 
       29 
26 
     | 
    
         
             
                    modsxml_model = Stanford::Mods::Record.new
         
     | 
| 
       30 
27 
     | 
    
         
             
                    modsxml_model.from_nk_node(@modsxml_ng_doc)
         
     | 
| 
       31 
     | 
    
         
            -
                     
     | 
| 
      
 28 
     | 
    
         
            +
                    modsxml_model
         
     | 
| 
       32 
29 
     | 
    
         
             
                  end
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 30 
     | 
    
         
            +
             
     | 
| 
       34 
31 
     | 
    
         
             
                  # loads the mods xml to stanford mods model for the fedora object defined in the druid,
         
     | 
| 
       35 
     | 
    
         
            -
                  # 
     | 
| 
       36 
     | 
    
         
            -
                  # @return [Stanford::Mods::Record] represents the mods xml 
     | 
| 
       37 
     | 
    
         
            -
                  def reload 
     | 
| 
      
 32 
     | 
    
         
            +
                  # it reads the mods xml from PURL server with every call
         
     | 
| 
      
 33 
     | 
    
         
            +
                  # @return [Stanford::Mods::Record] represents the mods xml
         
     | 
| 
      
 34 
     | 
    
         
            +
                  def reload
         
     | 
| 
       38 
35 
     | 
    
         
             
                    @modsxml_ng_doc = ModsxmlReader.read(@druid)
         
     | 
| 
       39 
     | 
    
         
            -
                     
     | 
| 
      
 36 
     | 
    
         
            +
                    load
         
     | 
| 
       40 
37 
     | 
    
         
             
                  end
         
     | 
| 
       41 
     | 
    
         
            -
             
     | 
| 
       42 
38 
     | 
    
         
             
                end
         
     | 
| 
       43 
39 
     | 
    
         
             
              end
         
     | 
| 
       44 
40 
     | 
    
         
             
            end
         
     | 
    
        data/lib/reader/modsxml_reader.rb
    CHANGED
    
    | 
         @@ -3,21 +3,18 @@ require 'open-uri' 
     | 
|
| 
       3 
3 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       4 
4 
     | 
    
         
             
              module InputXml
         
     | 
| 
       5 
5 
     | 
    
         
             
                class ModsxmlReader
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
6 
     | 
    
         
             
                  # reads the mods xml for the fedora object that is defined , from the purl server
         
     | 
| 
       8 
7 
     | 
    
         
             
                  # @param [String] druid e.g. ab123cd4567
         
     | 
| 
       9 
8 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the mods xml for the fedora object
         
     | 
| 
       10 
9 
     | 
    
         
             
                  # @raise [DiscoveryIndexer::Errors::MissingModsPage] if there's no mods xml available for this druid
         
     | 
| 
       11 
10 
     | 
    
         
             
                  def self.read(druid)
         
     | 
| 
       12 
11 
     | 
    
         
             
                    mods_uri = "#{DiscoveryIndexer::PURL_DEFAULT}/#{druid}.mods"
         
     | 
| 
       13 
     | 
    
         
            -
                    
         
     | 
| 
       14 
12 
     | 
    
         
             
                    begin
         
     | 
| 
       15 
     | 
    
         
            -
                       
     | 
| 
       16 
     | 
    
         
            -
                      return modsxml_ng_doc
         
     | 
| 
      
 13 
     | 
    
         
            +
                      Nokogiri::XML(open(mods_uri))
         
     | 
| 
       17 
14 
     | 
    
         
             
                    rescue
         
     | 
| 
       18 
15 
     | 
    
         
             
                      raise DiscoveryIndexer::Errors::MissingModsPage.new(mods_uri)
         
     | 
| 
       19 
16 
     | 
    
         
             
                    end
         
     | 
| 
       20 
17 
     | 
    
         
             
                  end
         
     | 
| 
       21 
18 
     | 
    
         
             
                end
         
     | 
| 
       22 
19 
     | 
    
         
             
              end
         
     | 
| 
       23 
     | 
    
         
            -
            end
         
     | 
| 
      
 20 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/reader/purlxml.rb
    CHANGED
    
    | 
         @@ -1,43 +1,36 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       2 
2 
     | 
    
         
             
              module InputXml
         
     | 
| 
       3 
     | 
    
         
            -
                
         
     | 
| 
       4 
     | 
    
         
            -
                # This class is the main class to access and parse the purl xml 
         
     | 
| 
      
 3 
     | 
    
         
            +
                # This class is the main class to access and parse the purl xml
         
     | 
| 
       5 
4 
     | 
    
         
             
                #    as retrieved from PURL server
         
     | 
| 
       6 
5 
     | 
    
         
             
                # @example to run the code
         
     | 
| 
       7 
6 
     | 
    
         
             
                #  druid = "aa111aa1111"
         
     | 
| 
       8 
7 
     | 
    
         
             
                #  p =  DiscoveryIndexer::InputXml::Purlxml.new(druid)
         
     | 
| 
       9 
8 
     | 
    
         
             
                #  model =  p.load()
         
     | 
| 
       10 
     | 
    
         
            -
                # 
         
     | 
| 
       11 
9 
     | 
    
         
             
                class Purlxml
         
     | 
| 
       12 
     | 
    
         
            -
                  
         
     | 
| 
       13 
     | 
    
         
            -
                  # initializes a new object 
         
     | 
| 
      
 10 
     | 
    
         
            +
                  # initializes a new object
         
     | 
| 
       14 
11 
     | 
    
         
             
                  # @param druid [String] the druid object in the format "aa111aa1111"
         
     | 
| 
       15 
12 
     | 
    
         
             
                  def initialize(druid)
         
     | 
| 
       16 
13 
     | 
    
         
             
                    @druid = druid
         
     | 
| 
       17 
     | 
    
         
            -
                    @purlxml_ng_doc = nil 
     | 
| 
      
 14 
     | 
    
         
            +
                    @purlxml_ng_doc = nil
         
     | 
| 
       18 
15 
     | 
    
         
             
                  end
         
     | 
| 
       19 
16 
     | 
    
         | 
| 
       20 
17 
     | 
    
         
             
                  # loads the purl xml to purlxml model for the fedora object defined in the druid,
         
     | 
| 
       21 
     | 
    
         
            -
                  # 
     | 
| 
       22 
     | 
    
         
            -
                  # @return [PurlxmlModel] represents the purlxml 
     | 
| 
       23 
     | 
    
         
            -
                  def load 
     | 
| 
       24 
     | 
    
         
            -
                    if @purlxml_ng_doc.nil? 
     | 
| 
       25 
     | 
    
         
            -
             
     | 
| 
       26 
     | 
    
         
            -
                     
     | 
| 
       27 
     | 
    
         
            -
                    
         
     | 
| 
       28 
     | 
    
         
            -
                    purlxml_parser = PurlxmlParserStrict.new(@druid,@purlxml_ng_doc)
         
     | 
| 
       29 
     | 
    
         
            -
                    purlxml_model = purlxml_parser.parse()
         
     | 
| 
       30 
     | 
    
         
            -
                    return purlxml_model
         
     | 
| 
      
 18 
     | 
    
         
            +
                  # it reads the purl xml once from PURL server, and repeat the parsing with each call
         
     | 
| 
      
 19 
     | 
    
         
            +
                  # @return [PurlxmlModel] represents the purlxml
         
     | 
| 
      
 20 
     | 
    
         
            +
                  def load
         
     | 
| 
      
 21 
     | 
    
         
            +
                    @purlxml_ng_doc = PurlxmlReader.read(@druid) if @purlxml_ng_doc.nil?
         
     | 
| 
      
 22 
     | 
    
         
            +
                    purlxml_parser = PurlxmlParserStrict.new(@druid, @purlxml_ng_doc)
         
     | 
| 
      
 23 
     | 
    
         
            +
                    purlxml_model = purlxml_parser.parse
         
     | 
| 
      
 24 
     | 
    
         
            +
                    purlxml_model
         
     | 
| 
       31 
25 
     | 
    
         
             
                  end
         
     | 
| 
       32 
     | 
    
         
            -
             
     | 
| 
      
 26 
     | 
    
         
            +
             
     | 
| 
       33 
27 
     | 
    
         
             
                  # loads the purl xml to purlxml model for the fedora object defined in the druid
         
     | 
| 
       34 
     | 
    
         
            -
                  # 
     | 
| 
       35 
     | 
    
         
            -
                  # @return [PurlxmlModel] represents the purlxml 
     | 
| 
       36 
     | 
    
         
            -
                  def reload 
     | 
| 
      
 28 
     | 
    
         
            +
                  # it reads the purl xml from PURL server with every call
         
     | 
| 
      
 29 
     | 
    
         
            +
                  # @return [PurlxmlModel] represents the purlxml
         
     | 
| 
      
 30 
     | 
    
         
            +
                  def reload
         
     | 
| 
       37 
31 
     | 
    
         
             
                    @purlxml_ng_doc = PurlxmlReader.read(@druid)
         
     | 
| 
       38 
     | 
    
         
            -
                     
     | 
| 
      
 32 
     | 
    
         
            +
                    load
         
     | 
| 
       39 
33 
     | 
    
         
             
                  end
         
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
      
 34 
     | 
    
         
            +
                end
         
     | 
| 
       41 
35 
     | 
    
         
             
              end
         
     | 
| 
       42 
36 
     | 
    
         
             
            end
         
     | 
| 
       43 
     | 
    
         
            -
              
         
     | 
    
        data/lib/reader/purlxml_model.rb
    CHANGED
    
    | 
         @@ -1,47 +1,46 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       2 
2 
     | 
    
         
             
              module InputXml
         
     | 
| 
       3 
3 
     | 
    
         
             
                class PurlxmlModel
         
     | 
| 
       4 
     | 
    
         
            -
                  
         
     | 
| 
       5 
     | 
    
         
            -
                   
     | 
| 
       6 
     | 
    
         
            -
                   
     | 
| 
       7 
     | 
    
         
            -
             
     | 
| 
       8 
     | 
    
         
            -
                  
         
     | 
| 
       9 
     | 
    
         
            -
                   
     | 
| 
       10 
     | 
    
         
            -
                  # @return [Nokogiri::XML] The publix xml as retrieved from purl server 
         
     | 
| 
      
 4 
     | 
    
         
            +
                  # @!attribute [rw] druid
         
     | 
| 
      
 5 
     | 
    
         
            +
                  # @return [String] The druid value eg., ab123cd4567
         
     | 
| 
      
 6 
     | 
    
         
            +
                  attr_accessor :druid
         
     | 
| 
      
 7 
     | 
    
         
            +
             
     | 
| 
      
 8 
     | 
    
         
            +
                  # @!attribute [rw] public_xml
         
     | 
| 
      
 9 
     | 
    
         
            +
                  # @return [Nokogiri::XML] The public xml as retrieved from purl server
         
     | 
| 
       11 
10 
     | 
    
         
             
                  attr_accessor :public_xml
         
     | 
| 
       12 
11 
     | 
    
         | 
| 
       13 
     | 
    
         
            -
                   
     | 
| 
      
 12 
     | 
    
         
            +
                  # @!attribute [rw] content_metadata
         
     | 
| 
       14 
13 
     | 
    
         
             
                  # @return [Nokogiri::XML] The content_metadata as extracted from public xml
         
     | 
| 
       15 
14 
     | 
    
         
             
                  attr_accessor :content_metadata
         
     | 
| 
       16 
15 
     | 
    
         | 
| 
       17 
     | 
    
         
            -
                   
     | 
| 
      
 16 
     | 
    
         
            +
                  # @!attribute [rw] identity_metadata
         
     | 
| 
       18 
17 
     | 
    
         
             
                  # @return [Nokogiri::XML] The identity_metadata as extracted from public xml
         
     | 
| 
       19 
18 
     | 
    
         
             
                  attr_accessor :identity_metadata
         
     | 
| 
       20 
19 
     | 
    
         | 
| 
       21 
     | 
    
         
            -
                   
     | 
| 
      
 20 
     | 
    
         
            +
                  # @!attribute [rw] rights_metadata
         
     | 
| 
       22 
21 
     | 
    
         
             
                  # @return [Nokogiri::XML] The rights_metadata as extracted from public xml
         
     | 
| 
       23 
22 
     | 
    
         
             
                  attr_accessor :rights_metadata
         
     | 
| 
       24 
23 
     | 
    
         | 
| 
       25 
     | 
    
         
            -
                   
     | 
| 
      
 24 
     | 
    
         
            +
                  # @!attribute [rw] dc
         
     | 
| 
       26 
25 
     | 
    
         
             
                  # @return [Nokogiri::XML] The dc element as extracted from public xml
         
     | 
| 
       27 
26 
     | 
    
         
             
                  attr_accessor :dc
         
     | 
| 
       28 
     | 
    
         
            -
             
     | 
| 
       29 
     | 
    
         
            -
                   
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
      
 28 
     | 
    
         
            +
                  # @!attribute [rw] rdf
         
     | 
| 
       30 
29 
     | 
    
         
             
                  # @return [Nokogiri::XML] The rdf element as extracted from public xml
         
     | 
| 
       31 
30 
     | 
    
         
             
                  attr_accessor :rdf
         
     | 
| 
       32 
31 
     | 
    
         | 
| 
       33 
32 
     | 
    
         
             
                  # @!attribute [rw] release_tags_hash
         
     | 
| 
       34 
33 
     | 
    
         
             
                  # @return [Hash] The release_tag in hash format as extracted from public xml
         
     | 
| 
       35 
     | 
    
         
            -
                  #  ReleaseData element. 
     | 
| 
      
 34 
     | 
    
         
            +
                  #  ReleaseData element.
         
     | 
| 
       36 
35 
     | 
    
         
             
                  # @example
         
     | 
| 
       37 
36 
     | 
    
         
             
                  #  !{"target1"=>true, "target2"=>false}
         
     | 
| 
       38 
     | 
    
         
            -
                  attr_accessor :release_tags_hash 
     | 
| 
      
 37 
     | 
    
         
            +
                  attr_accessor :release_tags_hash
         
     | 
| 
       39 
38 
     | 
    
         | 
| 
       40 
39 
     | 
    
         
             
                  # @!attribute [rw] dor_content_type
         
     | 
| 
       41 
40 
     | 
    
         
             
                  # @return [String] The dor_content_type as extracted from public xml
         
     | 
| 
       42 
41 
     | 
    
         
             
                  #  content_metadata.
         
     | 
| 
       43 
42 
     | 
    
         
             
                  attr_accessor :dor_content_type
         
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
      
 43 
     | 
    
         
            +
             
     | 
| 
       45 
44 
     | 
    
         
             
                  # @!attribute [rw] dor_display_type
         
     | 
| 
       46 
45 
     | 
    
         
             
                  # @return [String] The displayType as extracted from public xml
         
     | 
| 
       47 
46 
     | 
    
         
             
                  #  identity_metadata.
         
     | 
| 
         @@ -50,25 +49,25 @@ module DiscoveryIndexer 
     | 
|
| 
       50 
49 
     | 
    
         
             
                  # @!attribute [rw] is_collection
         
     | 
| 
       51 
50 
     | 
    
         
             
                  # @return [Boolean] true if the item type is collection in the identity_metadata
         
     | 
| 
       52 
51 
     | 
    
         
             
                  attr_accessor :is_collection
         
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
      
 52 
     | 
    
         
            +
             
     | 
| 
       54 
53 
     | 
    
         
             
                  # @!attribute [rw] collection_druids
         
     | 
| 
       55 
54 
     | 
    
         
             
                  # @return [Array] a list of the collections that this druid belongs to
         
     | 
| 
       56 
55 
     | 
    
         
             
                  # @example
         
     | 
| 
       57 
56 
     | 
    
         
             
                  #  ["aa11aaa1111","bb111bb1111"]
         
     | 
| 
       58 
57 
     | 
    
         
             
                  attr_accessor :collection_druids
         
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
      
 58 
     | 
    
         
            +
             
     | 
| 
       60 
59 
     | 
    
         
             
                  # @!attribute [rw] file_ids
         
     | 
| 
       61 
60 
     | 
    
         
             
                  # @return [Array] a list of the file ids in the content_metadata
         
     | 
| 
       62 
61 
     | 
    
         
             
                  # @example
         
     | 
| 
       63 
     | 
    
         
            -
                  #  ["pc0065_b08_f10_i031.txt","pc0065_b08_f10_i032.txt"] 
     | 
| 
      
 62 
     | 
    
         
            +
                  #  ["pc0065_b08_f10_i031.txt","pc0065_b08_f10_i032.txt"]
         
     | 
| 
       64 
63 
     | 
    
         
             
                  attr_accessor :file_ids
         
     | 
| 
       65 
64 
     | 
    
         | 
| 
       66 
65 
     | 
    
         
             
                  # @!attribute [rw] image_ids
         
     | 
| 
       67 
66 
     | 
    
         
             
                  # @return [Array] a list of the image ids in the content_metadata
         
     | 
| 
       68 
67 
     | 
    
         
             
                  # @example
         
     | 
| 
       69 
     | 
    
         
            -
                  #  ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"] 
     | 
| 
      
 68 
     | 
    
         
            +
                  #  ["pc0065_b08_f10_i031.jp2","pc0065_b08_f10_i032.jp2"]
         
     | 
| 
       70 
69 
     | 
    
         
             
                  attr_accessor :image_ids
         
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
      
 70 
     | 
    
         
            +
             
     | 
| 
       72 
71 
     | 
    
         
             
                  # @!attribute [rw] catkey
         
     | 
| 
       73 
72 
     | 
    
         
             
                  # @return [String] the catkey attribute in identity_metadata
         
     | 
| 
       74 
73 
     | 
    
         
             
                  attr_accessor :catkey
         
     | 
| 
         @@ -76,15 +75,15 @@ module DiscoveryIndexer 
     | 
|
| 
       76 
75 
     | 
    
         
             
                  # @!attribute [rw] barcode
         
     | 
| 
       77 
76 
     | 
    
         
             
                  # @return [String] the barcode attribute in identity_metadata
         
     | 
| 
       78 
77 
     | 
    
         
             
                  attr_accessor :barcode
         
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
      
 78 
     | 
    
         
            +
             
     | 
| 
       80 
79 
     | 
    
         
             
                  # @!attribute [rw] label
         
     | 
| 
       81 
80 
     | 
    
         
             
                  # @return [String] the objectLabel attribute in identity_metadata
         
     | 
| 
       82 
81 
     | 
    
         
             
                  attr_accessor :label
         
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
      
 82 
     | 
    
         
            +
             
     | 
| 
       84 
83 
     | 
    
         
             
                  # @!attribute [rw] copyright
         
     | 
| 
       85 
84 
     | 
    
         
             
                  # @return [String] the copyright statement from rights metadata
         
     | 
| 
       86 
85 
     | 
    
         
             
                  attr_accessor :copyright
         
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
      
 86 
     | 
    
         
            +
             
     | 
| 
       88 
87 
     | 
    
         
             
                  # @!attribute [rw] use_and_reproduction
         
     | 
| 
       89 
88 
     | 
    
         
             
                  # @return [String] the use and reproduction statement from rights metadata
         
     | 
| 
       90 
89 
     | 
    
         
             
                  attr_accessor :use_and_reproduction
         
     | 
| 
         @@ -92,13 +91,6 @@ module DiscoveryIndexer 
     | 
|
| 
       92 
91 
     | 
    
         
             
                  # @!attribute [rw] source_id
         
     | 
| 
       93 
92 
     | 
    
         
             
                  # @return [String] the sourceid from identity metadata
         
     | 
| 
       94 
93 
     | 
    
         
             
                  attr_accessor :source_id
         
     | 
| 
       95 
     | 
    
         
            -
                        
         
     | 
| 
       96 
94 
     | 
    
         
             
                end
         
     | 
| 
       97 
95 
     | 
    
         
             
              end
         
     | 
| 
       98 
96 
     | 
    
         
             
            end
         
     | 
| 
       99 
     | 
    
         
            -
              
         
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
         @@ -3,154 +3,142 @@ module DiscoveryIndexer 
     | 
|
| 
       3 
3 
     | 
    
         
             
                class PurlxmlParserStrict < PurlxmlParser
         
     | 
| 
       4 
4 
     | 
    
         
             
                  include DiscoveryIndexer::Logging
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
       6 
     | 
    
         
            -
                  
         
     | 
| 
       7 
6 
     | 
    
         
             
                  RDF_NAMESPACE = 'http://www.w3.org/1999/02/22-rdf-syntax-ns#'
         
     | 
| 
       8 
7 
     | 
    
         
             
                  OAI_DC_NAMESPACE = 'http://www.openarchives.org/OAI/2.0/oai_dc/'
         
     | 
| 
       9 
8 
     | 
    
         
             
                  MODS_NAMESPACE = 'http://www.loc.gov/mods/v3'
         
     | 
| 
       10 
9 
     | 
    
         | 
| 
       11 
10 
     | 
    
         
             
                  # it parses the purlxml into a purlxml model
         
     | 
| 
       12 
11 
     | 
    
         
             
                  # @return [PurlxmlModel] represents the purlxml as parsed based on the parser rules
         
     | 
| 
       13 
     | 
    
         
            -
                  def parse 
     | 
| 
      
 12 
     | 
    
         
            +
                  def parse
         
     | 
| 
       14 
13 
     | 
    
         
             
                    purlxml_model = PurlxmlModel.new
         
     | 
| 
       15 
14 
     | 
    
         
             
                    purlxml_model.druid             = @druid
         
     | 
| 
       16 
15 
     | 
    
         
             
                    purlxml_model.public_xml        = @purlxml_ng_doc
         
     | 
| 
       17 
     | 
    
         
            -
                    purlxml_model.content_metadata  = parse_content_metadata 
     | 
| 
       18 
     | 
    
         
            -
                    purlxml_model.identity_metadata = parse_identity_metadata 
     | 
| 
       19 
     | 
    
         
            -
                    purlxml_model.rights_metadata   = parse_rights_metadata 
     | 
| 
       20 
     | 
    
         
            -
                    purlxml_model.dc                = parse_dc 
     | 
| 
       21 
     | 
    
         
            -
                    purlxml_model.rdf               = parse_rdf 
     | 
| 
       22 
     | 
    
         
            -
                    purlxml_model.is_collection     = parse_is_collection 
     | 
| 
       23 
     | 
    
         
            -
                    purlxml_model.collection_druids = parse_collection_druids 
     | 
| 
       24 
     | 
    
         
            -
                    purlxml_model.dor_content_type  = parse_dor_content_type 
     | 
| 
       25 
     | 
    
         
            -
                    purlxml_model.dor_display_type  = parse_dor_display_type 
     | 
| 
       26 
     | 
    
         
            -
                    purlxml_model.release_tags_hash = parse_release_tags_hash 
     | 
| 
       27 
     | 
    
         
            -
                    purlxml_model.file_ids          = parse_file_ids 
     | 
| 
       28 
     | 
    
         
            -
                    purlxml_model.image_ids         = parse_image_ids 
     | 
| 
       29 
     | 
    
         
            -
                    purlxml_model.catkey            = parse_catkey 
     | 
| 
       30 
     | 
    
         
            -
                    purlxml_model.barcode           = parse_barcode 
     | 
| 
       31 
     | 
    
         
            -
                    purlxml_model.label             = parse_label 
     | 
| 
       32 
     | 
    
         
            -
                    purlxml_model.copyright         = parse_copyright 
     | 
| 
       33 
     | 
    
         
            -
                    purlxml_model.use_and_reproduction = parse_use_and_reproduction 
     | 
| 
       34 
     | 
    
         
            -
                    purlxml_model.source_id 
     | 
| 
       35 
     | 
    
         
            -
                     
     | 
| 
       36 
     | 
    
         
            -
                  end
         
     | 
| 
       37 
     | 
    
         
            -
             
     | 
| 
      
 16 
     | 
    
         
            +
                    purlxml_model.content_metadata  = parse_content_metadata
         
     | 
| 
      
 17 
     | 
    
         
            +
                    purlxml_model.identity_metadata = parse_identity_metadata
         
     | 
| 
      
 18 
     | 
    
         
            +
                    purlxml_model.rights_metadata   = parse_rights_metadata
         
     | 
| 
      
 19 
     | 
    
         
            +
                    purlxml_model.dc                = parse_dc
         
     | 
| 
      
 20 
     | 
    
         
            +
                    purlxml_model.rdf               = parse_rdf
         
     | 
| 
      
 21 
     | 
    
         
            +
                    purlxml_model.is_collection     = parse_is_collection
         
     | 
| 
      
 22 
     | 
    
         
            +
                    purlxml_model.collection_druids = parse_collection_druids
         
     | 
| 
      
 23 
     | 
    
         
            +
                    purlxml_model.dor_content_type  = parse_dor_content_type
         
     | 
| 
      
 24 
     | 
    
         
            +
                    purlxml_model.dor_display_type  = parse_dor_display_type
         
     | 
| 
      
 25 
     | 
    
         
            +
                    purlxml_model.release_tags_hash = parse_release_tags_hash
         
     | 
| 
      
 26 
     | 
    
         
            +
                    purlxml_model.file_ids          = parse_file_ids
         
     | 
| 
      
 27 
     | 
    
         
            +
                    purlxml_model.image_ids         = parse_image_ids
         
     | 
| 
      
 28 
     | 
    
         
            +
                    purlxml_model.catkey            = parse_catkey
         
     | 
| 
      
 29 
     | 
    
         
            +
                    purlxml_model.barcode           = parse_barcode
         
     | 
| 
      
 30 
     | 
    
         
            +
                    purlxml_model.label             = parse_label
         
     | 
| 
      
 31 
     | 
    
         
            +
                    purlxml_model.copyright         = parse_copyright
         
     | 
| 
      
 32 
     | 
    
         
            +
                    purlxml_model.use_and_reproduction = parse_use_and_reproduction
         
     | 
| 
      
 33 
     | 
    
         
            +
                    purlxml_model.source_id = parse_sourceid
         
     | 
| 
      
 34 
     | 
    
         
            +
                    purlxml_model
         
     | 
| 
      
 35 
     | 
    
         
            +
                  end
         
     | 
| 
      
 36 
     | 
    
         
            +
             
     | 
| 
       38 
37 
     | 
    
         
             
                  # extracts the identityMetadata for this fedora object, from the purl xml
         
     | 
| 
       39 
38 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the identityMetadata for the fedora object
         
     | 
| 
       40 
39 
     | 
    
         
             
                  # @raise [DiscoveryIndexer::Errors::MissingIdentityMetadata] if there is no identity_metadata
         
     | 
| 
       41 
40 
     | 
    
         
             
                  def parse_identity_metadata
         
     | 
| 
       42 
     | 
    
         
            -
                     
     | 
| 
       43 
     | 
    
         
            -
             
     | 
| 
       44 
     | 
    
         
            -
             
     | 
| 
       45 
     | 
    
         
            -
             
     | 
| 
       46 
     | 
    
         
            -
                     
     | 
| 
       47 
     | 
    
         
            -
                      raise DiscoveryIndexer::Errors::MissingIdentityMetadata.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       48 
     | 
    
         
            -
                    end
         
     | 
| 
      
 41 
     | 
    
         
            +
                    ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/identityMetadata').to_xml)
         
     | 
| 
      
 42 
     | 
    
         
            +
                    fail DiscoveryIndexer::Errors::MissingIdentityMetadata.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
         
     | 
| 
      
 43 
     | 
    
         
            +
                    ng_doc
         
     | 
| 
      
 44 
     | 
    
         
            +
                  rescue
         
     | 
| 
      
 45 
     | 
    
         
            +
                    raise DiscoveryIndexer::Errors::MissingIdentityMetadata.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       49 
46 
     | 
    
         
             
                  end
         
     | 
| 
       50 
     | 
    
         
            -
             
     | 
| 
       51 
     | 
    
         
            -
                  def parse_rights_metadata 
     | 
| 
       52 
     | 
    
         
            -
                     
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                     
     | 
| 
       57 
     | 
    
         
            -
                      raise DiscoveryIndexer::Errors::MissingRightsMetadata.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       58 
     | 
    
         
            -
                    end
         
     | 
| 
      
 47 
     | 
    
         
            +
             
     | 
| 
      
 48 
     | 
    
         
            +
                  def parse_rights_metadata
         
     | 
| 
      
 49 
     | 
    
         
            +
                    ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/rightsMetadata').to_xml)
         
     | 
| 
      
 50 
     | 
    
         
            +
                    fail DiscoveryIndexer::Errors::MissingRightsMetadata.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
         
     | 
| 
      
 51 
     | 
    
         
            +
                    ng_doc
         
     | 
| 
      
 52 
     | 
    
         
            +
                  rescue
         
     | 
| 
      
 53 
     | 
    
         
            +
                    raise DiscoveryIndexer::Errors::MissingRightsMetadata.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       59 
54 
     | 
    
         
             
                  end
         
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
      
 55 
     | 
    
         
            +
             
     | 
| 
       61 
56 
     | 
    
         
             
                  # extracts the dc field for this fedora object, from the purl xml
         
     | 
| 
       62 
57 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the dc for the fedora object
         
     | 
| 
       63 
58 
     | 
    
         
             
                  # @raise [DiscoveryIndexer::Errors::MissingDC] if there is no dc element
         
     | 
| 
       64 
59 
     | 
    
         
             
                  def parse_dc
         
     | 
| 
       65 
     | 
    
         
            -
             
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
             
     | 
| 
       70 
     | 
    
         
            -
                        raise DiscoveryIndexer::Errors::MissingDC.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       71 
     | 
    
         
            -
                      end
         
     | 
| 
      
 60 
     | 
    
         
            +
                    ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/dc:dc', 'dc' => OAI_DC_NAMESPACE).to_xml(encoding: 'utf-8'))
         
     | 
| 
      
 61 
     | 
    
         
            +
                    fail DiscoveryIndexer::Errors::MissingDC.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
         
     | 
| 
      
 62 
     | 
    
         
            +
                    ng_doc
         
     | 
| 
      
 63 
     | 
    
         
            +
                  rescue
         
     | 
| 
      
 64 
     | 
    
         
            +
                    raise DiscoveryIndexer::Errors::MissingDC.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       72 
65 
     | 
    
         
             
                  end
         
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
      
 66 
     | 
    
         
            +
             
     | 
| 
       74 
67 
     | 
    
         
             
                  # extracts the rdf field for this fedora object, from the purl xml
         
     | 
| 
       75 
68 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the rdf for the fedora object
         
     | 
| 
       76 
69 
     | 
    
         
             
                  # @raise [DiscoveryIndexer::Errors::MissingRDF] if there is no rdf element
         
     | 
| 
       77 
70 
     | 
    
         
             
                  def parse_rdf
         
     | 
| 
       78 
     | 
    
         
            -
                     
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
                     
     | 
| 
       83 
     | 
    
         
            -
                      raise DiscoveryIndexer::Errors::MissingRDF.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       84 
     | 
    
         
            -
                    end
         
     | 
| 
      
 71 
     | 
    
         
            +
                    ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/rdf:RDF', 'rdf' => RDF_NAMESPACE).to_xml)
         
     | 
| 
      
 72 
     | 
    
         
            +
                    fail DiscoveryIndexer::Errors::MissingRDF.new(@purlxml_ng_doc.inspect) if !ng_doc || ng_doc.children.empty?
         
     | 
| 
      
 73 
     | 
    
         
            +
                    ng_doc
         
     | 
| 
      
 74 
     | 
    
         
            +
                  rescue
         
     | 
| 
      
 75 
     | 
    
         
            +
                    raise DiscoveryIndexer::Errors::MissingRDF.new(@purlxml_ng_doc.inspect)
         
     | 
| 
       85 
76 
     | 
    
         
             
                  end
         
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
                  
         
     | 
| 
      
 77 
     | 
    
         
            +
             
     | 
| 
       88 
78 
     | 
    
         
             
                  # extracts the release tag element for this fedora object, from the ReleaseData element in purl xml
         
     | 
| 
       89 
79 
     | 
    
         
             
                  # @return [Hash] the release tags for the fedora object
         
     | 
| 
       90 
80 
     | 
    
         
             
                  def parse_release_tags_hash
         
     | 
| 
       91 
     | 
    
         
            -
                    release_tags={}
         
     | 
| 
       92 
     | 
    
         
            -
                    unless 
     | 
| 
       93 
     | 
    
         
            -
                      release_elements = 
     | 
| 
       94 
     | 
    
         
            -
                      release_elements.each  
     | 
| 
       95 
     | 
    
         
            -
                        unless n.attr( 
     | 
| 
       96 
     | 
    
         
            -
                          release_target = n.attr( 
     | 
| 
      
 81 
     | 
    
         
            +
                    release_tags = {}
         
     | 
| 
      
 82 
     | 
    
         
            +
                    unless @purlxml_ng_doc.nil?
         
     | 
| 
      
 83 
     | 
    
         
            +
                      release_elements = @purlxml_ng_doc.xpath('//ReleaseData/release')
         
     | 
| 
      
 84 
     | 
    
         
            +
                      release_elements.each do |n|
         
     | 
| 
      
 85 
     | 
    
         
            +
                        unless n.attr('to').nil?
         
     | 
| 
      
 86 
     | 
    
         
            +
                          release_target = n.attr('to')
         
     | 
| 
       97 
87 
     | 
    
         
             
                          text = n.text
         
     | 
| 
       98 
     | 
    
         
            -
                          unless text.nil? 
     | 
| 
       99 
     | 
    
         
            -
                            release_tags[release_target]= to_boolean(text) 
         
     | 
| 
       100 
     | 
    
         
            -
                          end
         
     | 
| 
      
 88 
     | 
    
         
            +
                          release_tags[release_target] = to_boolean(text) unless text.nil?
         
     | 
| 
       101 
89 
     | 
    
         
             
                        end
         
     | 
| 
       102 
     | 
    
         
            -
                       
     | 
| 
      
 90 
     | 
    
         
            +
                      end
         
     | 
| 
       103 
91 
     | 
    
         
             
                    end
         
     | 
| 
       104 
     | 
    
         
            -
                     
     | 
| 
      
 92 
     | 
    
         
            +
                    release_tags
         
     | 
| 
       105 
93 
     | 
    
         
             
                  end
         
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
      
 94 
     | 
    
         
            +
             
     | 
| 
       107 
95 
     | 
    
         
             
                  # extracts the contentMetadata for this fedora object, from the purl xml
         
     | 
| 
       108 
96 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the contentMetadata for the fedora object
         
     | 
| 
       109 
97 
     | 
    
         
             
                  # @raise [DiscoveryIndexer::Errors::MissingContentMetadata] if there is no contentMetadata
         
     | 
| 
       110 
98 
     | 
    
         
             
                  def parse_content_metadata
         
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
             
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
      
 99 
     | 
    
         
            +
                    ng_doc = Nokogiri::XML(@purlxml_ng_doc.root.xpath('/publicObject/contentMetadata').to_xml)
         
     | 
| 
      
 100 
     | 
    
         
            +
                    ng_doc = nil if !ng_doc || ng_doc.children.empty?
         
     | 
| 
      
 101 
     | 
    
         
            +
                    ng_doc
         
     | 
| 
       114 
102 
     | 
    
         
             
                  end
         
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
      
 103 
     | 
    
         
            +
             
     | 
| 
       116 
104 
     | 
    
         
             
                  # @return true if the identityMetadata has <objectType>collection</objectType>, false otherwise
         
     | 
| 
       117 
105 
     | 
    
         
             
                  def parse_is_collection
         
     | 
| 
       118 
     | 
    
         
            -
                    identity_metadata 
     | 
| 
      
 106 
     | 
    
         
            +
                    identity_metadata = parse_identity_metadata
         
     | 
| 
       119 
107 
     | 
    
         
             
                    unless identity_metadata.nil?
         
     | 
| 
       120 
108 
     | 
    
         
             
                      object_type_nodes = identity_metadata.xpath('//objectType')
         
     | 
| 
       121 
     | 
    
         
            -
                      return true if object_type_nodes.find_index { |n|  
     | 
| 
      
 109 
     | 
    
         
            +
                      return true if object_type_nodes.find_index { |n| %w(collection set).include? n.text.downcase }
         
     | 
| 
       122 
110 
     | 
    
         
             
                    end
         
     | 
| 
       123 
111 
     | 
    
         
             
                    false
         
     | 
| 
       124 
112 
     | 
    
         
             
                  end
         
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
      
 113 
     | 
    
         
            +
             
     | 
| 
       126 
114 
     | 
    
         
             
                  # get the druids from isMemberOfCollection relationships in rels-ext from public_xml
         
     | 
| 
       127 
115 
     | 
    
         
             
                  # @return [Array<String>] the druids (e.g. ww123yy1234) this object has isMemberOfCollection relationship with, or nil if none
         
     | 
| 
       128 
116 
     | 
    
         
             
                  def parse_collection_druids
         
     | 
| 
       129 
     | 
    
         
            -
                    ns_hash = {'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' =>  
     | 
| 
      
 117 
     | 
    
         
            +
                    ns_hash = { 'rdf' => 'http://www.w3.org/1999/02/22-rdf-syntax-ns#', 'fedora' => 'info:fedora/fedora-system:def/relations-external#', '' => '' }
         
     | 
| 
       130 
118 
     | 
    
         
             
                    is_member_of_nodes ||= @purlxml_ng_doc.xpath('/publicObject/rdf:RDF/rdf:Description/fedora:isMemberOfCollection/@rdf:resource', ns_hash)
         
     | 
| 
       131 
119 
     | 
    
         
             
                    # from public_xml rels-ext
         
     | 
| 
       132 
120 
     | 
    
         
             
                    druids = []
         
     | 
| 
       133 
     | 
    
         
            -
                    is_member_of_nodes.each  
     | 
| 
      
 121 
     | 
    
         
            +
                    is_member_of_nodes.each do |n|
         
     | 
| 
       134 
122 
     | 
    
         
             
                      druids << n.value.split('druid:').last unless n.value.empty?
         
     | 
| 
       135 
     | 
    
         
            -
                     
     | 
| 
      
 123 
     | 
    
         
            +
                    end
         
     | 
| 
       136 
124 
     | 
    
         
             
                    return nil if druids.empty?
         
     | 
| 
       137 
125 
     | 
    
         
             
                    druids
         
     | 
| 
       138 
126 
     | 
    
         
             
                  end
         
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
      
 127 
     | 
    
         
            +
             
     | 
| 
       140 
128 
     | 
    
         
             
                  # the value of the type attribute for a DOR object's contentMetadata
                  #  more info about these values is here:
                  #    https://consul.stanford.edu/display/chimera/DOR+content+types%2C+resource+types+and+interpretive+metadata
                  #    https://consul.stanford.edu/display/chimera/Summary+of+Content+Types%2C+Resource+Types+and+their+behaviors
                  # Logs a debug message when no type value is found.
                  # @return [String] the type value; empty when the attribute is missing, nil when there is no contentMetadata
                  def parse_dor_content_type
                    content_md = parse_content_metadata
                    # dct stays nil when there is no contentMetadata to query
                    dct = content_md.xpath('contentMetadata/@type').text if content_md
                    if dct.nil? || dct.empty?
                      DiscoveryIndexer::Logging.logger.debug "#{@druid} has no DOR content type (<contentMetadata> element may be missing type attribute)"
                    end
                    dct
                  end
         
     | 
| 
       151 
     | 
    
         
            -
             
     | 
| 
      
 139 
     | 
    
         
            +
             
     | 
| 
       152 
140 
     | 
    
         
             
                  # the value of the displayType tag from a DOR collection's identityMetadata
         
     | 
| 
       153 
     | 
    
         
            -
                  # @return [String] 
     | 
| 
      
 141 
     | 
    
         
            +
                  # @return [String]
         
     | 
| 
       154 
142 
     | 
    
         
             
                  def parse_dor_display_type
         
     | 
| 
       155 
143 
     | 
    
         
             
                    identity_md = parse_identity_metadata
         
     | 
| 
       156 
144 
     | 
    
         
             
                    ddt = identity_md ? identity_md.xpath('//displayType').text : nil
         
     | 
| 
         @@ -161,43 +149,44 @@ module DiscoveryIndexer 
     | 
|
| 
       161 
149 
     | 
    
         
             
                  # the @id attribute of resource/file elements whose resource type is image or page, including extension
                  # Image ids are listed first, followed by page ids; empty ids are dropped.
                  # @return [Array<String>] filenames, or nil if there is no contentMetadata or no matching file id
                  def parse_image_ids
                    content_md = parse_content_metadata
                    return nil if content_md.nil?
                    # one query per resource type, in this order, so image files precede page files
                    ids = %w(image page).flat_map do |resource_type|
                      content_md.xpath("//resource[@type=\"#{resource_type}\"]/file/@id").map(&:text).reject(&:empty?)
                    end
                    ids.empty? ? nil : ids
                  end
         
     | 
| 
       174 
164 
     | 
    
         | 
| 
       175 
165 
     | 
    
         
             
                  # @return the content of the first identityMetadata/sourceId node in the public xml, or nil if there is none
                  def parse_sourceid
                    source_id_nodes = @purlxml_ng_doc.css('//identityMetadata/sourceId')
                    get_value(source_id_nodes)
                  end
         
     | 
| 
       178 
     | 
    
         
            -
             
     | 
| 
      
 168 
     | 
    
         
            +
             
     | 
| 
       179 
169 
     | 
    
         
             
                  # @return the content of the first matching rightsMetadata copyright node in the public xml, or nil if there is none
                  def parse_copyright
                    copyright_nodes = @purlxml_ng_doc.css('//rightsMetadata/copyright/human[type="copyright"]')
                    get_value(copyright_nodes)
                  end
         
     | 
| 
       182 
     | 
    
         
            -
             
     | 
| 
      
 172 
     | 
    
         
            +
             
     | 
| 
       183 
173 
     | 
    
         
             
                  # @return the content of the first matching rightsMetadata use node in the public xml, or nil if there is none
                  def parse_use_and_reproduction
                    use_nodes = @purlxml_ng_doc.css('//rightsMetadata/use/human[type="useAndReproduction"]')
                    get_value(use_nodes)
                  end
         
     | 
| 
       186 
     | 
    
         
            -
             
     | 
| 
      
 176 
     | 
    
         
            +
             
     | 
| 
       187 
177 
     | 
    
         
             
                  # the @id attribute of resource/file elements, including extension
                  # Empty ids are dropped.
                  # @return [Array<String>] filenames, or nil if there is no contentMetadata or no file id
                  def parse_file_ids
                    content_md = parse_content_metadata
                    # guard: nothing to parse without contentMetadata (the check must not be inverted,
                    # otherwise ids are never returned and xpath gets called on nil)
                    return nil if content_md.nil?
                    ids = content_md.xpath('//resource/file/@id').map(&:text).reject(&:empty?)
                    ids.empty? ? nil : ids
                  end
         
     | 
| 
      
 189 
     | 
    
         
            +
             
     | 
| 
       201 
190 
     | 
    
         
             
                  # @return catkey value from the DOR identity_metadata, or nil if there is no catkey
         
     | 
| 
       202 
191 
     | 
    
         
             
                  def parse_catkey
         
     | 
| 
       203 
192 
     | 
    
         
             
                    get_value(@purlxml_ng_doc.xpath("/publicObject/identityMetadata/otherId[@name='catkey']"))
         
     | 
| 
         @@ -210,23 +199,22 @@ module DiscoveryIndexer 
     | 
|
| 
       210 
199 
     | 
    
         | 
| 
       211 
200 
     | 
    
         
             
                  # @return objectLabel value from the DOR identity_metadata, or nil if there is no objectLabel
                  def parse_label
                    label_nodes = @purlxml_ng_doc.xpath('/publicObject/identityMetadata/objectLabel')
                    get_value(label_nodes)
                  end
         
     | 
| 
       215 
     | 
    
         
            -
             
     | 
| 
      
 204 
     | 
    
         
            +
             
     | 
| 
       216 
205 
     | 
    
         
             
                  # Reads the content of the first element of a node collection.
                  # @param node the result of an xpath/css query (anything responding to first), or nil
                  # @return the content of the first element, or nil if node is nil or has no first element
                  def get_value(node)
                    # look the first element up once instead of twice
                    first_node = node && node.first
                    first_node ? first_node.content : nil
                  end
         
     | 
| 
       219 
     | 
    
         
            -
             
     | 
| 
      
 208 
     | 
    
         
            +
             
     | 
| 
       220 
209 
     | 
    
         
             
                  # Converts a textual flag to a boolean.
                  # @param text [String, nil] the text to interpret
                  # @return [Boolean] true only when text is 'true' or 't' (case-insensitive); false for nil, empty, or any other text
                  def to_boolean(text)
                    return false if text.nil? || text.empty?
                    %w(true t).include?(text.downcase)
                  end
         
     | 
| 
       229 
218 
     | 
    
         
             
                end
         
     | 
| 
       230 
219 
     | 
    
         
             
              end
         
     | 
| 
       231 
220 
     | 
    
         
             
            end
         
     | 
| 
       232 
     | 
    
         
            -
              
         
     | 
| 
         @@ -3,14 +3,13 @@ require 'open-uri' 
     | 
|
| 
       3 
3 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       4 
4 
     | 
    
         
             
              module InputXml
         
     | 
| 
       5 
5 
     | 
    
         
             
                class PurlxmlReader
         
     | 
| 
       6 
     | 
    
         
            -
             
     | 
| 
       7 
6 
     | 
    
         
             
                  # reads the public xml for the given fedora object from the purl server
         
     | 
| 
       8 
7 
     | 
    
         
             
                  # @param [String] druid e.g. ab123cd4567
         
     | 
| 
       9 
8 
     | 
    
         
             
                  # @return [Nokogiri::XML::Document] the public xml for the fedora object
         
     | 
| 
       10 
9 
     | 
    
         
             
                  # @raise [MissingPublicXml] if there's no purl xml available for this druid
         
     | 
| 
       11 
10 
     | 
    
         
             
                  def self.read(druid)
         
     | 
| 
       12 
11 
     | 
    
         
             
                    purlxml_uri = "#{DiscoveryIndexer::PURL_DEFAULT}/#{druid}.xml"
         
     | 
| 
       13 
     | 
    
         
            -
             
     | 
| 
      
 12 
     | 
    
         
            +
             
     | 
| 
       14 
13 
     | 
    
         
             
                    begin
         
     | 
| 
       15 
14 
     | 
    
         
             
                      purlxml_object = Nokogiri::XML(open(purlxml_uri))
         
     | 
| 
       16 
15 
     | 
    
         
             
                      return purlxml_object
         
     | 
| 
         @@ -20,4 +19,4 @@ module DiscoveryIndexer 
     | 
|
| 
       20 
19 
     | 
    
         
             
                  end
         
     | 
| 
       21 
20 
     | 
    
         
             
                end
         
     | 
| 
       22 
21 
     | 
    
         
             
              end
         
     | 
| 
       23 
     | 
    
         
            -
            end
         
     | 
| 
      
 22 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/version.rb
    CHANGED
    
    
    
        data/lib/writer/solr_client.rb
    CHANGED
    
    | 
         @@ -3,6 +3,7 @@ require 'rsolr' 
     | 
|
| 
       3 
3 
     | 
    
         
             
            require 'rest-client'
         
     | 
| 
       4 
4 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       5 
5 
     | 
    
         
             
              module Writer
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Processes adds and deletes to the solr core
         
     | 
| 
       6 
7 
     | 
    
         
             
                class SolrClient
         
     | 
| 
       7 
8 
     | 
    
         
             
                  include DiscoveryIndexer::Logging
         
     | 
| 
       8 
9 
     | 
    
         | 
| 
         @@ -13,7 +14,7 @@ module DiscoveryIndexer 
     | 
|
| 
       13 
14 
     | 
    
         
             
                  # @param solr_connector [RSolr::Client]  is an open connection with the solr core
         
     | 
| 
       14 
15 
     | 
    
         
             
                  # @param max_retries [Integer] the maximum number of tries before fail
         
     | 
| 
       15 
16 
     | 
    
         
             
                  def self.add(id, solr_doc, solr_connector, max_retries = 10)
                    # adding is the non-delete path of process
                    is_delete = false
                    process(id, solr_doc, solr_connector, max_retries, is_delete)
                  end
         
     | 
| 
       18 
19 
     | 
    
         | 
| 
       19 
20 
     | 
    
         
             
                  # Add the document to solr, retry if an error occurs.
         
     | 
| 
         @@ -22,79 +23,76 @@ module DiscoveryIndexer 
     | 
|
| 
       22 
23 
     | 
    
         
             
                  # @param solr_connector[RSolr::Client]  is an open connection with the solr core
         
     | 
| 
       23 
24 
     | 
    
         
             
                  # @param max_retries [Integer] the maximum number of tries before fail
         
     | 
| 
       24 
25 
     | 
    
         
             
                  def self.delete(id, solr_connector, max_retries = 10)
                    # a delete needs no document body, so an empty doc is handed to process
                    empty_doc = {}
                    process(id, empty_doc, solr_connector, max_retries, true)
                  end
         
     | 
| 
       27 
28 
     | 
    
         | 
| 
       28 
29 
     | 
    
         
             
                  # Internal method that receives all the requests and deals with the
                  # SOLR core. Depending on the arguments it deletes, updates, or adds
                  # the document, then commits. The work runs inside with_retries with
                  # max_tries set to max_retries.
                  #
                  # @param id [String] the document id, usually it will be druid.
                  # @param solr_doc [Hash] is the solr doc in hash format
                  # @param solr_connector [RSolr::Client]  is an open connection with the solr core
                  # @param max_retries [Integer] the maximum number of tries before fail
                  # @param is_delete [Boolean] true to delete the document instead of writing it
                  def self.process(id, solr_doc, solr_connector, max_retries, is_delete = false)
                    # handed to with_retries as its handler; it only logs the exception and attempt number
                    on_failure = proc do |exception, attempt_number, _total_delay|
                      DiscoveryIndexer::Logging.logger.debug "#{exception.class} on attempt #{attempt_number} for #{id}"
                    end

                    with_retries(max_tries: max_retries, handler: on_failure, base_sleep_seconds: 1, max_sleep_seconds: 5) do |attempt|
                      DiscoveryIndexer::Logging.logger.debug "Attempt #{attempt} for #{id}"

                      # an in-place update is used only when the core allows it and the doc already exists;
                      # neither check runs for deletes
                      updating = !is_delete && allow_update?(solr_connector) && doc_exists?(id, solr_connector)
                      if is_delete
                        DiscoveryIndexer::Logging.logger.info "Deleting #{id} on attempt #{attempt}"
                        solr_connector.delete_by_id(id)
                      elsif updating
                        DiscoveryIndexer::Logging.logger.info "Updating #{id} on attempt #{attempt}"
                        update_solr_doc(id, solr_doc, solr_connector)
                      else
                        DiscoveryIndexer::Logging.logger.info "Indexing #{id} on attempt #{attempt}"
                        solr_connector.add(solr_doc)
                      end
                      solr_connector.commit
                      DiscoveryIndexer::Logging.logger.info "Completing #{id} successfully on attempt #{attempt}"
                    end
                  end
         
     | 
| 
       58 
58 
     | 
    
         | 
| 
       59 
59 
     | 
    
         
             
                  # @param solr_connector [RSolr::Client]  is an open connection with the solr core
                  # @return [Boolean] the connector's :allow_update option, or false when the option is not set
                  def self.allow_update?(solr_connector)
                    solr_connector.options.fetch(:allow_update, false)
                  end
         
     | 
| 
       64 
64 
     | 
    
         | 
| 
       65 
65 
     | 
    
         
             
                  # @param id [String] the document id, usually it will be druid.
                  # @param solr_connector [RSolr::Client]  is an open connection with the solr core
                  # @return [Boolean] true if the solr doc defined by this id exists (exactly one match is found)
                  def self.doc_exists?(id, solr_connector)
                    # interpolation (rather than String#+) also accepts ids that are not Strings
                    response = solr_connector.get 'select', params: { q: "id:\"#{id}\"" }
                    response['response']['numFound'] == 1
                  end
         
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
      
 72 
     | 
    
         
            +
             
     | 
| 
       73 
73 
     | 
    
         
             
                  # It is an internal method that updates the solr doc instead of adding a new one.
                  # It builds a JSON payload in which every field except the id carries a "set"
                  # of its values, and POSTs it to the core's update handler with commit=true.
                  #
                  # @param id [String] the document id, usually it will be druid.
                  # @param solr_doc [Hash] field name => a single value or an Array of values
                  # @param solr_connector [RSolr::Client] only its options[:url] is read here
                  def self.update_solr_doc(id, solr_doc, solr_connector)
                    # update_solr_doc can't use RSolr because updating hash doc is not supported
                    #  so we need to build the json input manually
                    solr_url = solr_connector.options[:url]
                    # avoid a double slash when the configured url already ends with one
                    url = solr_url.end_with?('/') ? "#{solr_url}update?commit=true" : "#{solr_url}/update?commit=true"

                    params = "[{\"id\":\"#{id}\","
                    solr_doc.each do |field_name, new_values|
                      # the id is already written above; every OTHER field gets a "set" entry
                      next if field_name.to_s == 'id'
                      params << "\"#{field_name}\":"
                      new_values = [new_values] unless new_values.is_a?(Array)
                      new_values = new_values.map { |s| s.to_s.gsub('\\', '\\\\\\').gsub('"', '\"').strip } # strip leading/trailing spaces and escape quotes for each value
                      params << "{\"set\":[\"#{new_values.join('","')}\"]},"
                    end
                    # drop the trailing comma left by the id or by the last field
                    params.chomp!(',')
                    params << '}]'
                    RestClient.post url, params, content_type: :json, accept: :json
                  end
         
     | 
| 
       97 
     | 
    
         
            -
                  
         
     | 
| 
       98 
96 
     | 
    
         
             
                end
         
     | 
| 
       99 
97 
     | 
    
         
             
              end
         
     | 
| 
       100 
     | 
    
         
            -
            end
         
     | 
| 
      
 98 
     | 
    
         
            +
            end
         
     | 
    
        data/lib/writer/solr_writer.rb
    CHANGED
    
    | 
         @@ -3,57 +3,57 @@ require 'rsolr' 
     | 
|
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            module DiscoveryIndexer
         
     | 
| 
       5 
5 
     | 
    
         
             
              module Writer
         
     | 
| 
      
 6 
     | 
    
         
            +
                # Performs writes to solr client based upon true and false release flags
         
     | 
| 
       6 
7 
     | 
    
         
             
                class SolrWriter
         
     | 
| 
       7 
8 
     | 
    
         
             
                  include DiscoveryIndexer::Logging
         
     | 
| 
       8 
     | 
    
         
            -
             
     | 
| 
      
 9 
     | 
    
         
            +
             
     | 
| 
       9 
10 
     | 
    
         
             
                  def process(id, index_doc, targets, solr_targets_configs)
         
     | 
| 
       10 
11 
     | 
    
         
             
                    @solr_targets_configs = solr_targets_configs
         
     | 
| 
       11 
12 
     | 
    
         
             
                    index_targets = []
         
     | 
| 
       12 
13 
     | 
    
         
             
                    delete_targets = []
         
     | 
| 
       13 
     | 
    
         
            -
                    targets.keys.each do |target| 
     | 
| 
       14 
     | 
    
         
            -
                      if targets[target] 
     | 
| 
      
 14 
     | 
    
         
            +
                    targets.keys.each do |target|
         
     | 
| 
      
 15 
     | 
    
         
            +
                      if targets[target]
         
     | 
| 
       15 
16 
     | 
    
         
             
                        index_targets.append(target)
         
     | 
| 
       16 
17 
     | 
    
         
             
                      else
         
     | 
| 
       17 
18 
     | 
    
         
             
                        delete_targets.append(target)
         
     | 
| 
       18 
19 
     | 
    
         
             
                      end
         
     | 
| 
       19 
20 
     | 
    
         
             
                    end
         
     | 
| 
       20 
     | 
    
         
            -
             
     | 
| 
      
 21 
     | 
    
         
            +
             
     | 
| 
       21 
22 
     | 
    
         
             
                    # get targets with true
         
     | 
| 
       22 
23 
     | 
    
         
             
                    solr_index_client(id, index_doc, index_targets)
         
     | 
| 
       23 
24 
     | 
    
         
             
                    # get targets with false
         
     | 
| 
       24 
25 
     | 
    
         
             
                    solr_delete_client(id, delete_targets)
         
     | 
| 
       25 
26 
     | 
    
         
             
                  end
         
     | 
| 
       26 
     | 
    
         
            -
             
     | 
| 
      
 27 
     | 
    
         
            +
             
     | 
| 
       27 
28 
     | 
    
         
             
                  def solr_delete_from_all(id, solr_targets_configs)
         
     | 
| 
       28 
29 
     | 
    
         
             
                    # Get a list of all registered targets
         
     | 
| 
       29 
     | 
    
         
            -
                    @solr_targets_configs=solr_targets_configs
         
     | 
| 
       30 
     | 
    
         
            -
                    targets = @solr_targets_configs.keys 
     | 
| 
      
 30 
     | 
    
         
            +
                    @solr_targets_configs = solr_targets_configs
         
     | 
| 
      
 31 
     | 
    
         
            +
                    targets = @solr_targets_configs.keys
         
     | 
| 
       31 
32 
     | 
    
         
             
                    solr_delete_client(id, targets)
         
     | 
| 
       32 
33 
     | 
    
         
             
                  end
         
     | 
| 
       33 
     | 
    
         
            -
             
     | 
| 
      
 34 
     | 
    
         
            +
             
     | 
| 
       34 
35 
     | 
    
         
             
                  def solr_index_client(id, index_doc, targets)
         
     | 
| 
       35 
36 
     | 
    
         
             
                    targets.each do |solr_target|
         
     | 
| 
       36 
     | 
    
         
            -
                      solr_connector = get_connector_for_target(solr_target) 
     | 
| 
      
 37 
     | 
    
         
            +
                      solr_connector = get_connector_for_target(solr_target)
         
     | 
| 
       37 
38 
     | 
    
         
             
                      SolrClient.add(id, index_doc, solr_connector) unless solr_connector.nil?
         
     | 
| 
       38 
     | 
    
         
            -
                    end 
     | 
| 
      
 39 
     | 
    
         
            +
                    end
         
     | 
| 
       39 
40 
     | 
    
         
             
                  end
         
     | 
| 
       40 
     | 
    
         
            -
             
     | 
| 
      
 41 
     | 
    
         
            +
             
     | 
| 
       41 
42 
     | 
    
         
             
                  def solr_delete_client(id, targets)
         
     | 
| 
       42 
43 
     | 
    
         
             
                    targets.each do |solr_target|
         
     | 
| 
       43 
44 
     | 
    
         
             
                      solr_connector = get_connector_for_target(solr_target)
         
     | 
| 
       44 
45 
     | 
    
         
             
                      SolrClient.delete(id, solr_connector) unless solr_connector.nil?
         
     | 
| 
       45 
     | 
    
         
            -
                    end 
     | 
| 
      
 46 
     | 
    
         
            +
                    end
         
     | 
| 
       46 
47 
     | 
    
         
             
                  end
         
     | 
| 
       47 
48 
     | 
    
         | 
| 
       48 
49 
     | 
    
         
             
                  def get_connector_for_target(solr_target)
         
     | 
| 
       49 
50 
     | 
    
         
             
                    solr_connector = nil
         
     | 
| 
       50 
     | 
    
         
            -
                    if @solr_targets_configs.keys.include?(solr_target) 
     | 
| 
      
 51 
     | 
    
         
            +
                    if @solr_targets_configs.keys.include?(solr_target)
         
     | 
| 
       51 
52 
     | 
    
         
             
                      config = @solr_targets_configs[solr_target]
         
     | 
| 
       52 
53 
     | 
    
         
             
                      solr_connector = RSolr.connect(config.deep_symbolize_keys)
         
     | 
| 
       53 
54 
     | 
    
         
             
                    end
         
     | 
| 
       54 
     | 
    
         
            -
                     
     | 
| 
      
 55 
     | 
    
         
            +
                    solr_connector
         
     | 
| 
       55 
56 
     | 
    
         
             
                  end
         
     | 
| 
       56 
     | 
    
         
            -
                  
         
     | 
| 
       57 
57 
     | 
    
         
             
                end
         
     | 
| 
       58 
58 
     | 
    
         
             
              end
         
     | 
| 
       59 
59 
     | 
    
         
             
            end
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,14 +1,15 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: discovery-indexer
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 0.9.6
     | 
| 
      
 4 
     | 
    
         
            +
              version: 0.9.7
         
     | 
| 
       5 
5 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       6 
6 
     | 
    
         
             
            authors:
         
     | 
| 
       7 
7 
     | 
    
         
             
            - Ahmed AlSum
         
     | 
| 
      
 8 
     | 
    
         
            +
            - Laney McGlohon
         
     | 
| 
       8 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       9 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       10 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       11 
     | 
    
         
            -
            date: 2015- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2015-09-23 00:00:00.000000000 Z
         
     | 
| 
       12 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       13 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       14 
15 
     | 
    
         
             
              name: nokogiri
         
     | 
| 
         @@ -138,7 +139,7 @@ dependencies: 
     | 
|
| 
       138 
139 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       139 
140 
     | 
    
         
             
            description: This library manages the core operations for the discovery indexing such
         
     | 
| 
       140 
141 
     | 
    
         
             
              as reading PURL xml, mapping to the solr document, and writing to solr core.
         
     | 
| 
       141 
     | 
    
         
            -
            email:  
     | 
| 
      
 142 
     | 
    
         
            +
            email: laneymcg@stanford.edu
         
     | 
| 
       142 
143 
     | 
    
         
             
            executables: []
         
     | 
| 
       143 
144 
     | 
    
         
             
            extensions: []
         
     | 
| 
       144 
145 
     | 
    
         
             
            extra_rdoc_files: []
         
     |