cul_scv_hydra 0.18.5 → 0.19.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/concerns/cul/scv/hydra/models/common.rb +7 -1
- data/app/models/generic_aggregator.rb +20 -9
- data/config/fedora.yml +8 -8
- data/config/locales/nnc.ntriples +12 -0
- data/config/subs.yml +12 -0
- data/lib/cul_scv_hydra/indexer.rb +27 -31
- data/lib/cul_scv_hydra/risearch_members.rb +17 -13
- data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +0 -3
- data/lib/cul_scv_hydra/version.rb +1 -1
- metadata +4 -4
- data/config/solr.yml +0 -5
- data/lib/cul_scv_hydra/version.rb~ +0 -11
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 34451fcabc90e9ac89be6eed09a7220eb2083da4
         | 
| 4 | 
            +
              data.tar.gz: 67a8560c793304772b84e2d276c72ccfce0c9a42
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: 28db11a406d299d886630272013790937cd2319de0d7fb32a0677950428e2559ecd804ad207a8b09a32e022a05518fafdddd0693e164cda1f9e56e370780fab3
         | 
| 7 | 
            +
              data.tar.gz: 185b9756f75697271db9806c69c1fbc21f6b45c29a1bf692f2efb21aabecd5667b87e47284ec39850a65bba38184907f310d704df33d31ac24c8c6383c71963a
         | 
| @@ -73,6 +73,11 @@ module Cul::Scv::Hydra::Models::Common | |
| 73 73 | 
             
                has_desc
         | 
| 74 74 | 
             
              end
         | 
| 75 75 |  | 
| 76 | 
            +
              # set the index type label and any RI-based fields 
         | 
| 77 | 
            +
              def set_size_labels(solr_doc={})
         | 
| 78 | 
            +
                solr_doc["index_type_label_ssi"] = [self.index_type_label]
         | 
| 79 | 
            +
              end
         | 
| 80 | 
            +
             | 
| 76 81 | 
             
              def to_solr(solr_doc = Hash.new, opts={})
         | 
| 77 82 | 
             
                solr_doc = super(solr_doc, opts)
         | 
| 78 83 |  | 
| @@ -105,7 +110,8 @@ module Cul::Scv::Hydra::Models::Common | |
| 105 110 | 
             
                  solr_doc["title_display_ssm"].uniq!
         | 
| 106 111 | 
             
                end
         | 
| 107 112 | 
             
                solr_doc["format_ssi"] = [self.route_as]
         | 
| 108 | 
            -
             | 
| 113 | 
            +
             | 
| 114 | 
            +
                set_size_labels(solr_doc)
         | 
| 109 115 |  | 
| 110 116 | 
             
                solr_doc.each_pair {|key, value|
         | 
| 111 117 | 
             
                  if value.is_a? Array
         | 
| @@ -16,25 +16,36 @@ class GenericAggregator < ::ActiveFedora::Base | |
| 16 16 | 
             
              def index_type_label
         | 
| 17 17 | 
             
                riquery = Cul::Scv::Hydra::Models::MEMBER_ITQL.gsub(/%PID%/, self.pid)
         | 
| 18 18 | 
             
                begin
         | 
| 19 | 
            -
                  docs = Cul::Scv::Fedora.repository.find_by_itql riquery, limit: 2, format: 'json'
         | 
| 19 | 
            +
                  docs = Cul::Scv::Fedora.repository.find_by_itql riquery, limit: 2, format: 'count/json'
         | 
| 20 20 | 
             
                  docs = JSON.parse(docs)['results']
         | 
| 21 | 
            +
                  size = docs.first && docs.first['count'] && docs.first['count'].to_i
         | 
| 21 22 | 
             
                rescue Exception=>e
         | 
| 22 23 | 
             
                  Rails.logger.warn("#{self.class.name} failed to find children with TQL: #{e.message}")
         | 
| 23 | 
            -
                  docs = self.parts
         | 
| 24 24 | 
             
                end
         | 
| 25 | 
            -
                 | 
| 26 | 
            -
             | 
| 27 | 
            -
             | 
| 28 | 
            -
             | 
| 25 | 
            +
                size ||= self.parts(response_format: :solr, limit: 2).size
         | 
| 26 | 
            +
                type_label_for(size)
         | 
| 27 | 
            +
              end
         | 
| 28 | 
            +
             | 
| 29 | 
            +
              def type_label_for(size=nil)
         | 
| 30 | 
            +
                if size == 0
         | 
| 31 | 
            +
                  return "EMPTY"
         | 
| 32 | 
            +
                elsif size == 1
         | 
| 33 | 
            +
                  return "SINGLE PART"
         | 
| 29 34 | 
             
                else
         | 
| 30 | 
            -
                   | 
| 35 | 
            +
                  return "MULTIPART"
         | 
| 31 36 | 
             
                end
         | 
| 32 | 
            -
             | 
| 37 | 
            +
              end
         | 
| 38 | 
            +
             | 
| 39 | 
            +
              # set the index type label and any RI-based fields 
         | 
| 40 | 
            +
              # override
         | 
| 41 | 
            +
              def set_size_labels(solr_doc={})
         | 
| 42 | 
            +
                count = Cul::Scv::Hydra::RisearchMembers.get_direct_member_count(pid)
         | 
| 43 | 
            +
                solr_doc["index_type_label_ssi"] = [type_label_for(count)]
         | 
| 44 | 
            +
                solr_doc["cul_number_of_members_isi"] = count
         | 
| 33 45 | 
             
              end
         | 
| 34 46 |  | 
| 35 47 | 
             
              def to_solr(solr_doc = Hash.new, opts={})
         | 
| 36 48 | 
             
                solr_doc = super(solr_doc, opts)
         | 
| 37 | 
            -
                solr_doc["cul_number_of_members_isi"] = Cul::Scv::Hydra::RisearchMembers.get_direct_member_pids(pid).length
         | 
| 38 49 | 
             
                solr_doc
         | 
| 39 50 | 
             
              end
         | 
| 40 51 |  | 
    
        data/config/fedora.yml
    CHANGED
    
    | @@ -1,11 +1,11 @@ | |
| 1 | 
            -
             | 
| 1 | 
            +
            alcott: &alcott
         | 
| 2 | 
            +
              :url: http://alcott.cul.columbia.edu:8080/fedora
         | 
| 2 3 | 
             
              :user: fedoraAdmin
         | 
| 3 | 
            -
              :password:  | 
| 4 | 
            -
             | 
| 5 | 
            -
              : | 
| 6 | 
            -
             | 
| 7 | 
            -
            test:
         | 
| 4 | 
            +
              :password: f+BULUS*^
         | 
| 5 | 
            +
            default: &default
         | 
| 6 | 
            +
              :url: http://127.0.0.1:8983/fedora
         | 
| 8 7 | 
             
              :user: fedoraAdmin
         | 
| 9 8 | 
             
              :password: fedoraAdmin
         | 
| 10 | 
            -
             | 
| 11 | 
            -
             | 
| 9 | 
            +
            development: *alcott
         | 
| 10 | 
            +
            test: *default
         | 
| 11 | 
            +
            production: *alcott
         | 
| @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            <info:fedora/marcorg:nnc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.loc.gov/mads/rdf/v1#CorporateName> . 
         | 
| 2 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#hasReciprocalAuthority> <http://id.loc.gov/vocabulary/organizations/nnc> . 
         | 
| 3 | 
            +
            <info:fedora/marcorg:nnc> <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.loc.gov/mads/rdf/v1#Authority> . 
         | 
| 4 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#code> "NNC"^^<http://id.loc.gov/datatypes/orgs/code> . 
         | 
| 5 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#code> "nnc"^^<http://id.loc.gov/datatypes/orgs/normalized> .
         | 
| 6 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#authoritativeLabel> "Butler Library"@en .
         | 
| 7 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#hasAbbreviationVariant> _:bnodereponncfacet .
         | 
| 8 | 
            +
            <info:fedora/marcorg:nnc> <http://www.loc.gov/mads/rdf/v1#hasExpansionVariant> _:bnodereponncfull .
         | 
| 9 | 
            +
            _:bnodereponncfacet <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.loc.gov/mads/rdf/v1#Variant> .
         | 
| 10 | 
            +
            _:bnodereponncfacet <http://www.loc.gov/mads/rdf/v1#variantLabel> "Butler Library"@en .
         | 
| 11 | 
            +
            _:bnodereponncfull <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.loc.gov/mads/rdf/v1#Variant> .
         | 
| 12 | 
            +
            _:bnodereponncfull <http://www.loc.gov/mads/rdf/v1#variantLabel> "Butler Library, Columbia University"@en .
         | 
    
        data/config/subs.yml
    ADDED
    
    | @@ -0,0 +1,12 @@ | |
| 1 | 
            +
            development:
         | 
| 2 | 
            +
              djatoka_server: "http://iris.cul.columbia.edu:8080"
         | 
| 3 | 
            +
              fedora_server: "http://sayers.cul.columbia.edu:8080"
         | 
| 4 | 
            +
              php_server: "http://bach.cul.columbia.edu/dev"
         | 
| 5 | 
            +
            test:
         | 
| 6 | 
            +
              djatoka_server: "http://iris.cul.columbia.edu:8080"
         | 
| 7 | 
            +
              fedora_server: "http://sayers.cul.columbia.edu:8080"
         | 
| 8 | 
            +
              php_server: "http://bach.cul.columbia.edu/dev"
         | 
| 9 | 
            +
            production:
         | 
| 10 | 
            +
              djatoka_server: "http://iris.cul.columbia.edu:8080"
         | 
| 11 | 
            +
              fedora_server: "http://alcott.cul.columbia.edu:8080"
         | 
| 12 | 
            +
              php_server: "http://bach.cul.columbia.edu"
         | 
| @@ -1,7 +1,6 @@ | |
| 1 1 | 
             
            module Cul::Scv::Hydra::Indexer
         | 
| 2 2 |  | 
| 3 | 
            -
              def self. | 
| 4 | 
            -
             | 
| 3 | 
            +
              def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
         | 
| 5 4 | 
             
                if pid.blank?
         | 
| 6 5 | 
             
                  raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
         | 
| 7 6 | 
             
                end
         | 
| @@ -11,24 +10,12 @@ module Cul::Scv::Hydra::Indexer | |
| 11 10 | 
             
                end
         | 
| 12 11 |  | 
| 13 12 | 
             
                if pids_to_omit.present? && pids_to_omit.include?(pid)
         | 
| 14 | 
            -
                  puts 'Skipping  | 
| 13 | 
            +
                  puts 'Skipping topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
         | 
| 15 14 | 
             
                else
         | 
| 16 15 | 
             
                  puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
         | 
| 17 16 | 
             
                  puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
         | 
| 18 17 |  | 
| 19 | 
            -
                   | 
| 20 | 
            -
                  active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
         | 
| 21 | 
            -
             | 
| 22 | 
            -
                  if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
         | 
| 23 | 
            -
                    puts 'Top level object was skipped because GenericResources are being skipped and it is a GenericResource.'
         | 
| 24 | 
            -
                  else
         | 
| 25 | 
            -
                    begin
         | 
| 26 | 
            -
                      active_fedora_object.update_index
         | 
| 27 | 
            -
                    rescue Exception => e
         | 
| 28 | 
            -
                      puts 'Encountered problem.  Skipping record.  Exception: ' + e.message
         | 
| 29 | 
            -
                    end
         | 
| 30 | 
            -
                    puts 'Done indexing topmost object (' + pid + '). Took ' + (Time.now - START_TIME).to_s + ' seconds' if verbose_output
         | 
| 31 | 
            -
                  end
         | 
| 18 | 
            +
                  yield pid
         | 
| 32 19 |  | 
| 33 20 | 
             
                end
         | 
| 34 21 |  | 
| @@ -49,27 +36,36 @@ module Cul::Scv::Hydra::Indexer | |
| 49 36 | 
             
                if total_number_of_members > 0
         | 
| 50 37 | 
             
                  unique_pids.each {|pid|
         | 
| 51 38 |  | 
| 52 | 
            -
                     | 
| 39 | 
            +
                    puts 'Recursing on ' + i.to_s + ' of ' + total_number_of_members.to_s + ' members (' + pid + ')...' if verbose_output
         | 
| 53 40 |  | 
| 54 | 
            -
                     | 
| 55 | 
            -
             | 
| 56 | 
            -
                    if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
         | 
| 57 | 
            -
                      puts "skipped (because we're skipping GenericResources." if verbose_output
         | 
| 58 | 
            -
                    else
         | 
| 59 | 
            -
                      begin
         | 
| 60 | 
            -
                        active_fedora_object.update_index
         | 
| 61 | 
            -
                      rescue Exception => e
         | 
| 62 | 
            -
                        puts 'Encountered problem.  Skipping record.  Exception: ' + e.message
         | 
| 63 | 
            -
                      end
         | 
| 64 | 
            -
                      # Display progress
         | 
| 65 | 
            -
                      puts 'done.' if verbose_output
         | 
| 66 | 
            -
                    end
         | 
| 41 | 
            +
                    yield pid
         | 
| 67 42 |  | 
| 68 43 | 
             
                    i += 1
         | 
| 69 44 | 
             
                  }
         | 
| 70 45 | 
             
                end
         | 
| 71 46 |  | 
| 72 | 
            -
                puts ' | 
| 47 | 
            +
                puts 'Recursion complete!'
         | 
| 48 | 
            +
             | 
| 49 | 
            +
              end
         | 
| 50 | 
            +
              def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
         | 
| 51 | 
            +
             | 
| 52 | 
            +
                descend_from(top_pid, pids_to_omit, verbose_output) do |pid|
         | 
| 53 | 
            +
             | 
| 54 | 
            +
                  # We found an object with the desired PID. Let's reindex it
         | 
| 55 | 
            +
                  active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
         | 
| 56 | 
            +
             | 
| 57 | 
            +
                  if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
         | 
| 58 | 
            +
                    puts 'Top level object was skipped because GenericResources are being skipped and it is a GenericResource.'
         | 
| 59 | 
            +
                  else
         | 
| 60 | 
            +
                    begin
         | 
| 61 | 
            +
                      active_fedora_object.update_index
         | 
| 62 | 
            +
                      puts 'done.' if verbose_output
         | 
| 63 | 
            +
                    rescue Exception => e
         | 
| 64 | 
            +
                      puts 'Encountered problem.  Skipping record.  Exception: ' + e.message
         | 
| 65 | 
            +
                    end
         | 
| 66 | 
            +
                  end
         | 
| 67 | 
            +
             | 
| 68 | 
            +
                end
         | 
| 73 69 |  | 
| 74 70 | 
             
              end
         | 
| 75 71 |  | 
| @@ -3,13 +3,12 @@ module Cul::Scv::Hydra::RisearchMembers | |
| 3 3 | 
             
              def self.get_recursive_member_pids(pid, verbose_output=false, cmodel_type='all')
         | 
| 4 4 |  | 
| 5 5 | 
             
                recursive_member_query =
         | 
| 6 | 
            -
                  'select $child $parent  | 
| 6 | 
            +
                  'select $child $parent from <#ri>
         | 
| 7 7 | 
             
                  where
         | 
| 8 | 
            -
                  walk($child <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '> and $child <http://purl.oclc.org/NET/CUL/memberOf> $parent)
         | 
| 9 | 
            -
                  and
         | 
| 10 | 
            -
                  $child <fedora-model:hasModel> $cmodel'
         | 
| 8 | 
            +
                  walk($child <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '> and $child <http://purl.oclc.org/NET/CUL/memberOf> $parent)'      
         | 
| 11 9 |  | 
| 12 10 | 
             
                unless cmodel_type == 'all'
         | 
| 11 | 
            +
                  recursive_member_query += ' and $child <fedora-model:hasModel> $cmodel'
         | 
| 13 12 | 
             
                  recursive_member_query += ' and $cmodel <mulgara:is> <info:fedora/ldpd:' + cmodel_type + '>'
         | 
| 14 13 | 
             
                end
         | 
| 15 14 |  | 
| @@ -29,27 +28,32 @@ module Cul::Scv::Hydra::RisearchMembers | |
| 29 28 |  | 
| 30 29 | 
             
              end
         | 
| 31 30 |  | 
| 32 | 
            -
              def self. | 
| 31 | 
            +
              def self.get_direct_member_results(pid, verbose_output=false, format='json')
         | 
| 33 32 |  | 
| 34 33 | 
             
                direct_member_query =
         | 
| 35 | 
            -
                  'select $pid  | 
| 36 | 
            -
                  where $pid <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '>
         | 
| 37 | 
            -
                  and $pid <fedora-model:hasModel> $cmodel'
         | 
| 34 | 
            +
                  'select $pid from <#ri>
         | 
| 35 | 
            +
                  where $pid <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '>'
         | 
| 38 36 |  | 
| 39 37 | 
             
                puts 'Performing query:' if verbose_output
         | 
| 40 38 | 
             
                puts direct_member_query if verbose_output
         | 
| 41 39 |  | 
| 42 40 | 
             
                search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(direct_member_query, {
         | 
| 43 41 | 
             
                  :type => 'tuples',
         | 
| 44 | 
            -
                  :format =>  | 
| 42 | 
            +
                  :format => format,
         | 
| 45 43 | 
             
                  :limit => '',
         | 
| 46 44 | 
             
                  :stream => 'on'
         | 
| 47 45 | 
             
                }))
         | 
| 48 46 |  | 
| 49 | 
            -
                 | 
| 50 | 
            -
             | 
| 51 | 
            -
                return unique_pids
         | 
| 52 | 
            -
             | 
| 47 | 
            +
                return search_response['results']
         | 
| 53 48 | 
             
              end
         | 
| 54 49 |  | 
| 50 | 
            +
              def self.get_direct_member_pids(pid, verbose_output=false)
         | 
| 51 | 
            +
                unique_pids = get_direct_member_results(pid,verbose_output,'json')
         | 
| 52 | 
            +
                unique_pids.map{|result| result['pid'].gsub('info:fedora/', '') }.uniq
         | 
| 53 | 
            +
              end
         | 
| 54 | 
            +
              
         | 
| 55 | 
            +
              def self.get_direct_member_count(pid, verbose_output=false)
         | 
| 56 | 
            +
                count = get_direct_member_results(pid,verbose_output,'count/json')
         | 
| 57 | 
            +
                return count.blank? ? 0 : count[0]['count'].to_i
         | 
| 58 | 
            +
              end
         | 
| 55 59 | 
             
            end
         | 
| @@ -274,9 +274,6 @@ module Cul::Scv::Hydra::Solrizer | |
| 274 274 | 
             
                    places_without_uri << ScvModsFieldable.normalize(n.text, true)
         | 
| 275 275 | 
             
                  end
         | 
| 276 276 |  | 
| 277 | 
            -
            			puts 'places_with_uri: ' + places_with_uri.inspect
         | 
| 278 | 
            -
            			puts 'places_without_uri: ' + places_without_uri.inspect
         | 
| 279 | 
            -
             | 
| 280 277 | 
             
            			return (places_without_uri.length > 0 ? places_without_uri : places_with_uri)
         | 
| 281 278 | 
             
            		end
         | 
| 282 279 |  | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: cul_scv_hydra
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0. | 
| 4 | 
            +
              version: 0.19.0
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Benjamin Armintor
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2014-10- | 
| 11 | 
            +
            date: 2014-10-16 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: blacklight
         | 
| @@ -254,10 +254,11 @@ files: | |
| 254 254 | 
             
            - config/fedora.yml
         | 
| 255 255 | 
             
            - config/jetty.yml
         | 
| 256 256 | 
             
            - config/locales/ldpd_hydra.en.yml
         | 
| 257 | 
            +
            - config/locales/nnc.ntriples
         | 
| 257 258 | 
             
            - config/predicate_mappings.yml
         | 
| 258 | 
            -
            - config/solr.yml
         | 
| 259 259 | 
             
            - config/solr_mappings.yml
         | 
| 260 260 | 
             
            - config/solr_value_maps.yml
         | 
| 261 | 
            +
            - config/subs.yml
         | 
| 261 262 | 
             
            - lib/cul_scv_fedora/dummy_object.rb
         | 
| 262 263 | 
             
            - lib/cul_scv_fedora/rubydora_patch.rb
         | 
| 263 264 | 
             
            - lib/cul_scv_fedora/url_helper_behavior.rb
         | 
| @@ -293,7 +294,6 @@ files: | |
| 293 294 | 
             
            - lib/cul_scv_hydra/solrizer/terminology_based_solrizer.rb
         | 
| 294 295 | 
             
            - lib/cul_scv_hydra/solrizer/value_mapper.rb
         | 
| 295 296 | 
             
            - lib/cul_scv_hydra/version.rb
         | 
| 296 | 
            -
            - lib/cul_scv_hydra/version.rb~
         | 
| 297 297 | 
             
            - lib/tasks/cmodel.rake
         | 
| 298 298 | 
             
            - lib/tasks/cul_hydra_dev.rake
         | 
| 299 299 | 
             
            - lib/tasks/index.rake
         | 
    
        data/config/solr.yml
    DELETED