spotlight-dor-resources 0.0.1 → 0.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +1 -0
- data/.rubocop.yml +4 -0
- data/.rubocop_todo.yml +0 -3
- data/lib/spotlight/dor/indexer.rb +25 -13
- data/lib/spotlight/dor/resources/version.rb +1 -1
- data/spec/unit/spotlight/dor/indexer_spec.rb +63 -1
- data/spotlight-dor-resources.gemspec +1 -1
- metadata +3 -3
    
        checksums.yaml
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            ---
         | 
| 2 2 | 
             
            SHA1:
         | 
| 3 | 
            -
              metadata.gz:  | 
| 4 | 
            -
              data.tar.gz:  | 
| 3 | 
            +
              metadata.gz: 31130b0ef44659b04e226ce3f7d54afeb2fab9bd
         | 
| 4 | 
            +
              data.tar.gz: 773201c4913261fb010cbd06a7bd027fb550acc1
         | 
| 5 5 | 
             
            SHA512:
         | 
| 6 | 
            -
              metadata.gz:  | 
| 7 | 
            -
              data.tar.gz:  | 
| 6 | 
            +
              metadata.gz: dd69ded57b4a0daaa03b544218a95edb3b23a503ae54c56274f7f8cc06bb8949120f19af29b9047b81d743914dd2e601fbeb5b4a133edfe2718084457290a096
         | 
| 7 | 
            +
              data.tar.gz: 90f34128fd6494c255f69f9cf4b430cdae7c060618ba648e99a982c3fdfdf2d41ade20b73d6febe48a8eae46b7dbf5823996b5469e25b756340a84fb217a2f04
         | 
    
        data/.gitignore
    CHANGED
    
    
    
        data/.rubocop.yml
    CHANGED
    
    
    
        data/.rubocop_todo.yml
    CHANGED
    
    
| @@ -31,11 +31,12 @@ module Spotlight::Dor | |
| 31 31 | 
             
                end
         | 
| 32 32 |  | 
| 33 33 | 
             
                # add fields from raw mods
         | 
| 34 | 
            -
                # see comment with add_donor_tags about Feigenbaum specific donor tags data
         | 
| 35 34 | 
             
                before_index :add_box
         | 
| 35 | 
            +
                # see comment with add_donor_tags about Feigenbaum specific donor tags data
         | 
| 36 36 | 
             
                before_index :add_donor_tags
         | 
| 37 37 | 
             
                before_index :add_genre
         | 
| 38 38 | 
             
                before_index :add_folder
         | 
| 39 | 
            +
                before_index :add_folder_name
         | 
| 39 40 | 
             
                before_index :add_series
         | 
| 40 41 | 
             
                before_index :mods_cartographics_indexing
         | 
| 41 42 |  | 
| @@ -72,7 +73,7 @@ module Spotlight::Dor | |
| 72 73 |  | 
| 73 74 | 
             
                # This new donor_tags_sim field was added in October 2015 specifically for the Feigenbaum exhibit.  It is very likely
         | 
| 74 75 | 
             
                #  it will go unused by other projects, but should be benign (since this field will not be created if this specific MODS note is not found.)
         | 
| 75 | 
            -
                #  Later refactoring could include  | 
| 76 | 
            +
                #  Later refactoring could include exhibit specific fields.   Peter Mangiafico
         | 
| 76 77 | 
             
                def add_donor_tags sdb, solr_doc
         | 
| 77 78 | 
             
                  donor_tags = sdb.smods_rec.note.select { |n| n.displayLabel == 'Donor tags' }.map(&:content)
         | 
| 78 79 | 
             
                  insert_field solr_doc, 'donor_tags', donor_tags, :symbol # this is a _ssim field
         | 
| @@ -86,21 +87,30 @@ module Spotlight::Dor | |
| 86 87 | 
             
                  #   _location.physicalLocation should find top level and relatedItem
         | 
| 87 88 | 
             
                  folder_num = sdb.smods_rec._location.physicalLocation.map do |node|
         | 
| 88 89 | 
             
                    val = node.text
         | 
| 89 | 
            -
             | 
| 90 | 
            -
                    match_data = val | 
| 91 | 
            -
             | 
| 92 | 
            -
             | 
| 93 | 
            -
             | 
| 94 | 
            -
             | 
| 95 | 
            -
             | 
| 96 | 
            -
             | 
| 97 | 
            -
             | 
| 98 | 
            -
             | 
| 99 | 
            -
                    end
         | 
| 90 | 
            +
             | 
| 91 | 
            +
                    match_data = if val =~ /\|/
         | 
| 92 | 
            +
                                   # we assume the data is pipe-delimited, and may contain commas within values
         | 
| 93 | 
            +
                                   val.match(/Folder ?:? ?([^|]+)/)
         | 
| 94 | 
            +
                                 else
         | 
| 95 | 
            +
                                   # the data should be comma-delimited, and may not contain commas within values
         | 
| 96 | 
            +
                                   val.match(/Folder ?:? ?([^,]+)/)
         | 
| 97 | 
            +
                                 end
         | 
| 98 | 
            +
             | 
| 99 | 
            +
                    match_data[1].strip if match_data.present?
         | 
| 100 100 | 
             
                  end
         | 
| 101 | 
            +
             | 
| 101 102 | 
             
                  solr_doc['folder_ssi'] = folder_num.first if folder_num.present?
         | 
| 102 103 | 
             
                end
         | 
| 103 104 |  | 
| 105 | 
            +
                # add the folder name to solr_doc as folder_name_ssi field (note: single valued!)
         | 
| 106 | 
            +
                #   data is specific to Feigenbaum collection and is in <note type='preferred citation'>
         | 
| 107 | 
            +
                def add_folder_name(sdb, solr_doc)
         | 
| 108 | 
            +
                  # see spec for data examples
         | 
| 109 | 
            +
                  preferred_citation = sdb.smods_rec.note.select { |n| n.type_at == 'preferred citation' }.map(&:content)
         | 
| 110 | 
            +
                  match_data = preferred_citation.first.match(/Title: +(.+)/i) if preferred_citation.present?
         | 
| 111 | 
            +
                  solr_doc['folder_name_ssi'] = match_data[1].rstrip if match_data.present?
         | 
| 112 | 
            +
                end
         | 
| 113 | 
            +
             | 
| 104 114 | 
             
                # add plain MODS <genre> element data, not the SearchWorks genre values
         | 
| 105 115 | 
             
                def add_genre sdb, solr_doc
         | 
| 106 116 | 
             
                  insert_field solr_doc, 'genre', sdb.smods_rec.genre.content, :symbol # this is a _ssim field
         | 
| @@ -121,6 +131,7 @@ module Spotlight::Dor | |
| 121 131 | 
             
                  solr_doc['series_ssi'] = series_num.first if series_num.present?
         | 
| 122 132 | 
             
                end
         | 
| 123 133 |  | 
| 134 | 
            +
                # rubocop:disable Metrics/AbcSize
         | 
| 124 135 | 
             
                def mods_cartographics_indexing sdb, solr_doc
         | 
| 125 136 | 
             
                  insert_field(solr_doc, "coordinates", Array(sdb.smods_rec.subject.cartographics.coordinates).map { |n| n.text }, :stored_searchable)
         | 
| 126 137 |  | 
| @@ -138,6 +149,7 @@ module Spotlight::Dor | |
| 138 149 | 
             
                    solr_doc["point_bbox"] << "#{minX} #{minY} #{maxX} #{maxY}"
         | 
| 139 150 | 
             
                  end
         | 
| 140 151 | 
             
                end
         | 
| 152 | 
            +
                # rubocop:enable Metrics/AbcSize
         | 
| 141 153 |  | 
| 142 154 | 
             
                def coord_to_decimal point
         | 
| 143 155 | 
             
                  regex = /(?<dir>[NESW])\s*(?<deg>\d+)°(?:(?<sec>\d+)ʹ)?/
         | 
| @@ -172,7 +172,9 @@ describe Spotlight::Dor::Indexer do | |
| 172 172 | 
             
                  'Flat-box 228 | Volume 1': nil,
         | 
| 173 173 | 
             
                  # shpc (actually in <relatedItem><location><physicalLocation>)
         | 
| 174 174 | 
             
                  'Series Biographical Photographs | Box 1 | Folder Abbot, Nathan': 'Abbot, Nathan',
         | 
| 175 | 
            -
                  'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad'
         | 
| 175 | 
            +
                  'Series General Photographs | Box 1 | Folder Administration building--Outer Quad': 'Administration building--Outer Quad',
         | 
| 176 | 
            +
                  # hypothetical
         | 
| 177 | 
            +
                  'Folder: 42, Sheet: 15': '42'
         | 
| 176 178 | 
             
                }.each do |example, expected|
         | 
| 177 179 | 
             
                  describe "for example '#{example}'" do
         | 
| 178 180 | 
             
                    let(:example) { example }
         | 
| @@ -197,4 +199,64 @@ describe Spotlight::Dor::Indexer do | |
| 197 199 | 
             
                  end # for example
         | 
| 198 200 | 
             
                end # each
         | 
| 199 201 | 
             
              end # add_folder
         | 
| 202 | 
            +
             | 
| 203 | 
            +
              let(:mods_note_plain) do
         | 
| 204 | 
            +
                Nokogiri::XML <<-EOF
         | 
| 205 | 
            +
                  <mods xmlns="#{Mods::MODS_NS}">
         | 
| 206 | 
            +
                    <note>#{example}</note>
         | 
| 207 | 
            +
                  </mods>
         | 
| 208 | 
            +
                EOF
         | 
| 209 | 
            +
              end
         | 
| 210 | 
            +
              let(:mods_note_preferred_citation) do
         | 
| 211 | 
            +
                Nokogiri::XML <<-EOF
         | 
| 212 | 
            +
                  <mods xmlns="#{Mods::MODS_NS}">
         | 
| 213 | 
            +
                    <note type="preferred citation">#{example}</note>
         | 
| 214 | 
            +
                  </mods>
         | 
| 215 | 
            +
                EOF
         | 
| 216 | 
            +
              end
         | 
| 217 | 
            +
              describe "#add_folder_name" do
         | 
| 218 | 
            +
                # example string as key, expected folder name as value
         | 
| 219 | 
            +
                # all from feigenbaum (or based on feigenbaum), as that is only coll
         | 
| 220 | 
            +
                {
         | 
| 221 | 
            +
                  'Call Number: SC0340, Accession: 1986-052, Box: 20, Folder: 40, Title: S': 'S',
         | 
| 222 | 
            +
                  'Call Number: SC0340, Accession: 1986-052, Box: 54, Folder: 25, Title: Balzer': 'Balzer',
         | 
| 223 | 
            +
                  'Call Number: SC0340, Accession: 1986-052, Box : 30, Folder: 21, Title: Feigenbaum, Publications. 2 of 2.': 'Feigenbaum, Publications. 2 of 2.',
         | 
| 224 | 
            +
                  # colon in name
         | 
| 225 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box: 10, Folder: 26, Title: Gordon Bell Letter rdf:about blah (AI) 1987': 'Gordon Bell Letter rdf:about blah (AI) 1987',
         | 
| 226 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 74, Title: Microcomputer Systems Proposal: blah blah': 'Microcomputer Systems Proposal: blah blah',
         | 
| 227 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box: 14, Folder: 20, Title: blah "bleah: blargW^"ugh" seriously?.': 'blah "bleah: blargW^"ugh" seriously?.',
         | 
| 228 | 
            +
                  # quotes in name
         | 
| 229 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box: 29, Folder: 18, Title: "bleah" blah': '"bleah" blah',
         | 
| 230 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box: 11, Folder: 58, Title: "M": blah': '"M": blah',
         | 
| 231 | 
            +
                  'Call Number: SC0340, Accession 2005-101, Box : 32A, Folder: 19, Title: blah "bleah" blue': 'blah "bleah" blue',
         | 
| 232 | 
            +
                  # not parseable
         | 
| 233 | 
            +
                  'Call Number: SC0340, Accession 2005-101': nil,
         | 
| 234 | 
            +
                  'Call Number: SC0340, Accession: 1986-052': nil,
         | 
| 235 | 
            +
                  'Call Number: SC0340, Accession: 1986-052, Box 36 Folder 38': nil,
         | 
| 236 | 
            +
                  'blah blah ... with the umbrella title Feigenbaum and Feldman, Computers and Thought II. blah blah': nil,
         | 
| 237 | 
            +
                  'blah blah ... Title ... blah blah': nil
         | 
| 238 | 
            +
                }.each do |example, expected|
         | 
| 239 | 
            +
                  describe "for example '#{example}'" do
         | 
| 240 | 
            +
                    let(:example) { example }
         | 
| 241 | 
            +
                    context 'in preferred citation note' do
         | 
| 242 | 
            +
                      before do
         | 
| 243 | 
            +
                        allow(r).to receive(:mods).and_return(mods_note_preferred_citation)
         | 
| 244 | 
            +
                        subject.send(:add_folder_name, sdb, solr_doc)
         | 
| 245 | 
            +
                      end
         | 
| 246 | 
            +
                      it "has the expected folder name '#{expected}'" do
         | 
| 247 | 
            +
                        expect(solr_doc['folder_name_ssi']).to eq expected
         | 
| 248 | 
            +
                      end
         | 
| 249 | 
            +
                    end
         | 
| 250 | 
            +
                    context 'in plain note' do
         | 
| 251 | 
            +
                      before do
         | 
| 252 | 
            +
                        allow(r).to receive(:mods).and_return(mods_note_plain)
         | 
| 253 | 
            +
                        subject.send(:add_folder_name, sdb, solr_doc)
         | 
| 254 | 
            +
                      end
         | 
| 255 | 
            +
                      it 'does not have a folder name' do
         | 
| 256 | 
            +
                        expect(solr_doc['folder_name_ssi']).to be_falsey
         | 
| 257 | 
            +
                      end
         | 
| 258 | 
            +
                    end
         | 
| 259 | 
            +
                  end # for example
         | 
| 260 | 
            +
                end # each
         | 
| 261 | 
            +
              end # add_folder_name
         | 
| 200 262 | 
             
            end
         | 
| @@ -9,7 +9,7 @@ Gem::Specification.new do |spec| | |
| 9 9 | 
             
              spec.authors       = ["Chris Beer"]
         | 
| 10 10 | 
             
              spec.email         = ["cabeer@stanford.edu"]
         | 
| 11 11 | 
             
              spec.summary       = "Spotlight resource indexer for DOR resources."
         | 
| 12 | 
            -
              spec.homepage      = ""
         | 
| 12 | 
            +
              spec.homepage      = "https://github.com/sul-dlss/spotlight-dor-resources"
         | 
| 13 13 | 
             
              spec.license       = "Apache 2"
         | 
| 14 14 |  | 
| 15 15 | 
             
              spec.files         = `git ls-files -z`.split("\x0")
         | 
    
        metadata
    CHANGED
    
    | @@ -1,14 +1,14 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification
         | 
| 2 2 | 
             
            name: spotlight-dor-resources
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version
         | 
| 4 | 
            -
              version: 0.0.1 | 
| 4 | 
            +
              version: 0.0.2
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors:
         | 
| 7 7 | 
             
            - Chris Beer
         | 
| 8 8 | 
             
            autorequire: 
         | 
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 | 
            -
            date: 2015-10- | 
| 11 | 
            +
            date: 2015-10-30 00:00:00.000000000 Z
         | 
| 12 12 | 
             
            dependencies:
         | 
| 13 13 | 
             
            - !ruby/object:Gem::Dependency
         | 
| 14 14 | 
             
              name: faraday
         | 
| @@ -292,7 +292,7 @@ files: | |
| 292 292 | 
             
            - spec/vcr_cassettes/indexer_integration_tests/genre/no_genre_ssim_field_when_genre_not_in_MODS.yml
         | 
| 293 293 | 
             
            - spec/vcr_cassettes/indexer_integration_tests/genre/solr_doc_has_genre_ssim_field_when_genre_in_MODS.yml
         | 
| 294 294 | 
             
            - spotlight-dor-resources.gemspec
         | 
| 295 | 
            -
            homepage: '' | 
| 295 | 
            +
            homepage: https://github.com/sul-dlss/spotlight-dor-resources
         | 
| 296 296 | 
             
            licenses:
         | 
| 297 297 | 
             
            - Apache 2
         | 
| 298 298 | 
             
            metadata: {}
         |