RubyGems - shelver - Versions diffs - 0.0.0 - Mend

shelver 0.0.0

Files changed (29) hide show

data/.gitignore +19 -0
data/LICENSE +20 -0
data/README.rdoc +17 -0
data/Rakefile +45 -0
data/VERSION +1 -0
data/config/hydra_types.yml +4 -0
data/config/solr.yml +24 -0
data/lib/shelver/configuration.rb +8 -0
data/lib/shelver/extractor.rb +89 -0
data/lib/shelver/indexer.rb +251 -0
data/lib/shelver/main.rb +17 -0
data/lib/shelver/replicator.rb +143 -0
data/lib/shelver/repository.rb +54 -0
data/lib/shelver.rb +103 -0
data/lib/tasks/shelver.rake +33 -0
data/shelver.gemspec +74 -0
data/spec/fixtures/druid-bv448hq0314-descMetadata.xml +11 -0
data/spec/fixtures/druid-bv448hq0314-extProperties.xml +52 -0
data/spec/fixtures/druid-cm234kq4672-extProperties.xml +5 -0
data/spec/fixtures/druid-cm234kq4672-stories.xml +17 -0
data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +11 -0
data/spec/fixtures/rels_ext_cmodel.xml +8 -0
data/spec/rcov.opts +2 -0
data/spec/spec.opts +1 -0
data/spec/spec_helper.rb +16 -0
data/spec/units/extractor_spec.rb +50 -0
data/spec/units/indexer_spec.rb +127 -0
data/spec/units/shelver_spec.rb +42 -0
metadata +106 -0

data/lib/shelver/repository.rb ADDED Viewed

@@ -0,0 +1,54 @@
+require 'active-fedora'
+module Shelver
+class Repository
+  #
+  # This method initializes the fedora repository and solr instance
+  #
+  def initialize_repository
+    Fedora::Repository.register( FEDORA_URL )
+    ActiveFedora::SolrService.register( FEDORA_SOLR_URL )
+  end
+  #
+  # This method retrieves a comprehensive list of unique ids in the fedora repository
+  #
+  def self.get_pids( num_docs )
+    solr_results = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", { :rows => num_docs } )
+    id_array = []
+    solr_results.hits.each do |hit|
+      id_array << hit[SOLR_DOCUMENT_ID]
+    end
+    return id_array
+  end
+  #
+  # This method retrieves the object associated with the given unique id
+  #
+  def self.get_object( pid )
+    object = ActiveFedora::Base.load_instance( pid )
+  end
+  #
+  # This method retrieves a comprehensive list of datastreams for the given object
+  #
+  def self.get_datastreams( obj )
+    ds_keys = obj.datastreams.keys
+  end
+  #
+  # This method retrieves the datastream for the given object with the given datastream name
+  #
+  def self.get_datastream( obj, ds_name )
+    begin
+      obj.datastreams[ ds_name ]
+    rescue
+      return nil
+    end
+  end
+end
+end

data/lib/shelver.rb ADDED Viewed

@@ -0,0 +1,103 @@
+require 'rubygems'
+require 'shelver/indexer.rb'
+# require 'fastercsv'
+require "ruby-debug"
+module Shelver
+class Shelver
+  attr_accessor :indexer, :index_full_text
+  #
+  # This method initializes the indexer
+  # If passed an argument of :index_full_text=>true, it will perform full-text indexing instead of indexing fields only.
+  #
+  def initialize( opts={} )
+    @@index_list = false unless defined?(@@index_list)
+    if opts[:index_full_text] == true || opts[:index_full_text] == "true"
+      @index_full_text = true
+    else
+      @index_full_text = false
+    end
+    @indexer = Indexer.new( :index_full_text=>@index_full_text )
+  end
+  #
+  # This method shelves the given Fedora object's full-text and facets into the search index
+  #
+  def shelve_object( obj )
+    # retrieve the Fedora object based on the given unique id
+      begin
+      start = Time.now
+      print "Retrieving object #{obj} ..."
+      obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
+          obj_done = Time.now
+          obj_done_elapse = obj_done - start
+          puts  " completed. Duration: #{obj_done_elapse}"
+          unless obj.datastreams['descMetadata'].nil? || obj.datastreams['location'].nil?
+                 print "\t Indexing object #{obj.pid} ... "
+                 # add the keywords and facets to the search index
+                 index_start = Time.now
+                 indexer.index( obj )
+                 index_done = Time.now
+                 index_elapsed = index_done - index_start
+                  puts "completed. Duration:  #{index_elapsed} ."
+          end #unless
+      rescue Exception => e
+           p "unable to index #{obj}.  Failed with #{e.inspect}"
+      end #begin
+  end
+  #
+  # This method retrieves a comprehensive list of all the unique identifiers in Fedora and
+  # shelves each object's full-text and facets into the search index
+  #
+  def shelve_objects
+    # retrieve a list of all the pids in the fedora repository
+    num_docs = 1000000   # modify this number to guarantee that all the objects are retrieved from the repository
+    puts "WARNING: You have turned off indexing of Full Text content.  Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
+    if @@index_list == false
+       pids = Repository.get_pids( num_docs )
+	     puts "Shelving #{pids.length} Fedora objects"
+       pids.each do |pid|
+         unless pid[0].empty? || pid[0].nil?
+          shelve_object( pid )
+          end
+        end #pids.each
+    else
+       if File.exists?(@@index_list)
+          arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
+          puts "Indexing from list at #{@@index_list}"
+          puts "Shelving #{arr_of_pids.length} Fedora objects"
+         arr_of_pids.each do |row|
+            pid = row[0]
+            shelve_object( pid )
+	 end #FASTERCSV
+        else
+          puts "#{@@index_list} does not exists!"
+        end #if File.exists
+    end #if Index_LISTS
+  end #shelve_objects
+end #class
+end #module

data/lib/tasks/shelver.rake ADDED Viewed

@@ -0,0 +1,33 @@
+namespace :shelver do
+  desc 'Index a fedora object of the given pid.'
+  task :shelve_object => :environment do
+    index_full_text = ENV['FULL_TEXT'] == 'true'
+    if ENV['PID']
+      puts "indexing #{ENV['PID'].inspect}"
+      shelver = Shelver::Shelver.new :index_full_text=> index_full_text
+      shelver.shelve_object(ENV['PID'])
+      puts "Finished shelving #{ENV['PID']}"
+    else
+      puts "You must provide a pid using the format 'shelver::shelve_object PID=sample:pid'."
+    end
+  end
+  desc 'Index all objects in the repository.'
+  task :shelve_objects => :environment do
+    index_full_text = ENV['FULL_TEXT'] == 'true'
+    if ENV['INDEX_LIST']
+      @@index_list = ENV['INDEX_LIST']
+    end
+    puts "Re-indexing Fedora Repository."
+    puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
+    puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
+    puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
+    puts "Doing full text index." if index_full_text
+    shelver = Shelver::Shelver.new :index_full_text=> index_full_text
+    shelver.shelve_objects
+    puts "Shelver task complete."
+  end
+end

data/shelver.gemspec ADDED Viewed

@@ -0,0 +1,74 @@
+# Generated by jeweler
+# DO NOT EDIT THIS FILE DIRECTLY
+# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
+# -*- encoding: utf-8 -*-
+Gem::Specification.new do |s|
+  s.name = %q{shelver}
+  s.version = "0.0.0"
+  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
+  s.authors = ["Matt Zumwalt"]
+  s.date = %q{2010-03-24}
+  s.description = %q{Use shelver to populate solr indexes from Fedora repository content or from other sources.  You can run shelver from within your apps, using the provided rake tasks, or as a JMS listener}
+  s.email = %q{matt.zumwalt@yourmediashelf.com}
+  s.extra_rdoc_files = [
+    "LICENSE",
+     "README.rdoc"
+  ]
+  s.files = [
+    ".gitignore",
+     "LICENSE",
+     "README.rdoc",
+     "Rakefile",
+     "VERSION",
+     "config/hydra_types.yml",
+     "config/solr.yml",
+     "lib/shelver.rb",
+     "lib/shelver/configuration.rb",
+     "lib/shelver/extractor.rb",
+     "lib/shelver/indexer.rb",
+     "lib/shelver/main.rb",
+     "lib/shelver/replicator.rb",
+     "lib/shelver/repository.rb",
+     "lib/tasks/shelver.rake",
+     "shelver.gemspec",
+     "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
+     "spec/fixtures/druid-bv448hq0314-extProperties.xml",
+     "spec/fixtures/druid-cm234kq4672-extProperties.xml",
+     "spec/fixtures/druid-cm234kq4672-stories.xml",
+     "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
+     "spec/fixtures/rels_ext_cmodel.xml",
+     "spec/rcov.opts",
+     "spec/spec.opts",
+     "spec/spec_helper.rb",
+     "spec/units/extractor_spec.rb",
+     "spec/units/indexer_spec.rb",
+     "spec/units/shelver_spec.rb"
+  ]
+  s.homepage = %q{http://github.com/mediashelf/shelver}
+  s.rdoc_options = ["--charset=UTF-8"]
+  s.require_paths = ["lib"]
+  s.rubygems_version = %q{1.3.6}
+  s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
+  s.test_files = [
+    "spec/spec_helper.rb",
+     "spec/units/extractor_spec.rb",
+     "spec/units/indexer_spec.rb",
+     "spec/units/shelver_spec.rb"
+  ]
+  if s.respond_to? :specification_version then
+    current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
+    s.specification_version = 3
+    if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
+      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
+    else
+      s.add_dependency(%q<rspec>, [">= 1.2.9"])
+    end
+  else
+    s.add_dependency(%q<rspec>, [">= 1.2.9"])
+  end
+end

data/spec/fixtures/druid-bv448hq0314-descMetadata.xml ADDED Viewed

@@ -0,0 +1,11 @@
+<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
+  <dcterms:medium>Paper Document</dcterms:medium>
+  <dcterms:rights>Presumed under copyright. Do not publish.</dcterms:rights>
+  <dcterms:date>1985-12-30</dcterms:date>
+  <dcterms:format>application/tiff</dcterms:format>
+  <dcterms:format>application/jp2000</dcterms:format>
+  <dcterms:format>application/pdf</dcterms:format>
+  <dcterms:title>This is a Sample Title</dcterms:title>
+  <dcterms:publisher>Sample Unversity</dcterms:publisher>
+</dc>

data/spec/fixtures/druid-bv448hq0314-extProperties.xml ADDED Viewed

@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+  <attributes>
+    <attribute type="item">5958</attribute>
+    <attribute type="objectid">FEI0010-00013142</attribute>
+    <attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
+    <attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
+    <attribute type="description"/>
+    <attribute type="date">1985-12-30</attribute>
+    <attribute type="datestr">30/12/1985</attribute>
+    <attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
+    <attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
+    <attribute type="url"/>
+    <attribute type="industryterm"/>
+    <attribute type="technology">artificial intelligence</attribute>
+    <attribute type="company"/>
+    <attribute type="person">ELLIE ENGELMORE</attribute>
+    <attribute type="year">1985</attribute>
+    <attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
+    <attribute type="sourcelocation">Folder 15</attribute>
+  </attributes>
+  <facets>
+    <facet type="year" id="49">1980s</facet>
+    <facet type="year" id="49">1985</facet>
+    <facet type="year" id="42">1980s</facet>
+    <facet type="sourcelocation" id="592">Feigenbaum</facet>
+    <facet type="sourcelocation" id="592">eaf7000</facet>
+    <facet type="sourcelocation" id="592">Box 51A</facet>
+    <facet type="sourcelocation" id="594">Feigenbaum</facet>
+    <facet type="sourcelocation" id="594">eaf7000</facet>
+    <facet type="sourcelocation" id="594">Box 51A</facet>
+    <facet type="sourcelocation" id="594">Folder 15</facet>
+    <facet type="sourcelocation" id="691">Feigenbaum</facet>
+    <facet type="sourcelocation" id="692">Feigenbaum</facet>
+    <facet type="sourcelocation" id="692">eaf7000</facet>
+    <facet type="doctype" id="32">Correspondence</facet>
+    <facet type="city" id="82">Ann Arbor</facet>
+    <facet type="city" id="910">Hyderabad</facet>
+    <facet type="city" id="1519">Palo Alto</facet>
+    <facet type="country" id="68">India</facet>
+    <facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
+    <facet type="organization" id="5065">Heuristic Programming Project</facet>
+    <facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
+    <facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
+    <facet type="person" id="5810">ELLIE ENGELMORE</facet>
+    <facet type="person" id="17934">Reddy</facet>
+    <facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
+    <facet type="provinceorstate" id="96">Michigan</facet>
+    <facet type="provinceorstate" id="27">California</facet>
+    <facet type="technology" id="1713">artificial intelligence</facet>
+  </facets>
+</document>

data/spec/fixtures/druid-cm234kq4672-extProperties.xml ADDED Viewed

@@ -0,0 +1,5 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<document>
+ <attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
+ <facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
+</document>

data/spec/fixtures/druid-cm234kq4672-stories.xml ADDED Viewed

@@ -0,0 +1,17 @@
+<html>
+  <body>
+    <pre>
+This is
+preformatted text.
+It preserves      both spaces
+and line breaks.
+</pre>
+    <p>The pre tag is good for displaying computer code:</p>
+    <pre>
+for i = 1 to 10
+     print i
+next i
+</pre>
+  </body>
+</html>

data/spec/fixtures/druid-hc513kw4806-descMetadata.xml ADDED Viewed

@@ -0,0 +1,11 @@
+<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
+  <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
+  <dcterms:medium>Paper Document</dcterms:medium>
+  <dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
+  <dcterms:date/>
+  <dcterms:format>application/tiff</dcterms:format>
+  <dcterms:format>application/jp2000</dcterms:format>
+  <dcterms:format>application/pdf</dcterms:format>
+  <dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
+  <dcterms:publisher>Sample Unversity</dcterms:publisher>
+</dc>

data/spec/fixtures/rels_ext_cmodel.xml ADDED Viewed

@@ -0,0 +1,8 @@
+<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
+	<rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
+		<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
+		<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
+		<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
+		<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
+	</rdf:Description>
+</rdf:RDF>

data/spec/rcov.opts ADDED Viewed

	@@ -0,0 +1,2 @@
1	+ --exclude "spec/,gems/"
2	+ --rails

data/spec/spec.opts ADDED Viewed

	@@ -0,0 +1 @@
1	+ --color

data/spec/spec_helper.rb ADDED Viewed

@@ -0,0 +1,16 @@
+$LOAD_PATH.unshift(File.dirname(__FILE__))
+$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
+require 'shelver'
+require 'spec'
+require 'spec/autorun'
+Spec::Runner.configure do |config|
+  config.mock_with :mocha
+  def fixture(file)
+    File.new(File.join(File.dirname(__FILE__), 'fixtures', file))
+  end
+end

data/spec/units/extractor_spec.rb ADDED Viewed

@@ -0,0 +1,50 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'shelver'
+describe Shelver::Extractor do
+  before(:each) do
+    @extractor = Shelver::Extractor.new
+  end
+  describe ".xml_to_solr" do
+    it "should turn simple xml into a solr document" do
+      desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
+      result = @extractor.xml_to_solr(desc_meta)
+      result[:type_t].should == "text"
+      result[:medium_t].should == "Paper Document"
+      result[:rights_t].should == "Presumed under copyright. Do not publish."
+      result[:date_t].should == "1985-12-30"
+      result[:format_t].should == "application/tiff"
+      result[:title_t].should == "This is a Sample Title"
+      result[:publisher_t].should == "Sample Unversity"
+      # ... and a hacky way of making sure that it added a field for each of the dc:medium values
+      result.inspect.include?('@value="application/tiff"').should be_true
+      result.inspect.include?('@value="application/pdf"').should be_true
+    end
+  end
+  describe "extract_rels_ext" do
+    it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
+      rels_ext = fixture("rels_ext_cmodel.xml")
+      result = @extractor.extract_rels_ext( rels_ext )
+      result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
+      result[:hydra_type_t].should == "salt_document"
+      # ... and a hacky way of making sure that it added a field for each of the dc:medium values
+      result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
+      result.inspect.include?('@value="jp2_document"').should be_true
+    end
+  end
+  describe "extract_hydra_types" do
+    it "should extract the hydra_type of a Fedora object" do
+      rels_ext = fixture("rels_ext_cmodel.xml")
+      result = @extractor.extract_rels_ext( rels_ext )
+      result[:hydra_type_t].should == "salt_document"
+    end
+  end
+end

data/spec/units/indexer_spec.rb ADDED Viewed

@@ -0,0 +1,127 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'shelver'
+describe Shelver::Indexer do
+  before(:each) do
+     Shelver::Indexer.any_instance.stubs(:connect).returns("foo")
+     @extractor = mock("Extractor")
+     @extractor.stubs(:html_content_to_solr).returns(@solr_doc)
+#     @solr_doc = mock('solr_doc')
+#     @solr_doc.stubs(:<<)
+#     @solr_doc.stubs(:[])
+     @solr_doc = Solr::Document.new
+     Shelver::Extractor.expects(:new).returns(@extractor)
+     @indexer = Shelver::Indexer.new
+   end
+  describe "#generate_dates" do
+    it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
+    solr_result = @indexer.generate_dates(@solr_doc)
+    solr_result.should be_kind_of Solr::Document
+    solr_result[:date_t].should == "9999-99-99"
+    solr_result[:month_facet].should == "99"
+    solr_result[:day_facet].should == '99'
+    end
+    it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
+      @solr_doc << Solr::Field.new(:date_t => "Unknown")
+      solr_result = @indexer.generate_dates(@solr_doc)
+      solr_result.should be_kind_of Solr::Document
+      solr_result[:date_t].should == "Unknown"
+      solr_result[:month_facet].should == "99"
+      solr_result[:day_facet].should == '99'
+    end
+    it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format  " do
+       @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
+       solr_result = @indexer.generate_dates(@solr_doc)
+       solr_result.should be_kind_of Solr::Document
+       solr_result[:date_t].should == "0000-13-11"
+       solr_result[:month_facet].should == "99"
+       solr_result[:day_facet].should == '11'
+     end
+     it "should give month and day when in a valid date format" do
+           @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
+            solr_result = @indexer.generate_dates(@solr_doc)
+            solr_result.should be_kind_of Solr::Document
+            solr_result[:date_t].should == "1978-04-11"
+            solr_result[:month_facet].should == "04"
+            solr_result[:day_facet].should == '11'
+     end
+     it "should still give two digit strings even if the month/day is single digit" do
+         @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
+         solr_result = @indexer.generate_dates(@solr_doc)
+         solr_result.should be_kind_of Solr::Document
+         solr_result[:date_t].should == "1978-4-1"
+         solr_result[:month_facet].should == "04"
+         solr_result[:day_facet].should == '01'
+     end
+  end
+  describe "#solrize" do
+    it "should convert a hash to a solr doc" do
+      example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
+      example_result = Shelver::Indexer.solrize( example_hash )
+      example_result.should be_kind_of Solr::Document
+      example_hash.each_pair do |key,values|
+        if values.class == String
+          example_result["#{key}_facet"].should == values
+        else
+          values.each do |v|
+            example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
+            example_result.inspect.include?("@value=\"#{v}\"").should be_true
+          end
+        end
+      end
+    end
+    it "should handle hashes with facets listed in a sub-hash" do
+      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
+      result = Shelver::Indexer.solrize( simple_hash )
+      result.should be_kind_of Solr::Document
+      result["technology_facet"].should == "t1"
+      result.inspect.include?('@boost=nil').should be_true
+      result.inspect.include?('@name="technology_facet"').should be_true
+      result.inspect.include?('@value="t2"').should be_true
+      result["company_facet"].should == "c1"
+      result["person_facet"].should == "p1"
+      result.inspect.include?('@name="person_facet"').should be_true
+      result.inspect.include?('@value="p2"').should be_true
+    end
+    it "should create symbols from the :symbols subhash" do
+      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
+      result = Shelver::Indexer.solrize( simple_hash )
+      result.should be_kind_of Solr::Document
+      result["technology_s"].should == "t1"
+      result.inspect.include?('@name="technology_s"').should be_true
+      result.inspect.include?('@value="t2"').should be_true
+      result["company_s"].should == "c1"
+      result["person_s"].should == "p1"
+      result.inspect.include?('@name="person_s"').should be_true
+      result.inspect.include?('@value="p2"').should be_true
+    end
+  end
+end

data/spec/units/shelver_spec.rb ADDED Viewed

@@ -0,0 +1,42 @@
+require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
+require 'shelver'
+describe Shelver::Shelver do
+  before(:each) do
+    @shelver = Shelver::Shelver.new
+  end
+  describe "shelve_object" do
+    it "should trigger the indexer for the provided object" do
+      # sample_obj = ActiveFedora::Base.new
+      mock_object = mock("my object")
+      mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
+      mock_object.stubs(:pid)
+      mock_object.stubs(:label)
+      mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
+      ActiveFedora::Base.expects(:load_instance).never
+      @shelver.indexer.expects(:index).with( mock_object )
+      @shelver.shelve_object( mock_object )
+    end
+    it "should still load the object if only a pid is provided" do
+      mock_object = mock("my object")
+      mock_object.stubs(:pid)
+      mock_object.stubs(:label)
+      mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
+      ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
+      @shelver.indexer.expects(:index).with(mock_object)
+      @shelver.shelve_object("_PID_")
+    end
+  end
+  describe "shelve_objects" do
+    it "should call shelve_object for each pid returned by solr" do
+      pids = [["pid1"], ["pid2"], ["pid3"]]
+      Shelver::Repository.expects(:get_pids).returns(pids)
+      pids.each {|pid| @shelver.expects(:shelve_object).with( pid ) }
+      @shelver.shelve_objects
+    end
+  end
+end