shelver 0.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,54 @@
1
+
2
+ require 'active-fedora'
3
+
4
+
5
module Shelver
  # Convenience wrapper around the Fedora repository and the Solr index that
  # ActiveFedora maintains for it.
  class Repository

    #
    # Registers the Fedora repository and the Solr service with ActiveFedora.
    #
    # Reads the FEDORA_URL and FEDORA_SOLR_URL constants, which are not
    # defined in this file and must be provided by the host application.
    #
    def initialize_repository
      Fedora::Repository.register( FEDORA_URL )
      ActiveFedora::SolrService.register( FEDORA_SOLR_URL )
    end

    #
    # Queries Solr for documents matching "active_fedora_model_field:Document"
    # and returns an Array with the SOLR_DOCUMENT_ID value of every hit.
    #
    # num_docs:: maximum number of rows requested from Solr; the result is
    #            only complete when this is at least the number of matches.
    #
    def self.get_pids( num_docs )
      solr_results = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", { :rows => num_docs } )
      solr_results.hits.map { |hit| hit[SOLR_DOCUMENT_ID] }
    end

    #
    # Loads and returns the object with the given unique id (pid) through
    # ActiveFedora::Base.load_instance.
    #
    def self.get_object( pid )
      ActiveFedora::Base.load_instance( pid )
    end

    #
    # Returns the names (keys) of all datastreams of the given object.
    #
    def self.get_datastreams( obj )
      obj.datastreams.keys
    end

    #
    # Returns the datastream named ds_name of the given object.
    #
    # This lookup is deliberately best-effort: nil is returned both when the
    # datastream is missing and when the lookup itself raises a StandardError
    # (for example when obj is nil).
    #
    def self.get_datastream( obj, ds_name )
      obj.datastreams[ ds_name ]
    rescue StandardError
      nil
    end

  end
end
data/lib/shelver.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'rubygems'
2
+ require 'shelver/indexer.rb'
3
+ # require 'fastercsv'
4
+ require "ruby-debug"
5
+
6
+
7
+
8
module Shelver
  # Shelves ("indexes") Fedora objects into the search index by delegating to
  # a Shelver::Indexer, either one object at a time or for a whole list of pids.
  class Shelver

    attr_accessor :indexer, :index_full_text

    #
    # This method initializes the indexer
    # If passed an argument of :index_full_text=>true (or the string "true"),
    # it will perform full-text indexing instead of indexing fields only.
    #
    # @@index_list is either false (shelve every pid returned by
    # Repository.get_pids) or the path of a CSV file whose first column holds
    # the pids to shelve. It is only defaulted here when nothing has set it
    # yet, so a value assigned before instantiation is preserved.
    #
    def initialize( opts={} )
      @@index_list = false unless defined?(@@index_list)
      requested = opts[:index_full_text]
      @index_full_text = (requested == true || requested == "true")
      @indexer = Indexer.new( :index_full_text=>@index_full_text )
    end

    #
    # This method shelves the given Fedora object's full-text and facets into the search index
    #
    # obj:: an ActiveFedora::Base instance, or a unique id that is loaded
    #       through Repository.get_object.
    #
    # The object is handed to the indexer only when it has both a
    # 'descMetadata' and a 'location' datastream. Any StandardError raised
    # while loading or indexing is reported on stdout and swallowed, so one
    # bad object does not abort a batch run; Interrupt, SystemExit and other
    # non-StandardError exceptions propagate to the caller.
    #
    def shelve_object( obj )
      start = Time.now
      print "Retrieving object #{obj} ..."
      # retrieve the Fedora object based on the given unique id
      obj = Repository.get_object( obj ) unless obj.kind_of?(ActiveFedora::Base)
      puts " completed. Duration: #{Time.now - start}"

      datastreams = obj.datastreams
      return if datastreams['descMetadata'].nil? || datastreams['location'].nil?

      print "\t Indexing object #{obj.pid} ... "
      # add the keywords and facets to the search index
      index_start = Time.now
      indexer.index( obj )
      puts "completed. Duration: #{Time.now - index_start} ."
    rescue StandardError => e
      p "unable to index #{obj}. Failed with #{e.inspect}"
    end

    #
    # This method retrieves the unique identifiers to process and shelves each
    # object's full-text and facets into the search index.
    #
    # When @@index_list is false the pids come from Repository.get_pids;
    # otherwise @@index_list is treated as the path of a CSV file and the
    # first column of every row is used as a pid.
    #
    def shelve_objects
      num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
      puts "WARNING: You have turned off indexing of Full Text content.  Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false

      if @@index_list == false
        # retrieve a list of all the pids in the fedora repository
        pids = Repository.get_pids( num_docs )
        puts "Shelving #{pids.length} Fedora objects"
        pids.each do |pid|
          shelve_object( pid ) unless blank_pid?( pid )
        end
      elsif File.exist?(@@index_list)
        arr_of_pids = read_index_list( @@index_list )

        puts "Indexing from list at #{@@index_list}"
        puts "Shelving #{arr_of_pids.length} Fedora objects"

        arr_of_pids.each { |row| shelve_object( row[0] ) }
      else
        puts "#{@@index_list} does not exists!"
      end
    end

    private

    # True when a pid entry carries no usable identifier: the entry itself is
    # nil, or its first element is nil or empty. The nil check has to run
    # before empty?, because nil does not respond to empty?.
    def blank_pid?( pid )
      return true if pid.nil?
      first = pid[0]
      first.nil? || (first.respond_to?(:empty?) && first.empty?)
    end

    # Reads the CSV pid list at path and returns its rows.
    # Uses FasterCSV when the host application has loaded it; otherwise falls
    # back to the standard library CSV, which is loaded lazily because this
    # file does not require any CSV library itself.
    # NOTE(review): the fallback assumes the Ruby 1.9+ CSV API - confirm if
    # Ruby 1.8 without FasterCSV still has to be supported.
    def read_index_list( path )
      return FasterCSV.read( path, :headers=>false ) if defined?(FasterCSV)

      require 'csv'
      CSV.read( path, :headers=>false )
    end

  end #class
end #module
@@ -0,0 +1,33 @@
1
namespace :shelver do

  # Shelves a single object. Environment variables read by the task:
  #   PID       - unique id of the object to index (required)
  #   FULL_TEXT - 'true' to request full-text indexing
  desc 'Index a fedora object of the given pid.'
  task :shelve_object => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['PID']
      puts "indexing #{ENV['PID'].inspect}"
      shelver = Shelver::Shelver.new :index_full_text=> index_full_text
      shelver.shelve_object(ENV['PID'])
      puts "Finished shelving #{ENV['PID']}"
    else
      puts "You must provide a pid using the format 'shelver:shelve_object PID=sample:pid'."
    end
  end

  # Shelves a batch of objects. Environment variables read by the task:
  #   INDEX_LIST - optional value stored in Shelver::Shelver's @@index_list
  #                before the shelver is created
  #   FULL_TEXT  - 'true' to request full-text indexing
  desc 'Index all objects in the repository.'
  task :shelve_objects => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['INDEX_LIST']
      # Set the class variable on Shelver::Shelver explicitly: assigning
      # @@index_list at the top level raises a RuntimeError on Ruby 3.0+.
      # send is used because class_variable_set is private on Ruby 1.8.
      Shelver::Shelver.send( :class_variable_set, :@@index_list, ENV['INDEX_LIST'] )
    end

    puts "Re-indexing Fedora Repository."
    puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
    puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
    puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
    puts "Doing full text index." if index_full_text
    shelver = Shelver::Shelver.new :index_full_text=> index_full_text
    shelver.shelve_objects
    puts "Shelver task complete."
  end

end
data/shelver.gemspec ADDED
@@ -0,0 +1,74 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
+ Gem::Specification.new do |s|
7
+ s.name = %q{shelver}
8
+ s.version = "0.0.0"
9
+
10
+ s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
+ s.authors = ["Matt Zumwalt"]
12
+ s.date = %q{2010-03-24}
13
+ s.description = %q{Use shelver to populate solr indexes from Fedora repository content or from other sources. You can run shelver from within your apps, using the provided rake tasks, or as a JMS listener}
14
+ s.email = %q{matt.zumwalt@yourmediashelf.com}
15
+ s.extra_rdoc_files = [
16
+ "LICENSE",
17
+ "README.rdoc"
18
+ ]
19
+ s.files = [
20
+ ".gitignore",
21
+ "LICENSE",
22
+ "README.rdoc",
23
+ "Rakefile",
24
+ "VERSION",
25
+ "config/hydra_types.yml",
26
+ "config/solr.yml",
27
+ "lib/shelver.rb",
28
+ "lib/shelver/configuration.rb",
29
+ "lib/shelver/extractor.rb",
30
+ "lib/shelver/indexer.rb",
31
+ "lib/shelver/main.rb",
32
+ "lib/shelver/replicator.rb",
33
+ "lib/shelver/repository.rb",
34
+ "lib/tasks/shelver.rake",
35
+ "shelver.gemspec",
36
+ "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
37
+ "spec/fixtures/druid-bv448hq0314-extProperties.xml",
38
+ "spec/fixtures/druid-cm234kq4672-extProperties.xml",
39
+ "spec/fixtures/druid-cm234kq4672-stories.xml",
40
+ "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
41
+ "spec/fixtures/rels_ext_cmodel.xml",
42
+ "spec/rcov.opts",
43
+ "spec/spec.opts",
44
+ "spec/spec_helper.rb",
45
+ "spec/units/extractor_spec.rb",
46
+ "spec/units/indexer_spec.rb",
47
+ "spec/units/shelver_spec.rb"
48
+ ]
49
+ s.homepage = %q{http://github.com/mediashelf/shelver}
50
+ s.rdoc_options = ["--charset=UTF-8"]
51
+ s.require_paths = ["lib"]
52
+ s.rubygems_version = %q{1.3.6}
53
+ s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
54
+ s.test_files = [
55
+ "spec/spec_helper.rb",
56
+ "spec/units/extractor_spec.rb",
57
+ "spec/units/indexer_spec.rb",
58
+ "spec/units/shelver_spec.rb"
59
+ ]
60
+
61
+ if s.respond_to? :specification_version then
62
+ current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
63
+ s.specification_version = 3
64
+
65
+ if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
66
+ s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
67
+ else
68
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
69
+ end
70
+ else
71
+ s.add_dependency(%q<rspec>, [">= 1.2.9"])
72
+ end
73
+ end
74
+
@@ -0,0 +1,11 @@
1
+ <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
+ <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
+ <dcterms:medium>Paper Document</dcterms:medium>
4
+ <dcterms:rights>Presumed under copyright. Do not publish.</dcterms:rights>
5
+ <dcterms:date>1985-12-30</dcterms:date>
6
+ <dcterms:format>application/tiff</dcterms:format>
7
+ <dcterms:format>application/jp2000</dcterms:format>
8
+ <dcterms:format>application/pdf</dcterms:format>
9
+ <dcterms:title>This is a Sample Title</dcterms:title>
10
+ <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
+ </dc>
@@ -0,0 +1,52 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <document>
3
+ <attributes>
4
+ <attribute type="item">5958</attribute>
5
+ <attribute type="objectid">FEI0010-00013142</attribute>
6
+ <attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
7
+ <attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
8
+ <attribute type="description"/>
9
+ <attribute type="date">1985-12-30</attribute>
10
+ <attribute type="datestr">30/12/1985</attribute>
11
+ <attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
12
+ <attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
13
+ <attribute type="url"/>
14
+ <attribute type="industryterm"/>
15
+ <attribute type="technology">artificial intelligence</attribute>
16
+ <attribute type="company"/>
17
+ <attribute type="person">ELLIE ENGELMORE</attribute>
18
+ <attribute type="year">1985</attribute>
19
+ <attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
20
+ <attribute type="sourcelocation">Folder 15</attribute>
21
+ </attributes>
22
+ <facets>
23
+ <facet type="year" id="49">1980s</facet>
24
+ <facet type="year" id="49">1985</facet>
25
+ <facet type="year" id="42">1980s</facet>
26
+ <facet type="sourcelocation" id="592">Feigenbaum</facet>
27
+ <facet type="sourcelocation" id="592">eaf7000</facet>
28
+ <facet type="sourcelocation" id="592">Box 51A</facet>
29
+ <facet type="sourcelocation" id="594">Feigenbaum</facet>
30
+ <facet type="sourcelocation" id="594">eaf7000</facet>
31
+ <facet type="sourcelocation" id="594">Box 51A</facet>
32
+ <facet type="sourcelocation" id="594">Folder 15</facet>
33
+ <facet type="sourcelocation" id="691">Feigenbaum</facet>
34
+ <facet type="sourcelocation" id="692">Feigenbaum</facet>
35
+ <facet type="sourcelocation" id="692">eaf7000</facet>
36
+ <facet type="doctype" id="32">Correspondence</facet>
37
+ <facet type="city" id="82">Ann Arbor</facet>
38
+ <facet type="city" id="910">Hyderabad</facet>
39
+ <facet type="city" id="1519">Palo Alto</facet>
40
+ <facet type="country" id="68">India</facet>
41
+ <facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
42
+ <facet type="organization" id="5065">Heuristic Programming Project</facet>
43
+ <facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
44
+ <facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
45
+ <facet type="person" id="5810">ELLIE ENGELMORE</facet>
46
+ <facet type="person" id="17934">Reddy</facet>
47
+ <facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
48
+ <facet type="provinceorstate" id="96">Michigan</facet>
49
+ <facet type="provinceorstate" id="27">California</facet>
50
+ <facet type="technology" id="1713">artificial intelligence</facet>
51
+ </facets>
52
+ </document>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <document>
3
+ <attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
4
+ <facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
5
+ </document>
@@ -0,0 +1,17 @@
1
+
2
+ <html>
3
+ <body>
4
+ <pre>
5
+ This is
6
+ preformatted text.
7
+ It preserves both spaces
8
+ and line breaks.
9
+ </pre>
10
+ <p>The pre tag is good for displaying computer code:</p>
11
+ <pre>
12
+ for i = 1 to 10
13
+ print i
14
+ next i
15
+ </pre>
16
+ </body>
17
+ </html>
@@ -0,0 +1,11 @@
1
+ <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
+ <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
+ <dcterms:medium>Paper Document</dcterms:medium>
4
+ <dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
5
+ <dcterms:date/>
6
+ <dcterms:format>application/tiff</dcterms:format>
7
+ <dcterms:format>application/jp2000</dcterms:format>
8
+ <dcterms:format>application/pdf</dcterms:format>
9
+ <dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
10
+ <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
+ </dc>
@@ -0,0 +1,8 @@
1
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
2
+ <rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
3
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
4
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
5
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
6
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
7
+ </rdf:Description>
8
+ </rdf:RDF>
data/spec/rcov.opts ADDED
@@ -0,0 +1,2 @@
1
+ --exclude "spec/*,gems/*"
2
+ --rails
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'shelver'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
+ Spec::Runner.configure do |config|
8
+
9
+ config.mock_with :mocha
10
+
11
+
12
+ def fixture(file)
13
+ File.new(File.join(File.dirname(__FILE__), 'fixtures', file))
14
+ end
15
+
16
+ end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
+ describe Shelver::Extractor do
5
+
6
+ before(:each) do
7
+ @extractor = Shelver::Extractor.new
8
+ end
9
+
10
+ describe ".xml_to_solr" do
11
+ it "should turn simple xml into a solr document" do
12
+ desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
13
+ result = @extractor.xml_to_solr(desc_meta)
14
+ result[:type_t].should == "text"
15
+ result[:medium_t].should == "Paper Document"
16
+ result[:rights_t].should == "Presumed under copyright. Do not publish."
17
+ result[:date_t].should == "1985-12-30"
18
+ result[:format_t].should == "application/tiff"
19
+ result[:title_t].should == "This is a Sample Title"
20
+ result[:publisher_t].should == "Sample Unversity"
21
+
22
+ # ... and a hacky way of making sure that it added a field for each of the dcterms:format values
23
+ result.inspect.include?('@value="application/tiff"').should be_true
24
+ result.inspect.include?('@value="application/pdf"').should be_true
25
+ end
26
+ end
27
+
28
+ describe "extract_rels_ext" do
29
+ it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
30
+ rels_ext = fixture("rels_ext_cmodel.xml")
31
+ result = @extractor.extract_rels_ext( rels_ext )
32
+ result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
33
+ result[:hydra_type_t].should == "salt_document"
34
+
35
+ # ... and a hacky way of making sure that it added a field for each hasModel value and its mapped hydra type
36
+ result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
37
+ result.inspect.include?('@value="jp2_document"').should be_true
38
+ end
39
+ end
40
+
41
+ describe "extract_hydra_types" do
42
+ it "should extract the hydra_type of a Fedora object" do
43
+ rels_ext = fixture("rels_ext_cmodel.xml")
44
+ result = @extractor.extract_rels_ext( rels_ext )
45
+ result[:hydra_type_t].should == "salt_document"
46
+ end
47
+ end
48
+
49
+
50
+ end
@@ -0,0 +1,127 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
+ describe Shelver::Indexer do
5
+
6
before(:each) do
  Shelver::Indexer.any_instance.stubs(:connect).returns("foo")

  # @solr_doc = mock('solr_doc')
  # @solr_doc.stubs(:<<)
  # @solr_doc.stubs(:[])

  # The document has to exist before the extractor stub is built: returns()
  # captures the value of @solr_doc at stubbing time, so stubbing first
  # would make html_content_to_solr return nil.
  @solr_doc = Solr::Document.new

  @extractor = mock("Extractor")
  @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

  # Indexer.new is expected to build exactly one Extractor; hand it the mock.
  Shelver::Extractor.expects(:new).returns(@extractor)
  @indexer = Shelver::Indexer.new
end
21
+
22
+ describe "#generate_dates" do
23
+ it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
24
+
25
+ solr_result = @indexer.generate_dates(@solr_doc)
26
+ solr_result.should be_kind_of Solr::Document
27
+ solr_result[:date_t].should == "9999-99-99"
28
+ solr_result[:month_facet].should == "99"
29
+ solr_result[:day_facet].should == '99'
30
+
31
+ end
32
+
33
+ it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
34
+
35
+ @solr_doc << Solr::Field.new(:date_t => "Unknown")
36
+ solr_result = @indexer.generate_dates(@solr_doc)
37
+ solr_result.should be_kind_of Solr::Document
38
+ solr_result[:date_t].should == "Unknown"
39
+ solr_result[:month_facet].should == "99"
40
+ solr_result[:day_facet].should == '99'
41
+
42
+ end
43
+
44
+ it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
45
+
46
+ @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
47
+ solr_result = @indexer.generate_dates(@solr_doc)
48
+ solr_result.should be_kind_of Solr::Document
49
+ solr_result[:date_t].should == "0000-13-11"
50
+ solr_result[:month_facet].should == "99"
51
+ solr_result[:day_facet].should == '11'
52
+ end
53
+
54
+ it "should give month and day when in a valid date format" do
55
+ @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
56
+ solr_result = @indexer.generate_dates(@solr_doc)
57
+ solr_result.should be_kind_of Solr::Document
58
+ solr_result[:date_t].should == "1978-04-11"
59
+ solr_result[:month_facet].should == "04"
60
+ solr_result[:day_facet].should == '11'
61
+
62
+ end
63
+
64
+ it "should still give two digit strings even if the month/day is single digit" do
65
+
66
+ @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
67
+ solr_result = @indexer.generate_dates(@solr_doc)
68
+ solr_result.should be_kind_of Solr::Document
69
+ solr_result[:date_t].should == "1978-4-1"
70
+ solr_result[:month_facet].should == "04"
71
+ solr_result[:day_facet].should == '01'
72
+
73
+ end
74
+
75
+ end
76
+
77
+
78
+
79
+ describe "#solrize" do
80
+ it "should convert a hash to a solr doc" do
81
+ example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
82
+
83
+ example_result = Shelver::Indexer.solrize( example_hash )
84
+ example_result.should be_kind_of Solr::Document
85
+ example_hash.each_pair do |key,values|
86
+ if values.class == String
87
+ example_result["#{key}_facet"].should == values
88
+ else
89
+ values.each do |v|
90
+ example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
91
+ example_result.inspect.include?("@value=\"#{v}\"").should be_true
92
+ end
93
+ end
94
+ end
95
+ end
96
+
97
+ it "should handle hashes with facets listed in a sub-hash" do
98
+ simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
99
+ result = Shelver::Indexer.solrize( simple_hash )
100
+ result.should be_kind_of Solr::Document
101
+ result["technology_facet"].should == "t1"
102
+ result.inspect.include?('@boost=nil').should be_true
103
+ result.inspect.include?('@name="technology_facet"').should be_true
104
+ result.inspect.include?('@value="t2"').should be_true
105
+ result["company_facet"].should == "c1"
106
+ result["person_facet"].should == "p1"
107
+ result.inspect.include?('@name="person_facet"').should be_true
108
+ result.inspect.include?('@value="p2"').should be_true
109
+
110
+ end
111
+
112
+ it "should create symbols from the :symbols subhash" do
113
+ simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
114
+ result = Shelver::Indexer.solrize( simple_hash )
115
+ result.should be_kind_of Solr::Document
116
+ result["technology_s"].should == "t1"
117
+ result.inspect.include?('@name="technology_s"').should be_true
118
+ result.inspect.include?('@value="t2"').should be_true
119
+
120
+ result["company_s"].should == "c1"
121
+ result["person_s"].should == "p1"
122
+ result.inspect.include?('@name="person_s"').should be_true
123
+ result.inspect.include?('@value="p2"').should be_true
124
+
125
+ end
126
+ end
127
+ end
@@ -0,0 +1,42 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
+ describe Shelver::Shelver do
5
+
6
+ before(:each) do
7
+ @shelver = Shelver::Shelver.new
8
+ end
9
+
10
+ describe "shelve_object" do
11
+ it "should trigger the indexer for the provided object" do
12
+ # sample_obj = ActiveFedora::Base.new
13
+ mock_object = mock("my object")
14
+ mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
15
+ mock_object.stubs(:pid)
16
+ mock_object.stubs(:label)
17
+ mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
18
+ ActiveFedora::Base.expects(:load_instance).never
19
+ @shelver.indexer.expects(:index).with( mock_object )
20
+ @shelver.shelve_object( mock_object )
21
+ end
22
+ it "should still load the object if only a pid is provided" do
23
+ mock_object = mock("my object")
24
+ mock_object.stubs(:pid)
25
+ mock_object.stubs(:label)
26
+ mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
27
+
28
+ ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
29
+ @shelver.indexer.expects(:index).with(mock_object)
30
+ @shelver.shelve_object("_PID_")
31
+ end
32
+ end
33
+
34
+ describe "shelve_objects" do
35
+ it "should call shelve_object for each pid returned by solr" do
36
+ pids = [["pid1"], ["pid2"], ["pid3"]]
37
+ Shelver::Repository.expects(:get_pids).returns(pids)
38
+ pids.each {|pid| @shelver.expects(:shelve_object).with( pid ) }
39
+ @shelver.shelve_objects
40
+ end
41
+ end
42
+ end