shelver 0.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,54 @@
1
+
2
+ require 'active-fedora'
3
+
4
+
5
module Shelver
  # Convenience lookups against the Fedora repository and its Solr index.
  # Every lookup is a class method; nothing here keeps state of its own.
  class Repository

    #
    # This method initializes the fedora repository and solr instance
    #
    # Registers FEDORA_URL with Fedora::Repository and FEDORA_SOLR_URL with
    # ActiveFedora::SolrService. Both constants are expected to be defined
    # by the host application.
    #
    # NOTE(review): this is the only instance method on the class; every
    # other method is a class method. Confirm whether callers really
    # instantiate Repository just to call this.
    #
    def initialize_repository
      Fedora::Repository.register( FEDORA_URL )
      ActiveFedora::SolrService.register( FEDORA_SOLR_URL )
    end

    #
    # This method retrieves a comprehensive list of unique ids in the fedora repository
    #
    # num_docs:: maximum number of rows requested from Solr
    #
    # Returns an Array holding hit[SOLR_DOCUMENT_ID] for every hit, in the
    # order Solr returned them.
    #
    def self.get_pids( num_docs )
      solr_results = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", { :rows => num_docs } )
      solr_results.hits.map { |hit| hit[SOLR_DOCUMENT_ID] }
    end

    #
    # This method retrieves the object associated with the given unique id
    #
    # Returns whatever ActiveFedora::Base.load_instance returns for +pid+.
    #
    def self.get_object( pid )
      ActiveFedora::Base.load_instance( pid )
    end

    #
    # This method retrieves a comprehensive list of datastreams for the given object
    #
    # Returns the keys of obj.datastreams (the datastream names).
    #
    def self.get_datastreams( obj )
      obj.datastreams.keys
    end

    #
    # This method retrieves the datastream for the given object with the given datastream name
    #
    # Best-effort lookup: any StandardError raised while reading the
    # datastream (for example +obj+ being nil) yields nil instead of
    # propagating.
    #
    def self.get_datastream( obj, ds_name )
      obj.datastreams[ ds_name ]
    rescue StandardError
      nil
    end

  end
end
data/lib/shelver.rb ADDED
@@ -0,0 +1,103 @@
1
+ require 'rubygems'
2
+ require 'shelver/indexer.rb'
3
+ # require 'fastercsv'
4
+ require "ruby-debug"
5
+
6
+
7
+
8
module Shelver
  # Drives indexing of Fedora objects into the search index through an
  # Indexer instance.
  class Shelver

    attr_accessor :indexer, :index_full_text

    #
    # This method initializes the indexer
    # If passed an argument of :index_full_text=>true, it will perform full-text indexing instead of indexing fields only.
    #
    # opts:: options hash; only :index_full_text is read. Both true and the
    #        string "true" enable full-text indexing, anything else disables it.
    #
    def initialize( opts={} )
      # @@index_list holds the path of a CSV list of pids to index, or false
      # to index everything Solr reports. It is only defaulted here so that
      # a value set before construction is preserved.
      @@index_list = false unless defined?(@@index_list)
      full_text = opts[:index_full_text]
      @index_full_text = ( full_text == true || full_text == "true" )
      @indexer = Indexer.new( :index_full_text=>@index_full_text )
    end

    #
    # This method shelves the given Fedora object's full-text and facets into the search index
    #
    # obj:: either an ActiveFedora::Base instance or an identifier that
    #       Repository.get_object can load.
    #
    # Objects lacking a 'descMetadata' or 'location' datastream are skipped.
    # Any StandardError is reported on stdout and swallowed so that a batch
    # run keeps going; interrupts and exit requests are deliberately NOT
    # rescued, so the process can still be stopped.
    #
    def shelve_object( obj )
      begin
        start = Time.now
        print "Retrieving object #{obj} ..."
        # retrieve the Fedora object based on the given unique id
        obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
        puts " completed. Duration: #{Time.now - start}"

        unless obj.datastreams['descMetadata'].nil? || obj.datastreams['location'].nil?
          print "\t Indexing object #{obj.pid} ... "
          # add the keywords and facets to the search index
          index_start = Time.now
          indexer.index( obj )
          puts "completed. Duration: #{Time.now - index_start} ."
        end
      rescue StandardError => e
        p "unable to index #{obj}. Failed with #{e.inspect}"
      end
    end

    #
    # This method retrieves a comprehensive list of all the unique identifiers in Fedora and
    # shelves each object's full-text and facets into the search index
    #
    # When @@index_list is false the pids come from Repository.get_pids;
    # otherwise @@index_list is treated as the path of a headerless CSV file
    # whose first column holds the pids.
    #
    def shelve_objects
      # retrieve a list of all the pids in the fedora repository
      num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
      puts "WARNING: You have turned off indexing of Full Text content.  Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false

      if @@index_list == false

        pids = Repository.get_pids( num_docs )
        puts "Shelving #{pids.length} Fedora objects"
        pids.each do |pid|
          # nil must be tested before empty?, otherwise a nil id raises
          # NoMethodError. The whole entry (not pid[0]) is passed on, as before.
          unless pid[0].nil? || pid[0].empty?
            shelve_object( pid )
          end
        end #pids.each

      else

        if File.exist?(@@index_list)
          # NOTE(review): the FasterCSV require at the top of this file is
          # commented out -- confirm the library is loaded elsewhere.
          arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)

          puts "Indexing from list at #{@@index_list}"
          puts "Shelving #{arr_of_pids.length} Fedora objects"

          arr_of_pids.each do |row|
            pid = row[0]
            shelve_object( pid )
          end #FASTERCSV
        else
          puts "#{@@index_list} does not exists!"
        end #if File.exist

      end #if Index_LISTS
    end #shelve_objects

  end #class
end #module
@@ -0,0 +1,33 @@
1
namespace :shelver do

  # Indexes the single object named by the PID environment variable.
  # FULL_TEXT=true switches on full-text indexing.
  desc 'Index a fedora object of the given pid.'
  task :shelve_object => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['PID']
      puts "indexing #{ENV['PID'].inspect}"
      shelver = Shelver::Shelver.new :index_full_text=> index_full_text
      shelver.shelve_object(ENV['PID'])
      puts "Finished shelving #{ENV['PID']}"
    else
      puts "You must provide a pid using the format 'shelver:shelve_object PID=sample:pid'."
    end
  end

  # Indexes every object, or only those listed in the CSV file named by the
  # INDEX_LIST environment variable. FULL_TEXT=true switches on full-text
  # indexing.
  desc 'Index all objects in the repository.'
  task :shelve_objects => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['INDEX_LIST']
      # Shelver::Shelver reads @@index_list. Assigning a class variable at
      # toplevel raises RuntimeError on Ruby 3.0+, so set it on the class
      # explicitly (via send, since class_variable_set is private on 1.8).
      Shelver::Shelver.send( :class_variable_set, :@@index_list, ENV['INDEX_LIST'] )
    end

    puts "Re-indexing Fedora Repository."
    puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
    puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
    puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
    puts "Doing full text index." if index_full_text
    shelver = Shelver::Shelver.new :index_full_text=> index_full_text
    shelver.shelve_objects
    puts "Shelver task complete."
  end

end
data/shelver.gemspec ADDED
@@ -0,0 +1,74 @@
1
+ # Generated by jeweler
2
+ # DO NOT EDIT THIS FILE DIRECTLY
3
+ # Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
4
+ # -*- encoding: utf-8 -*-
5
+
6
# NOTE(review): this file is generated by jeweler; the change below should
# also be obtained by regenerating with a current jeweler where possible.
Gem::Specification.new do |s|
  s.name = %q{shelver}
  s.version = "0.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Matt Zumwalt"]
  s.date = %q{2010-03-24}
  s.description = %q{Use shelver to populate solr indexes from Fedora repository content or from other sources.  You can run shelver from within your apps, using the provided rake tasks, or as a JMS listener}
  s.email = %q{matt.zumwalt@yourmediashelf.com}
  s.extra_rdoc_files = [
    "LICENSE",
     "README.rdoc"
  ]
  s.files = [
    ".gitignore",
     "LICENSE",
     "README.rdoc",
     "Rakefile",
     "VERSION",
     "config/hydra_types.yml",
     "config/solr.yml",
     "lib/shelver.rb",
     "lib/shelver/configuration.rb",
     "lib/shelver/extractor.rb",
     "lib/shelver/indexer.rb",
     "lib/shelver/main.rb",
     "lib/shelver/replicator.rb",
     "lib/shelver/repository.rb",
     "lib/tasks/shelver.rake",
     "shelver.gemspec",
     "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
     "spec/fixtures/druid-bv448hq0314-extProperties.xml",
     "spec/fixtures/druid-cm234kq4672-extProperties.xml",
     "spec/fixtures/druid-cm234kq4672-stories.xml",
     "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
     "spec/fixtures/rels_ext_cmodel.xml",
     "spec/rcov.opts",
     "spec/spec.opts",
     "spec/spec_helper.rb",
     "spec/units/extractor_spec.rb",
     "spec/units/indexer_spec.rb",
     "spec/units/shelver_spec.rb"
  ]
  s.homepage = %q{http://github.com/mediashelf/shelver}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
  s.test_files = [
    "spec/spec_helper.rb",
     "spec/units/extractor_spec.rb",
     "spec/units/indexer_spec.rb",
     "spec/units/shelver_spec.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::RubyGemsVersion is gone from modern RubyGems; prefer Gem::VERSION
    # and only fall back to the old constant where VERSION is not defined.
    rubygems_version = defined?(Gem::VERSION) ? Gem::VERSION : Gem::RubyGemsVersion

    # Development dependencies are only understood from RubyGems 1.2.0 on;
    # older versions get rspec as a regular dependency instead.
    if Gem::Version.new(rubygems_version) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
    else
      s.add_dependency(%q<rspec>, [">= 1.2.9"])
    end
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
74
+
@@ -0,0 +1,11 @@
1
+ <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
+ <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
+ <dcterms:medium>Paper Document</dcterms:medium>
4
+ <dcterms:rights>Presumed under copyright. Do not publish.</dcterms:rights>
5
+ <dcterms:date>1985-12-30</dcterms:date>
6
+ <dcterms:format>application/tiff</dcterms:format>
7
+ <dcterms:format>application/jp2000</dcterms:format>
8
+ <dcterms:format>application/pdf</dcterms:format>
9
+ <dcterms:title>This is a Sample Title</dcterms:title>
10
+ <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
+ </dc>
@@ -0,0 +1,52 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <document>
3
+ <attributes>
4
+ <attribute type="item">5958</attribute>
5
+ <attribute type="objectid">FEI0010-00013142</attribute>
6
+ <attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
7
+ <attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
8
+ <attribute type="description"/>
9
+ <attribute type="date">1985-12-30</attribute>
10
+ <attribute type="datestr">30/12/1985</attribute>
11
+ <attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
12
+ <attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
13
+ <attribute type="url"/>
14
+ <attribute type="industryterm"/>
15
+ <attribute type="technology">artificial intelligence</attribute>
16
+ <attribute type="company"/>
17
+ <attribute type="person">ELLIE ENGELMORE</attribute>
18
+ <attribute type="year">1985</attribute>
19
+ <attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
20
+ <attribute type="sourcelocation">Folder 15</attribute>
21
+ </attributes>
22
+ <facets>
23
+ <facet type="year" id="49">1980s</facet>
24
+ <facet type="year" id="49">1985</facet>
25
+ <facet type="year" id="42">1980s</facet>
26
+ <facet type="sourcelocation" id="592">Feigenbaum</facet>
27
+ <facet type="sourcelocation" id="592">eaf7000</facet>
28
+ <facet type="sourcelocation" id="592">Box 51A</facet>
29
+ <facet type="sourcelocation" id="594">Feigenbaum</facet>
30
+ <facet type="sourcelocation" id="594">eaf7000</facet>
31
+ <facet type="sourcelocation" id="594">Box 51A</facet>
32
+ <facet type="sourcelocation" id="594">Folder 15</facet>
33
+ <facet type="sourcelocation" id="691">Feigenbaum</facet>
34
+ <facet type="sourcelocation" id="692">Feigenbaum</facet>
35
+ <facet type="sourcelocation" id="692">eaf7000</facet>
36
+ <facet type="doctype" id="32">Correspondence</facet>
37
+ <facet type="city" id="82">Ann Arbor</facet>
38
+ <facet type="city" id="910">Hyderabad</facet>
39
+ <facet type="city" id="1519">Palo Alto</facet>
40
+ <facet type="country" id="68">India</facet>
41
+ <facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
42
+ <facet type="organization" id="5065">Heuristic Programming Project</facet>
43
+ <facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
44
+ <facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
45
+ <facet type="person" id="5810">ELLIE ENGELMORE</facet>
46
+ <facet type="person" id="17934">Reddy</facet>
47
+ <facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
48
+ <facet type="provinceorstate" id="96">Michigan</facet>
49
+ <facet type="provinceorstate" id="27">California</facet>
50
+ <facet type="technology" id="1713">artificial intelligence</facet>
51
+ </facets>
52
+ </document>
@@ -0,0 +1,5 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <document>
3
+ <attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
4
+ <facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
5
+ </document>
@@ -0,0 +1,17 @@
1
+
2
+ <html>
3
+ <body>
4
+ <pre>
5
+ This is
6
+ preformatted text.
7
+ It preserves both spaces
8
+ and line breaks.
9
+ </pre>
10
+ <p>The pre tag is good for displaying computer code:</p>
11
+ <pre>
12
+ for i = 1 to 10
13
+ print i
14
+ next i
15
+ </pre>
16
+ </body>
17
+ </html>
@@ -0,0 +1,11 @@
1
+ <dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
2
+ <dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
3
+ <dcterms:medium>Paper Document</dcterms:medium>
4
+ <dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
5
+ <dcterms:date/>
6
+ <dcterms:format>application/tiff</dcterms:format>
7
+ <dcterms:format>application/jp2000</dcterms:format>
8
+ <dcterms:format>application/pdf</dcterms:format>
9
+ <dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
10
+ <dcterms:publisher>Sample Unversity</dcterms:publisher>
11
+ </dc>
@@ -0,0 +1,8 @@
1
+ <rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
2
+ <rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
3
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
4
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
5
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
6
+ <hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
7
+ </rdf:Description>
8
+ </rdf:RDF>
data/spec/rcov.opts ADDED
@@ -0,0 +1,2 @@
1
+ --exclude "spec/*,gems/*"
2
+ --rails
data/spec/spec.opts ADDED
@@ -0,0 +1 @@
1
+ --color
@@ -0,0 +1,16 @@
1
+ $LOAD_PATH.unshift(File.dirname(__FILE__))
2
+ $LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
3
+ require 'shelver'
4
+ require 'spec'
5
+ require 'spec/autorun'
6
+
7
Spec::Runner.configure do |config|

  # Use mocha for mocks, stubs and expectations.
  config.mock_with :mocha

  # Opens the named file from the fixtures directory that sits next to this
  # helper and returns the File object. The handle is not closed here.
  def fixture(file)
    fixtures_dir = File.join(File.dirname(__FILE__), 'fixtures')
    fixture_path = File.join(fixtures_dir, file)
    File.new(fixture_path)
  end

end
@@ -0,0 +1,50 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
describe Shelver::Extractor do

  before(:each) do
    @extractor = Shelver::Extractor.new
  end

  describe ".xml_to_solr" do
    it "should turn simple xml into a solr document" do
      solr_doc = @extractor.xml_to_solr( fixture("druid-bv448hq0314-descMetadata.xml") )

      # one expectation per field, using the values found in the fixture
      [
        [:type_t,      "text"],
        [:medium_t,    "Paper Document"],
        [:rights_t,    "Presumed under copyright. Do not publish."],
        [:date_t,      "1985-12-30"],
        [:format_t,    "application/tiff"],
        [:title_t,     "This is a Sample Title"],
        [:publisher_t, "Sample Unversity"]
      ].each do |field, expected|
        solr_doc[field].should == expected
      end

      # The fixture carries several format values; check through inspect
      # that more than just the first one made it into the document.
      ['@value="application/tiff"', '@value="application/pdf"'].each do |fragment|
        solr_doc.inspect.include?(fragment).should be_true
      end
    end
  end

  describe "extract_rels_ext" do
    it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
      solr_doc = @extractor.extract_rels_ext( fixture("rels_ext_cmodel.xml") )

      solr_doc[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
      solr_doc[:hydra_type_t].should == "salt_document"

      # Same inspect-based check for the additional content model and
      # hydra type values.
      ['@value="info:fedora/afmodel:SaltDocument"', '@value="jp2_document"'].each do |fragment|
        solr_doc.inspect.include?(fragment).should be_true
      end
    end
  end

  describe "extract_hydra_types" do
    it "should extract the hydra_type of a Fedora object" do
      solr_doc = @extractor.extract_rels_ext( fixture("rels_ext_cmodel.xml") )
      solr_doc[:hydra_type_t].should == "salt_document"
    end
  end

end
@@ -0,0 +1,127 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
describe Shelver::Indexer do

  before(:each) do
    # Keep the indexer from opening a real connection.
    Shelver::Indexer.any_instance.stubs(:connect).returns("foo")

    # Build the document BEFORE stubbing the extractor: the stub captures the
    # value of @solr_doc at the time returns() is called, so assigning it
    # afterwards made the stub hand back nil.
    @solr_doc = Solr::Document.new

    @extractor = mock("Extractor")
    @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

    Shelver::Extractor.expects(:new).returns(@extractor)
    @indexer = Shelver::Indexer.new
  end

  describe "#generate_dates" do
    it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "9999-99-99"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "Unknown")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "Unknown"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "0000-13-11"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '11'
    end

    it "should give month and day when in a valid date format" do
      @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-04-11"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '11'
    end

    it "should still give two digit strings even if the month/day is single digit" do
      @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-4-1"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '01'
    end
  end

  describe "#solrize" do
    it "should convert a hash to a solr doc" do
      example_hash = {
        "box"=>"Box 51A",
        "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"],
        "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"],
        "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy",
        "series"=>"eaf7000",
        "folder"=>"Folder 15",
        "technology"=>["artificial intelligence"],
        "year"=>"1985",
        "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"],
        "collection"=>"e-a-feigenbaum-collection",
        "state"=>["Michigan", "California"]
      }

      example_result = Shelver::Indexer.solrize( example_hash )
      example_result.should be_kind_of Solr::Document
      example_hash.each_pair do |key,values|
        if values.class == String
          example_result["#{key}_facet"].should == values
        else
          # multi-valued entries are checked through inspect, looking for
          # the field name and each individual value
          values.each do |v|
            example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
            example_result.inspect.include?("@value=\"#{v}\"").should be_true
          end
        end
      end
    end

    it "should handle hashes with facets listed in a sub-hash" do
      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
      result = Shelver::Indexer.solrize( simple_hash )
      result.should be_kind_of Solr::Document
      result["technology_facet"].should == "t1"
      result.inspect.include?('@boost=nil').should be_true
      result.inspect.include?('@name="technology_facet"').should be_true
      result.inspect.include?('@value="t2"').should be_true
      result["company_facet"].should == "c1"
      result["person_facet"].should == "p1"
      result.inspect.include?('@name="person_facet"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end

    it "should create symbols from the :symbols subhash" do
      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
      result = Shelver::Indexer.solrize( simple_hash )
      result.should be_kind_of Solr::Document
      result["technology_s"].should == "t1"
      result.inspect.include?('@name="technology_s"').should be_true
      result.inspect.include?('@value="t2"').should be_true

      result["company_s"].should == "c1"
      result["person_s"].should == "p1"
      result.inspect.include?('@name="person_s"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end
  end
end
@@ -0,0 +1,42 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'shelver'
3
+
4
describe Shelver::Shelver do

  before(:each) do
    @shelver = Shelver::Shelver.new
  end

  describe "shelve_object" do
    it "should trigger the indexer for the provided object" do
      # An object that already claims to be an ActiveFedora::Base must be
      # indexed as-is, without a repository load.
      fedora_object = mock("my object")
      fedora_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
      fedora_object.stubs(:pid)
      fedora_object.stubs(:label)
      present_datastreams = {'descMetadata'=>"foo","location"=>"bar"}
      fedora_object.stubs(:datastreams).returns(present_datastreams)

      ActiveFedora::Base.expects(:load_instance).never
      @shelver.indexer.expects(:index).with( fedora_object )

      @shelver.shelve_object( fedora_object )
    end

    it "should still load the object if only a pid is provided" do
      # A bare pid has to be resolved through load_instance first.
      loaded_object = mock("my object")
      loaded_object.stubs(:pid)
      loaded_object.stubs(:label)
      present_datastreams = {'descMetadata'=>"foo","location"=>"bar"}
      loaded_object.stubs(:datastreams).returns(present_datastreams)

      ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(loaded_object)
      @shelver.indexer.expects(:index).with(loaded_object)

      @shelver.shelve_object("_PID_")
    end
  end

  describe "shelve_objects" do
    it "should call shelve_object for each pid returned by solr" do
      # Each entry is a one-element array, and shelve_object is expected to
      # receive the whole entry.
      solr_pids = [["pid1"], ["pid2"], ["pid3"]]
      Shelver::Repository.expects(:get_pids).returns(solr_pids)
      solr_pids.each do |entry|
        @shelver.expects(:shelve_object).with( entry )
      end

      @shelver.shelve_objects
    end
  end
end