shelver 0.0.0
Sign up to get free protection for your applications and to get access to all the features.
- data/.gitignore +19 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/config/hydra_types.yml +4 -0
- data/config/solr.yml +24 -0
- data/lib/shelver/configuration.rb +8 -0
- data/lib/shelver/extractor.rb +89 -0
- data/lib/shelver/indexer.rb +251 -0
- data/lib/shelver/main.rb +17 -0
- data/lib/shelver/replicator.rb +143 -0
- data/lib/shelver/repository.rb +54 -0
- data/lib/shelver.rb +103 -0
- data/lib/tasks/shelver.rake +33 -0
- data/shelver.gemspec +74 -0
- data/spec/fixtures/druid-bv448hq0314-descMetadata.xml +11 -0
- data/spec/fixtures/druid-bv448hq0314-extProperties.xml +52 -0
- data/spec/fixtures/druid-cm234kq4672-extProperties.xml +5 -0
- data/spec/fixtures/druid-cm234kq4672-stories.xml +17 -0
- data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +11 -0
- data/spec/fixtures/rels_ext_cmodel.xml +8 -0
- data/spec/rcov.opts +2 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/units/extractor_spec.rb +50 -0
- data/spec/units/indexer_spec.rb +127 -0
- data/spec/units/shelver_spec.rb +42 -0
- metadata +106 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
|
2
|
+
require 'active-fedora'
|
3
|
+
|
4
|
+
|
5
|
+
module Shelver
  #
  # Small facade over the Fedora repository and the Solr index that backs it:
  # registers the connections and looks up pids, objects and datastreams.
  #
  class Repository

    #
    # This method initializes the fedora repository and solr instance
    # by registering the FEDORA_URL and FEDORA_SOLR_URL constants.
    #
    def initialize_repository
      Fedora::Repository.register( FEDORA_URL )
      ActiveFedora::SolrService.register( FEDORA_SOLR_URL )
    end

    #
    # This method retrieves the unique ids of the objects that solr reports for
    # the query "active_fedora_model_field:Document".
    #
    # num_docs:: maximum number of rows requested from solr; the list is only
    #            comprehensive when this is at least the number of matching documents
    #
    # Returns an Array holding the SOLR_DOCUMENT_ID value of every hit.
    #
    def self.get_pids( num_docs )
      solr_results = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", { :rows => num_docs } )
      solr_results.hits.map { |hit| hit[SOLR_DOCUMENT_ID] }
    end

    #
    # This method retrieves the object associated with the given unique id
    #
    def self.get_object( pid )
      ActiveFedora::Base.load_instance( pid )
    end

    #
    # This method retrieves the names (keys) of all the datastreams of the given object
    #
    def self.get_datastreams( obj )
      obj.datastreams.keys
    end

    #
    # This method retrieves the datastream for the given object with the given datastream name
    #
    # Returns nil when the name is unknown, and also when the lookup itself raises
    # a StandardError (for example when obj is nil) -- the lookup is deliberately best-effort.
    #
    def self.get_datastream( obj, ds_name )
      obj.datastreams[ ds_name ]
    rescue StandardError
      nil
    end

  end
end
|
data/lib/shelver.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'shelver/indexer.rb'
|
3
|
+
# require 'fastercsv'
|
4
|
+
require "ruby-debug"
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
module Shelver
  #
  # Pushes Fedora objects into the search index, one at a time or in bulk,
  # by handing each object to an Indexer.
  #
  class Shelver

    attr_accessor :indexer, :index_full_text

    #
    # This method initializes the indexer
    # If passed an argument of :index_full_text=>true (or the string "true"), it will perform
    # full-text indexing instead of indexing fields only.
    #
    def initialize( opts={} )
      # @@index_list stays false unless it has already been pointed at a CSV file of pids
      @@index_list = false unless defined?(@@index_list)
      @index_full_text = ( opts[:index_full_text] == true || opts[:index_full_text] == "true" )
      @indexer = Indexer.new( :index_full_text=>@index_full_text )
    end

    #
    # This method shelves the given Fedora object's full-text and facets into the search index
    #
    # obj:: either an ActiveFedora::Base instance or a pid that is loaded through Repository.get_object
    #
    # Objects lacking a 'descMetadata' or a 'location' datastream are skipped.
    # Any StandardError is reported on stdout and swallowed so that a bulk run can continue;
    # Interrupt, SystemExit and similar are no longer swallowed, so a long run can be aborted.
    #
    def shelve_object( obj )
      start = Time.now
      print "Retrieving object #{obj} ..."
      # retrieve the Fedora object based on the given unique id (unless we were handed the object itself)
      obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )

      obj_done_elapse = Time.now - start
      puts " completed. Duration: #{obj_done_elapse}"

      unless obj.datastreams['descMetadata'].nil? || obj.datastreams['location'].nil?
        print "\t Indexing object #{obj.pid} ... "
        # add the keywords and facets to the search index
        index_start = Time.now
        indexer.index( obj )
        index_elapsed = Time.now - index_start

        puts "completed. Duration: #{index_elapsed} ."
      end
    rescue StandardError => e
      p "unable to index #{obj}. Failed with #{e.inspect}"
    end

    #
    # This method retrieves a comprehensive list of all the unique identifiers in Fedora and
    # shelves each object's full-text and facets into the search index
    #
    # When @@index_list names a file, the pids are read from the first column of that CSV
    # file instead of being looked up through Repository.get_pids.
    #
    def shelve_objects
      # retrieve a list of all the pids in the fedora repository
      num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
      puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_list set to true in main.rb".sub("@@index_list", "@@index_full_text") if index_full_text == false

      if @@index_list == false
        pids = Repository.get_pids( num_docs )
        puts "Shelving #{pids.length} Fedora objects"
        pids.each do |pid|
          # nil has to be ruled out before empty? is called, otherwise a missing id raises NoMethodError
          next if pid.nil? || pid[0].nil? || pid[0].empty?
          shelve_object( pid )
        end
      elsif File.exist?(@@index_list)
        # NOTE(review): FasterCSV is not required by this file (the require above is commented out);
        # presumably the host application loads it -- confirm.
        arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)

        puts "Indexing from list at #{@@index_list}"
        puts "Shelving #{arr_of_pids.length} Fedora objects"

        arr_of_pids.each do |row|
          shelve_object( row[0] )
        end
      else
        puts "#{@@index_list} does not exists!"
      end
    end

  end #class
end #module
|
@@ -0,0 +1,33 @@
|
|
1
|
+
namespace :shelver do

  # Reads PID (required) and FULL_TEXT (optional, 'true' enables full-text indexing) from the environment.
  desc 'Index a fedora object of the given pid.'
  task :shelve_object => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['PID']
      puts "indexing #{ENV['PID'].inspect}"
      shelver = Shelver::Shelver.new :index_full_text=> index_full_text
      shelver.shelve_object(ENV['PID'])
      puts "Finished shelving #{ENV['PID']}"
    else
      puts "You must provide a pid using the format 'shelver:shelve_object PID=sample:pid'."
    end
  end

  # Reads FULL_TEXT (optional) and INDEX_LIST (optional path of a CSV file of pids) from the environment.
  desc 'Index all objects in the repository.'
  task :shelve_objects => :environment do
    index_full_text = ENV['FULL_TEXT'] == 'true'
    if ENV['INDEX_LIST']
      # Set the class variable on Shelver::Shelver itself: assigning @@index_list at the top level
      # is a class-variable access from toplevel, which newer Rubies reject.
      # send is used because class_variable_set is private on old Rubies.
      Shelver::Shelver.send( :class_variable_set, :@@index_list, ENV['INDEX_LIST'] )
    end

    puts "Re-indexing Fedora Repository."
    puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
    puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
    puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
    puts "Doing full text index." if index_full_text
    shelver = Shelver::Shelver.new :index_full_text=> index_full_text
    shelver.shelve_objects
    puts "Shelver task complete."
  end

end
|
data/shelver.gemspec
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
# Gem specification for shelver: metadata, packaged files and the rspec dependency.
Gem::Specification.new do |s|
  s.name = %q{shelver}
  s.version = "0.0.0"

  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
  s.authors = ["Matt Zumwalt"]
  s.date = %q{2010-03-24}
  s.description = %q{Use shelver to populate solr indexes from Fedora repository content or from other sources.  You can run shelver from within your apps, using the provided rake tasks, or as a JMS listener}
  s.email = %q{matt.zumwalt@yourmediashelf.com}
  s.extra_rdoc_files = [
    "LICENSE",
    "README.rdoc"
  ]
  s.files = [
    ".gitignore",
    "LICENSE",
    "README.rdoc",
    "Rakefile",
    "VERSION",
    "config/hydra_types.yml",
    "config/solr.yml",
    "lib/shelver.rb",
    "lib/shelver/configuration.rb",
    "lib/shelver/extractor.rb",
    "lib/shelver/indexer.rb",
    "lib/shelver/main.rb",
    "lib/shelver/replicator.rb",
    "lib/shelver/repository.rb",
    "lib/tasks/shelver.rake",
    "shelver.gemspec",
    "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
    "spec/fixtures/druid-bv448hq0314-extProperties.xml",
    "spec/fixtures/druid-cm234kq4672-extProperties.xml",
    "spec/fixtures/druid-cm234kq4672-stories.xml",
    "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
    "spec/fixtures/rels_ext_cmodel.xml",
    "spec/rcov.opts",
    "spec/spec.opts",
    "spec/spec_helper.rb",
    "spec/units/extractor_spec.rb",
    "spec/units/indexer_spec.rb",
    "spec/units/shelver_spec.rb"
  ]
  s.homepage = %q{http://github.com/mediashelf/shelver}
  s.rdoc_options = ["--charset=UTF-8"]
  s.require_paths = ["lib"]
  s.rubygems_version = %q{1.3.6}
  s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
  s.test_files = [
    "spec/spec_helper.rb",
    "spec/units/extractor_spec.rb",
    "spec/units/indexer_spec.rb",
    "spec/units/shelver_spec.rb"
  ]

  if s.respond_to? :specification_version then
    s.specification_version = 3

    # Gem::VERSION replaces Gem::RubyGemsVersion, which newer RubyGems releases no longer define.
    if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
      s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
    else
      # RubyGems without development dependencies: fall back to a regular dependency
      s.add_dependency(%q<rspec>, [">= 1.2.9"])
    end
  else
    s.add_dependency(%q<rspec>, [">= 1.2.9"])
  end
end
|
74
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
+
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
+
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
+
<dcterms:rights>Presumed under copyright. Do not publish.</dcterms:rights>
|
5
|
+
<dcterms:date>1985-12-30</dcterms:date>
|
6
|
+
<dcterms:format>application/tiff</dcterms:format>
|
7
|
+
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
+
<dcterms:format>application/pdf</dcterms:format>
|
9
|
+
<dcterms:title>This is a Sample Title</dcterms:title>
|
10
|
+
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
+
</dc>
|
@@ -0,0 +1,52 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<document>
|
3
|
+
<attributes>
|
4
|
+
<attribute type="item">5958</attribute>
|
5
|
+
<attribute type="objectid">FEI0010-00013142</attribute>
|
6
|
+
<attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
|
7
|
+
<attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
|
8
|
+
<attribute type="description"/>
|
9
|
+
<attribute type="date">1985-12-30</attribute>
|
10
|
+
<attribute type="datestr">30/12/1985</attribute>
|
11
|
+
<attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
|
12
|
+
<attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
|
13
|
+
<attribute type="url"/>
|
14
|
+
<attribute type="industryterm"/>
|
15
|
+
<attribute type="technology">artificial intelligence</attribute>
|
16
|
+
<attribute type="company"/>
|
17
|
+
<attribute type="person">ELLIE ENGELMORE</attribute>
|
18
|
+
<attribute type="year">1985</attribute>
|
19
|
+
<attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
|
20
|
+
<attribute type="sourcelocation">Folder 15</attribute>
|
21
|
+
</attributes>
|
22
|
+
<facets>
|
23
|
+
<facet type="year" id="49">1980s</facet>
|
24
|
+
<facet type="year" id="49">1985</facet>
|
25
|
+
<facet type="year" id="42">1980s</facet>
|
26
|
+
<facet type="sourcelocation" id="592">Feigenbaum</facet>
|
27
|
+
<facet type="sourcelocation" id="592">eaf7000</facet>
|
28
|
+
<facet type="sourcelocation" id="592">Box 51A</facet>
|
29
|
+
<facet type="sourcelocation" id="594">Feigenbaum</facet>
|
30
|
+
<facet type="sourcelocation" id="594">eaf7000</facet>
|
31
|
+
<facet type="sourcelocation" id="594">Box 51A</facet>
|
32
|
+
<facet type="sourcelocation" id="594">Folder 15</facet>
|
33
|
+
<facet type="sourcelocation" id="691">Feigenbaum</facet>
|
34
|
+
<facet type="sourcelocation" id="692">Feigenbaum</facet>
|
35
|
+
<facet type="sourcelocation" id="692">eaf7000</facet>
|
36
|
+
<facet type="doctype" id="32">Correspondence</facet>
|
37
|
+
<facet type="city" id="82">Ann Arbor</facet>
|
38
|
+
<facet type="city" id="910">Hyderabad</facet>
|
39
|
+
<facet type="city" id="1519">Palo Alto</facet>
|
40
|
+
<facet type="country" id="68">India</facet>
|
41
|
+
<facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
|
42
|
+
<facet type="organization" id="5065">Heuristic Programming Project</facet>
|
43
|
+
<facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
|
44
|
+
<facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
|
45
|
+
<facet type="person" id="5810">ELLIE ENGELMORE</facet>
|
46
|
+
<facet type="person" id="17934">Reddy</facet>
|
47
|
+
<facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
|
48
|
+
<facet type="provinceorstate" id="96">Michigan</facet>
|
49
|
+
<facet type="provinceorstate" id="27">California</facet>
|
50
|
+
<facet type="technology" id="1713">artificial intelligence</facet>
|
51
|
+
</facets>
|
52
|
+
</document>
|
@@ -0,0 +1,5 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<document>
|
3
|
+
<attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
|
4
|
+
<facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
|
5
|
+
</document>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
<html>
|
3
|
+
<body>
|
4
|
+
<pre>
|
5
|
+
This is
|
6
|
+
preformatted text.
|
7
|
+
It preserves both spaces
|
8
|
+
and line breaks.
|
9
|
+
</pre>
|
10
|
+
<p>The pre tag is good for displaying computer code:</p>
|
11
|
+
<pre>
|
12
|
+
for i = 1 to 10
|
13
|
+
print i
|
14
|
+
next i
|
15
|
+
</pre>
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
+
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
+
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
+
<dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
|
5
|
+
<dcterms:date/>
|
6
|
+
<dcterms:format>application/tiff</dcterms:format>
|
7
|
+
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
+
<dcterms:format>application/pdf</dcterms:format>
|
9
|
+
<dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
|
10
|
+
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
+
</dc>
|
@@ -0,0 +1,8 @@
|
|
1
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
2
|
+
<rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
|
3
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
|
4
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
|
5
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
|
6
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
|
7
|
+
</rdf:Description>
|
8
|
+
</rdf:RDF>
|
data/spec/rcov.opts
ADDED
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
# Put the spec directory and the gem's lib directory at the front of the load path
spec_dir = File.dirname(__FILE__)
$LOAD_PATH.unshift(spec_dir)
$LOAD_PATH.unshift(File.join(spec_dir, '..', 'lib'))
require 'shelver'
require 'spec'
require 'spec/autorun'

Spec::Runner.configure do |config|

  config.mock_with :mocha

  # Opens the named file from spec/fixtures and returns the open File.
  # The handle is not closed here; callers receive it as-is.
  def fixture(file)
    fixture_path = File.join(File.dirname(__FILE__), 'fixtures', file)
    File.new(fixture_path)
  end

end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require 'shelver'

describe Shelver::Extractor do

  before(:each) do
    @extractor = Shelver::Extractor.new
  end

  describe ".xml_to_solr" do
    it "should turn simple xml into a solr document" do
      solr_doc = @extractor.xml_to_solr( fixture("druid-bv448hq0314-descMetadata.xml") )

      # Value expected from a plain lookup of each field, checked in this order
      expectations = [
        [:type_t,      "text"],
        [:medium_t,    "Paper Document"],
        [:rights_t,    "Presumed under copyright. Do not publish."],
        [:date_t,      "1985-12-30"],
        [:format_t,    "application/tiff"],
        [:title_t,     "This is a Sample Title"],
        [:publisher_t, "Sample Unversity"]
      ]
      expectations.each do |field, expected|
        solr_doc[field].should == expected
      end

      # ... and a hacky way of making sure that repeated values (the fixture has several
      # dcterms:format elements) each ended up in the document
      inspected = solr_doc.inspect
      inspected.include?('@value="application/tiff"').should be_true
      inspected.include?('@value="application/pdf"').should be_true
    end
  end

  describe "extract_rels_ext" do
    it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
      solr_doc = @extractor.extract_rels_ext( fixture("rels_ext_cmodel.xml") )
      solr_doc[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
      solr_doc[:hydra_type_t].should == "salt_document"

      # ... and a hacky way of making sure that the further hasModel entries of the fixture
      # were added as well
      inspected = solr_doc.inspect
      inspected.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
      inspected.include?('@value="jp2_document"').should be_true
    end
  end

  describe "extract_hydra_types" do
    # Goes through extract_rels_ext, just like the example above
    it "should extract the hydra_type of a Fedora object" do
      solr_doc = @extractor.extract_rels_ext( fixture("rels_ext_cmodel.xml") )
      solr_doc[:hydra_type_t].should == "salt_document"
    end
  end

end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require 'shelver'

describe Shelver::Indexer do

  before(:each) do
    # Keep Indexer.new from opening a real connection
    Shelver::Indexer.any_instance.stubs(:connect).returns("foo")

    # The document has to exist before it is handed to the stub below;
    # assigning it afterwards made the stub return nil.
    @solr_doc = Solr::Document.new

    @extractor = mock("Extractor")
    @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

    Shelver::Extractor.expects(:new).returns(@extractor)
    @indexer = Shelver::Indexer.new
  end

  describe "#generate_dates" do
    it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "9999-99-99"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "Unknown")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      # the original date_t value is kept; only the facets fall back to 99
      solr_result[:date_t].should == "Unknown"
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '99'
    end

    it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
      @solr_doc << Solr::Field.new(:date_t => "0000-13-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "0000-13-11"
      # month 13 is out of range, so the month facet falls back to 99 while the day is kept
      solr_result[:month_facet].should == "99"
      solr_result[:day_facet].should == '11'
    end

    it "should give month and day when in a valid date format" do
      @solr_doc << Solr::Field.new(:date_t => "1978-04-11")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-04-11"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '11'
    end

    it "should still give two digit strings even if the month/day is single digit" do
      @solr_doc << Solr::Field.new(:date_t => "1978-4-1")
      solr_result = @indexer.generate_dates(@solr_doc)
      solr_result.should be_kind_of Solr::Document
      solr_result[:date_t].should == "1978-4-1"
      solr_result[:month_facet].should == "04"
      solr_result[:day_facet].should == '01'
    end
  end

  describe "#solrize" do
    it "should convert a hash to a solr doc" do
      example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}

      example_result = Shelver::Indexer.solrize( example_hash )
      example_result.should be_kind_of Solr::Document
      example_hash.each_pair do |key,values|
        if values.class == String
          example_result["#{key}_facet"].should == values
        else
          # multi-valued entries are only checked through the inspect output
          values.each do |v|
            example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
            example_result.inspect.include?("@value=\"#{v}\"").should be_true
          end
        end
      end
    end

    it "should handle hashes with facets listed in a sub-hash" do
      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
      result = Shelver::Indexer.solrize( simple_hash )
      result.should be_kind_of Solr::Document
      result["technology_facet"].should == "t1"
      result.inspect.include?('@boost=nil').should be_true
      result.inspect.include?('@name="technology_facet"').should be_true
      result.inspect.include?('@value="t2"').should be_true
      result["company_facet"].should == "c1"
      result["person_facet"].should == "p1"
      result.inspect.include?('@name="person_facet"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end

    it "should create symbols from the :symbols subhash" do
      simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
      result = Shelver::Indexer.solrize( simple_hash )
      result.should be_kind_of Solr::Document
      result["technology_s"].should == "t1"
      result.inspect.include?('@name="technology_s"').should be_true
      result.inspect.include?('@value="t2"').should be_true

      result["company_s"].should == "c1"
      result["person_s"].should == "p1"
      result.inspect.include?('@name="person_s"').should be_true
      result.inspect.include?('@value="p2"').should be_true
    end
  end
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
require 'shelver'

describe Shelver::Shelver do

  before(:each) do
    @shelver = Shelver::Shelver.new
  end

  # Builds a mock Fedora object that answers pid and label and exposes the two
  # datastreams ('descMetadata' and 'location') shelve_object looks for.
  def build_mock_object
    fedora_object = mock("my object")
    fedora_object.stubs(:pid)
    fedora_object.stubs(:label)
    fedora_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
    fedora_object
  end

  describe "shelve_object" do
    it "should trigger the indexer for the provided object" do
      fedora_object = build_mock_object
      # Pretend to be an ActiveFedora::Base so that no repository lookup happens
      fedora_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
      ActiveFedora::Base.expects(:load_instance).never
      @shelver.indexer.expects(:index).with( fedora_object )

      @shelver.shelve_object( fedora_object )
    end

    it "should still load the object if only a pid is provided" do
      fedora_object = build_mock_object
      ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(fedora_object)
      @shelver.indexer.expects(:index).with(fedora_object)

      @shelver.shelve_object("_PID_")
    end
  end

  describe "shelve_objects" do
    it "should call shelve_object for each pid returned by solr" do
      pids = [["pid1"], ["pid2"], ["pid3"]]
      Shelver::Repository.expects(:get_pids).returns(pids)
      pids.each do |pid|
        @shelver.expects(:shelve_object).with( pid )
      end

      @shelver.shelve_objects
    end
  end
end
|