shelver 0.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +19 -0
- data/LICENSE +20 -0
- data/README.rdoc +17 -0
- data/Rakefile +45 -0
- data/VERSION +1 -0
- data/config/hydra_types.yml +4 -0
- data/config/solr.yml +24 -0
- data/lib/shelver/configuration.rb +8 -0
- data/lib/shelver/extractor.rb +89 -0
- data/lib/shelver/indexer.rb +251 -0
- data/lib/shelver/main.rb +17 -0
- data/lib/shelver/replicator.rb +143 -0
- data/lib/shelver/repository.rb +54 -0
- data/lib/shelver.rb +103 -0
- data/lib/tasks/shelver.rake +33 -0
- data/shelver.gemspec +74 -0
- data/spec/fixtures/druid-bv448hq0314-descMetadata.xml +11 -0
- data/spec/fixtures/druid-bv448hq0314-extProperties.xml +52 -0
- data/spec/fixtures/druid-cm234kq4672-extProperties.xml +5 -0
- data/spec/fixtures/druid-cm234kq4672-stories.xml +17 -0
- data/spec/fixtures/druid-hc513kw4806-descMetadata.xml +11 -0
- data/spec/fixtures/rels_ext_cmodel.xml +8 -0
- data/spec/rcov.opts +2 -0
- data/spec/spec.opts +1 -0
- data/spec/spec_helper.rb +16 -0
- data/spec/units/extractor_spec.rb +50 -0
- data/spec/units/indexer_spec.rb +127 -0
- data/spec/units/shelver_spec.rb +42 -0
- metadata +106 -0
@@ -0,0 +1,54 @@
|
|
1
|
+
|
2
|
+
require 'active-fedora'
|
3
|
+
|
4
|
+
|
5
|
+
module Shelver
|
6
|
+
class Repository

  #
  # Registers the Fedora repository at FEDORA_URL and the Solr service at FEDORA_SOLR_URL
  #
  def initialize_repository
    Fedora::Repository.register(FEDORA_URL)
    ActiveFedora::SolrService.register(FEDORA_SOLR_URL)
  end

  #
  # Queries Solr for "active_fedora_model_field:Document" (asking for num_docs rows)
  # and returns an array holding the SOLR_DOCUMENT_ID value of every hit
  #
  def self.get_pids( num_docs )
    query_opts = { :rows => num_docs }
    response = ActiveFedora::SolrService.instance.conn.query( "active_fedora_model_field:Document", query_opts )
    pids = []
    response.hits.each { |doc| pids << doc[SOLR_DOCUMENT_ID] }
    pids
  end

  #
  # Loads and returns the object with the given unique id via ActiveFedora::Base.load_instance
  #
  def self.get_object( pid )
    ActiveFedora::Base.load_instance( pid )
  end

  #
  # Returns the keys (datastream names) of the given object's datastreams
  #
  def self.get_datastreams( obj )
    obj.datastreams.keys
  end

  #
  # Returns the datastream stored under ds_name for the given object,
  # or nil when the lookup raises a StandardError
  #
  def self.get_datastream( obj, ds_name )
    obj.datastreams[ ds_name ]
  rescue
    nil
  end

end
|
54
|
+
end
|
data/lib/shelver.rb
ADDED
@@ -0,0 +1,103 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require 'shelver/indexer.rb'
|
3
|
+
# require 'fastercsv'
|
4
|
+
require "ruby-debug"
|
5
|
+
|
6
|
+
|
7
|
+
|
8
|
+
module Shelver
|
9
|
+
class Shelver

  attr_accessor :indexer, :index_full_text

  #
  # This method initializes the indexer
  # If passed an argument of :index_full_text=>true (or the string "true"), it will perform
  # full-text indexing instead of indexing fields only.
  #
  def initialize( opts={} )
    # @@index_list is either false (shelve every pid reported by Repository.get_pids) or the
    # path of a CSV file listing the pids to shelve; a value set before construction is kept
    @@index_list = false unless defined?(@@index_list)
    requested = opts[:index_full_text]
    @index_full_text = ( requested == true || requested == "true" )
    @indexer = Indexer.new( :index_full_text=>@index_full_text )
  end

  #
  # This method shelves the given Fedora object's full-text and facets into the search index
  #
  # obj may be an ActiveFedora::Base instance or a unique id, which is loaded through
  # Repository.get_object. Objects without both a 'descMetadata' and a 'location'
  # datastream are skipped. Any StandardError is reported on stdout and swallowed so that
  # a batch run can carry on with the next object.
  #
  def shelve_object( obj )
    start = Time.now
    print "Retrieving object #{obj} ..."
    # retrieve the Fedora object based on the given unique id, unless we already have one
    obj = Repository.get_object( obj ) unless obj.kind_of?(ActiveFedora::Base)
    puts " completed. Duration: #{Time.now - start}"

    unless obj.datastreams['descMetadata'].nil? || obj.datastreams['location'].nil?
      print "\t Indexing object #{obj.pid} ... "
      # add the keywords and facets to the search index
      index_start = Time.now
      indexer.index( obj )
      puts "completed. Duration: #{Time.now - index_start} ."
    end
  rescue StandardError => e
    # StandardError rather than Exception: Interrupt (Ctrl-C), SystemExit and the like must
    # still be able to stop a long re-index run
    p "unable to index #{obj}. Failed with #{e.inspect}"
  end

  #
  # This method retrieves a comprehensive list of all the unique identifiers in Fedora and
  # shelves each object's full-text and facets into the search index
  #
  # When @@index_list holds a file path, the pids are read from the first column of that
  # CSV file instead of being fetched from Repository.get_pids.
  #
  def shelve_objects
    num_docs = 1000000  # modify this number to guarantee that all the objects are retrieved from the repository
    puts "WARNING: You have turned off indexing of Full Text content.  Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false

    if @@index_list == false
      # retrieve a list of all the pids in the fedora repository
      pids = Repository.get_pids( num_docs )
      puts "Shelving #{pids.length} Fedora objects"
      pids.each do |pid|
        first = pid[0]
        # test for nil before empty? so a missing first element is skipped instead of raising
        shelve_object( pid ) unless first.nil? || first.empty?
      end
    elsif File.exist?(@@index_list)
      # NOTE: FasterCSV must already be loaded by the caller; no require for it is active in this file
      arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)

      puts "Indexing from list at #{@@index_list}"
      puts "Shelving #{arr_of_pids.length} Fedora objects"

      arr_of_pids.each do |row|
        shelve_object( row[0] )
      end
    else
      puts "#{@@index_list} does not exists!"
    end
  end

end #class
|
103
|
+
end #module
|
@@ -0,0 +1,33 @@
|
|
1
|
+
namespace :shelver do
|
2
|
+
|
3
|
+
desc 'Index a fedora object of the given pid.'
# Reads PID (required) and FULL_TEXT (optional, 'true' enables full-text indexing) from the environment
task :shelve_object => :environment do
  index_full_text = ENV['FULL_TEXT'] == 'true'
  pid = ENV['PID']
  if pid
    puts "indexing #{pid.inspect}"
    shelver = Shelver::Shelver.new :index_full_text=> index_full_text
    shelver.shelve_object(pid)
    puts "Finished shelving #{pid}"
  else
    # rake joins namespace and task name with a single colon
    puts "You must provide a pid using the format 'shelver:shelve_object PID=sample:pid'."
  end
end
|
15
|
+
|
16
|
+
desc 'Index all objects in the repository.'
# Reads FULL_TEXT (optional, 'true' enables full-text indexing) and INDEX_LIST
# (optional path of a CSV file listing the pids to shelve) from the environment
task :shelve_objects => :environment do
  index_full_text = ENV['FULL_TEXT'] == 'true'
  if ENV['INDEX_LIST']
    # Set the class variable on Shelver::Shelver itself. Assigning @@index_list here would be
    # a top-level class variable access, which raises RuntimeError on Ruby 3.0+.
    # send is used because class_variable_set is private on Ruby 1.8.
    Shelver::Shelver.send( :class_variable_set, :@@index_list, ENV['INDEX_LIST'] )
  end

  puts "Re-indexing Fedora Repository."
  puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
  puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
  puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
  puts "Doing full text index." if index_full_text
  shelver = Shelver::Shelver.new :index_full_text=> index_full_text
  shelver.shelve_objects
  puts "Shelver task complete."
end
|
32
|
+
|
33
|
+
end
|
data/shelver.gemspec
ADDED
@@ -0,0 +1,74 @@
|
|
1
|
+
# Generated by jeweler
|
2
|
+
# DO NOT EDIT THIS FILE DIRECTLY
|
3
|
+
# Instead, edit Jeweler::Tasks in Rakefile, and run the gemspec command
|
4
|
+
# -*- encoding: utf-8 -*-
|
5
|
+
|
6
|
+
Gem::Specification.new do |s|
|
7
|
+
s.name = %q{shelver}
|
8
|
+
s.version = "0.0.0"
|
9
|
+
|
10
|
+
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
|
+
s.authors = ["Matt Zumwalt"]
|
12
|
+
s.date = %q{2010-03-24}
|
13
|
+
s.description = %q{Use shelver to populate solr indexes from Fedora repository content or from other sources. You can run shelver from within your apps, using the provided rake tasks, or as a JMS listener}
|
14
|
+
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
|
+
s.extra_rdoc_files = [
|
16
|
+
"LICENSE",
|
17
|
+
"README.rdoc"
|
18
|
+
]
|
19
|
+
s.files = [
|
20
|
+
".gitignore",
|
21
|
+
"LICENSE",
|
22
|
+
"README.rdoc",
|
23
|
+
"Rakefile",
|
24
|
+
"VERSION",
|
25
|
+
"config/hydra_types.yml",
|
26
|
+
"config/solr.yml",
|
27
|
+
"lib/shelver.rb",
|
28
|
+
"lib/shelver/configuration.rb",
|
29
|
+
"lib/shelver/extractor.rb",
|
30
|
+
"lib/shelver/indexer.rb",
|
31
|
+
"lib/shelver/main.rb",
|
32
|
+
"lib/shelver/replicator.rb",
|
33
|
+
"lib/shelver/repository.rb",
|
34
|
+
"lib/tasks/shelver.rake",
|
35
|
+
"shelver.gemspec",
|
36
|
+
"spec/fixtures/druid-bv448hq0314-descMetadata.xml",
|
37
|
+
"spec/fixtures/druid-bv448hq0314-extProperties.xml",
|
38
|
+
"spec/fixtures/druid-cm234kq4672-extProperties.xml",
|
39
|
+
"spec/fixtures/druid-cm234kq4672-stories.xml",
|
40
|
+
"spec/fixtures/druid-hc513kw4806-descMetadata.xml",
|
41
|
+
"spec/fixtures/rels_ext_cmodel.xml",
|
42
|
+
"spec/rcov.opts",
|
43
|
+
"spec/spec.opts",
|
44
|
+
"spec/spec_helper.rb",
|
45
|
+
"spec/units/extractor_spec.rb",
|
46
|
+
"spec/units/indexer_spec.rb",
|
47
|
+
"spec/units/shelver_spec.rb"
|
48
|
+
]
|
49
|
+
s.homepage = %q{http://github.com/mediashelf/shelver}
|
50
|
+
s.rdoc_options = ["--charset=UTF-8"]
|
51
|
+
s.require_paths = ["lib"]
|
52
|
+
s.rubygems_version = %q{1.3.6}
|
53
|
+
s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
|
54
|
+
s.test_files = [
|
55
|
+
"spec/spec_helper.rb",
|
56
|
+
"spec/units/extractor_spec.rb",
|
57
|
+
"spec/units/indexer_spec.rb",
|
58
|
+
"spec/units/shelver_spec.rb"
|
59
|
+
]
|
60
|
+
|
61
|
+
if s.respond_to? :specification_version then
|
62
|
+
current_version = Gem::Specification::CURRENT_SPECIFICATION_VERSION
|
63
|
+
s.specification_version = 3
|
64
|
+
|
65
|
+
if Gem::Version.new(Gem::RubyGemsVersion) >= Gem::Version.new('1.2.0') then
|
66
|
+
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
67
|
+
else
|
68
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
69
|
+
end
|
70
|
+
else
|
71
|
+
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
+
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
+
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
+
<dcterms:rights>Presumed under copyright. Do not publish.</dcterms:rights>
|
5
|
+
<dcterms:date>1985-12-30</dcterms:date>
|
6
|
+
<dcterms:format>application/tiff</dcterms:format>
|
7
|
+
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
+
<dcterms:format>application/pdf</dcterms:format>
|
9
|
+
<dcterms:title>This is a Sample Title</dcterms:title>
|
10
|
+
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
+
</dc>
|
@@ -0,0 +1,52 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<document>
|
3
|
+
<attributes>
|
4
|
+
<attribute type="item">5958</attribute>
|
5
|
+
<attribute type="objectid">FEI0010-00013142</attribute>
|
6
|
+
<attribute type="title">Letter from Ellie Engelmore to Professor K. C. Reddy</attribute>
|
7
|
+
<attribute type="copyright">Presumed under copyright. Do not publish.</attribute>
|
8
|
+
<attribute type="description"/>
|
9
|
+
<attribute type="date">1985-12-30</attribute>
|
10
|
+
<attribute type="datestr">30/12/1985</attribute>
|
11
|
+
<attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00013142.pdf</attribute>
|
12
|
+
<attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00013142.png</attribute>
|
13
|
+
<attribute type="url"/>
|
14
|
+
<attribute type="industryterm"/>
|
15
|
+
<attribute type="technology">artificial intelligence</attribute>
|
16
|
+
<attribute type="company"/>
|
17
|
+
<attribute type="person">ELLIE ENGELMORE</attribute>
|
18
|
+
<attribute type="year">1985</attribute>
|
19
|
+
<attribute type="organization">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</attribute>
|
20
|
+
<attribute type="sourcelocation">Folder 15</attribute>
|
21
|
+
</attributes>
|
22
|
+
<facets>
|
23
|
+
<facet type="year" id="49">1980s</facet>
|
24
|
+
<facet type="year" id="49">1985</facet>
|
25
|
+
<facet type="year" id="42">1980s</facet>
|
26
|
+
<facet type="sourcelocation" id="592">Feigenbaum</facet>
|
27
|
+
<facet type="sourcelocation" id="592">eaf7000</facet>
|
28
|
+
<facet type="sourcelocation" id="592">Box 51A</facet>
|
29
|
+
<facet type="sourcelocation" id="594">Feigenbaum</facet>
|
30
|
+
<facet type="sourcelocation" id="594">eaf7000</facet>
|
31
|
+
<facet type="sourcelocation" id="594">Box 51A</facet>
|
32
|
+
<facet type="sourcelocation" id="594">Folder 15</facet>
|
33
|
+
<facet type="sourcelocation" id="691">Feigenbaum</facet>
|
34
|
+
<facet type="sourcelocation" id="692">Feigenbaum</facet>
|
35
|
+
<facet type="sourcelocation" id="692">eaf7000</facet>
|
36
|
+
<facet type="doctype" id="32">Correspondence</facet>
|
37
|
+
<facet type="city" id="82">Ann Arbor</facet>
|
38
|
+
<facet type="city" id="910">Hyderabad</facet>
|
39
|
+
<facet type="city" id="1519">Palo Alto</facet>
|
40
|
+
<facet type="country" id="68">India</facet>
|
41
|
+
<facet type="emailaddress" id="288">EENGELMORE@SUMEX-AIM.ARPA</facet>
|
42
|
+
<facet type="organization" id="5065">Heuristic Programming Project</facet>
|
43
|
+
<facet type="organization" id="7012">Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder</facet>
|
44
|
+
<facet type="organization" id="8878">Professor K. C. Reddy School of Mathematics and Computer/Information Sciences</facet>
|
45
|
+
<facet type="person" id="5810">ELLIE ENGELMORE</facet>
|
46
|
+
<facet type="person" id="17934">Reddy</facet>
|
47
|
+
<facet type="person" id="5787">EDWARD FEIGENBAUM</facet>
|
48
|
+
<facet type="provinceorstate" id="96">Michigan</facet>
|
49
|
+
<facet type="provinceorstate" id="27">California</facet>
|
50
|
+
<facet type="technology" id="1713">artificial intelligence</facet>
|
51
|
+
</facets>
|
52
|
+
</document>
|
@@ -0,0 +1,5 @@
|
|
1
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
2
|
+
<document>
|
3
|
+
<attributes><attribute type="item">4290</attribute><attribute type="objectid">FEI0010-00011325</attribute><attribute type="title">Letter from Ellie Engelmore to Wemara Lichty</attribute><attribute type="copyright">Presumed under copyright. Do not publish.</attribute><attribute type="description"/><attribute type="date">1984-6-4</attribute><attribute type="datestr">4/6/1984</attribute><attribute type="docurl">https://www.stanford.edu/group/salt_project/SLA/Feigenbaum/eaf7000/pdf/00011325.pdf</attribute><attribute type="doctn">http://www.stanford.edu/group/salt_project/cgi-bin/SLA/Feigenbaum/eaf7000/png/small_00011325.png</attribute><attribute type="url"/><attribute type="industryterm"/><attribute type="technology">artificial intelligence</attribute><attribute type="company"/><attribute type="person">A. FEIGENBAUM</attribute><attribute type="year">1984</attribute><attribute type="organization">McAlcster Hall University</attribute><attribute type="sourcelocation">Folder 5</attribute></attributes>
|
4
|
+
<facets><facet type="year" id="48">1980s</facet><facet type="year" id="48">1984</facet><facet type="year" id="42">1980s</facet><facet type="sourcelocation" id="578">Feigenbaum</facet><facet type="sourcelocation" id="578">eaf7000</facet><facet type="sourcelocation" id="578">Box 51</facet><facet type="sourcelocation" id="587">Feigenbaum</facet><facet type="sourcelocation" id="587">eaf7000</facet><facet type="sourcelocation" id="587">Box 51</facet><facet type="sourcelocation" id="587">Folder 5</facet><facet type="sourcelocation" id="692">Feigenbaum</facet><facet type="sourcelocation" id="692">eaf7000</facet><facet type="sourcelocation" id="691">Feigenbaum</facet><facet type="doctype" id="32">Correspondence</facet><facet type="city" id="1948">Stanford</facet><facet type="country" id="33">Columbia</facet><facet type="facility" id="2551">U. Missouri library</facet><facet type="organization" id="5065">Heuristic Programming Project</facet><facet type="organization" id="7026">McAlcster Hall University</facet><facet type="organization" id="9645">STANFORD UNIVERSITY</facet><facet type="organization" id="11964">University of Missouri</facet><facet type="organization" id="12407">Wemara Lichty Psychology Department</facet><facet type="person" id="15650">Morton Hunt</facet><facet type="person" id="37">A. FEIGENBAUM</facet><facet type="person" id="5810">ELLIE ENGELMORE</facet><facet type="provinceorstate" id="27">California</facet><facet type="provinceorstate" id="100">Missouri</facet><facet type="publishedmedium" id="44">Artificial Intelligence</facet><facet type="technology" id="1713">artificial intelligence</facet></facets>
|
5
|
+
</document>
|
@@ -0,0 +1,17 @@
|
|
1
|
+
|
2
|
+
<html>
|
3
|
+
<body>
|
4
|
+
<pre>
|
5
|
+
This is
|
6
|
+
preformatted text.
|
7
|
+
It preserves both spaces
|
8
|
+
and line breaks.
|
9
|
+
</pre>
|
10
|
+
<p>The pre tag is good for displaying computer code:</p>
|
11
|
+
<pre>
|
12
|
+
for i = 1 to 10
|
13
|
+
print i
|
14
|
+
next i
|
15
|
+
</pre>
|
16
|
+
</body>
|
17
|
+
</html>
|
@@ -0,0 +1,11 @@
|
|
1
|
+
<dc xmlns:dcterms="http://purl.org/dc/terms/" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
2
|
+
<dcterms:type xsi:type="DCMITYPE">text</dcterms:type>
|
3
|
+
<dcterms:medium>Paper Document</dcterms:medium>
|
4
|
+
<dcterms:rights>Copyright © 2006 All rights reserved. Distribution for commercial purposes is prohibited.</dcterms:rights>
|
5
|
+
<dcterms:date/>
|
6
|
+
<dcterms:format>application/tiff</dcterms:format>
|
7
|
+
<dcterms:format>application/jp2000</dcterms:format>
|
8
|
+
<dcterms:format>application/pdf</dcterms:format>
|
9
|
+
<dcterms:title>The Rise and Fall of the YouTube Empire</dcterms:title>
|
10
|
+
<dcterms:publisher>Sample Unversity</dcterms:publisher>
|
11
|
+
</dc>
|
@@ -0,0 +1,8 @@
|
|
1
|
+
<rdf:RDF xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#">
|
2
|
+
<rdf:Description rdf:about="info:fedora/demo:multipurpose-objects-model_and_sdef">
|
3
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/fedora-system:ContentModel-3.0"/>
|
4
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:SaltDocument"/>
|
5
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:JP2Document"/>
|
6
|
+
<hasModel xmlns="info:fedora/fedora-system:def/model#" rdf:resource="info:fedora/afmodel:DCDocument"/>
|
7
|
+
</rdf:Description>
|
8
|
+
</rdf:RDF>
|
data/spec/rcov.opts
ADDED
data/spec/spec.opts
ADDED
@@ -0,0 +1 @@
|
|
1
|
+
--color
|
data/spec/spec_helper.rb
ADDED
@@ -0,0 +1,16 @@
|
|
1
|
+
$LOAD_PATH.unshift(File.dirname(__FILE__))
|
2
|
+
$LOAD_PATH.unshift(File.join(File.dirname(__FILE__), '..', 'lib'))
|
3
|
+
require 'shelver'
|
4
|
+
require 'spec'
|
5
|
+
require 'spec/autorun'
|
6
|
+
|
7
|
+
Spec::Runner.configure do |config|
|
8
|
+
|
9
|
+
config.mock_with :mocha
|
10
|
+
|
11
|
+
|
12
|
+
# Opens the named file from the 'fixtures' directory next to this helper and
# returns the File object (the caller is responsible for closing it)
def fixture(file)
  fixtures_dir = File.join(File.dirname(__FILE__), 'fixtures')
  File.new(File.join(fixtures_dir, file))
end
|
15
|
+
|
16
|
+
end
|
@@ -0,0 +1,50 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'shelver'
|
3
|
+
|
4
|
+
describe Shelver::Extractor do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@extractor = Shelver::Extractor.new
|
8
|
+
end
|
9
|
+
|
10
|
+
describe ".xml_to_solr" do
|
11
|
+
it "should turn simple xml into a solr document" do
|
12
|
+
desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
|
13
|
+
result = @extractor.xml_to_solr(desc_meta)
|
14
|
+
result[:type_t].should == "text"
|
15
|
+
result[:medium_t].should == "Paper Document"
|
16
|
+
result[:rights_t].should == "Presumed under copyright. Do not publish."
|
17
|
+
result[:date_t].should == "1985-12-30"
|
18
|
+
result[:format_t].should == "application/tiff"
|
19
|
+
result[:title_t].should == "This is a Sample Title"
|
20
|
+
result[:publisher_t].should == "Sample Unversity"
|
21
|
+
|
22
|
+
# ... and a hacky way of making sure that it added a field for each of the dcterms:format values
|
23
|
+
result.inspect.include?('@value="application/tiff"').should be_true
|
24
|
+
result.inspect.include?('@value="application/pdf"').should be_true
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
describe "extract_rels_ext" do
|
29
|
+
it "should extract the content model of the RELS-EXT datastream of a Fedora object and set hydra_type using hydra_types mapping" do
|
30
|
+
rels_ext = fixture("rels_ext_cmodel.xml")
|
31
|
+
result = @extractor.extract_rels_ext( rels_ext )
|
32
|
+
result[:cmodel_t].should == "info:fedora/fedora-system:ContentModel-3.0"
|
33
|
+
result[:hydra_type_t].should == "salt_document"
|
34
|
+
|
35
|
+
# ... and a hacky way of making sure that it added a field for each content model and each mapped hydra type
|
36
|
+
result.inspect.include?('@value="info:fedora/afmodel:SaltDocument"').should be_true
|
37
|
+
result.inspect.include?('@value="jp2_document"').should be_true
|
38
|
+
end
|
39
|
+
end
|
40
|
+
|
41
|
+
describe "extract_hydra_types" do
|
42
|
+
it "should extract the hydra_type of a Fedora object" do
|
43
|
+
rels_ext = fixture("rels_ext_cmodel.xml")
|
44
|
+
result = @extractor.extract_rels_ext( rels_ext )
|
45
|
+
result[:hydra_type_t].should == "salt_document"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
|
50
|
+
end
|
@@ -0,0 +1,127 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'shelver'
|
3
|
+
|
4
|
+
describe Shelver::Indexer do
|
5
|
+
|
6
|
+
before(:each) do
  Shelver::Indexer.any_instance.stubs(:connect).returns("foo")

  # Build the document before declaring the stub below: `returns` receives the value of
  # @solr_doc at declaration time, so it has to be assigned first or the stub returns nil
  @solr_doc = Solr::Document.new
  # @solr_doc = mock('solr_doc')
  # @solr_doc.stubs(:<<)
  # @solr_doc.stubs(:[])

  @extractor = mock("Extractor")
  @extractor.stubs(:html_content_to_solr).returns(@solr_doc)

  # The indexer under test is expected to build exactly one extractor; hand it the mock
  Shelver::Extractor.expects(:new).returns(@extractor)
  @indexer = Shelver::Indexer.new

end
|
21
|
+
|
22
|
+
describe "#generate_dates" do
|
23
|
+
it "should still give 9999-99-99 date if the solr document does not have a date_t field" do
|
24
|
+
|
25
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
26
|
+
solr_result.should be_kind_of Solr::Document
|
27
|
+
solr_result[:date_t].should == "9999-99-99"
|
28
|
+
solr_result[:month_facet].should == "99"
|
29
|
+
solr_result[:day_facet].should == '99'
|
30
|
+
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should still give 9999-99-99 date if the solr_doc[:date_t] is not valid date in YYYY-MM-DD format " do
|
34
|
+
|
35
|
+
@solr_doc << Solr::Field.new(:date_t => "Unknown")
|
36
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
37
|
+
solr_result.should be_kind_of Solr::Document
|
38
|
+
solr_result[:date_t].should == "Unknown"
|
39
|
+
solr_result[:month_facet].should == "99"
|
40
|
+
solr_result[:day_facet].should == '99'
|
41
|
+
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should give month and dates even if the :date_t is not a valid date but is in YYYY-MM-DD format " do
|
45
|
+
|
46
|
+
@solr_doc << Solr::Field.new(:date_t => "0000-13-11")
|
47
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
48
|
+
solr_result.should be_kind_of Solr::Document
|
49
|
+
solr_result[:date_t].should == "0000-13-11"
|
50
|
+
solr_result[:month_facet].should == "99"
|
51
|
+
solr_result[:day_facet].should == '11'
|
52
|
+
end
|
53
|
+
|
54
|
+
it "should give month and day when in a valid date format" do
|
55
|
+
@solr_doc << Solr::Field.new(:date_t => "1978-04-11")
|
56
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
57
|
+
solr_result.should be_kind_of Solr::Document
|
58
|
+
solr_result[:date_t].should == "1978-04-11"
|
59
|
+
solr_result[:month_facet].should == "04"
|
60
|
+
solr_result[:day_facet].should == '11'
|
61
|
+
|
62
|
+
end
|
63
|
+
|
64
|
+
it "should still give two digit strings even if the month/day is single digit" do
|
65
|
+
|
66
|
+
@solr_doc << Solr::Field.new(:date_t => "1978-4-1")
|
67
|
+
solr_result = @indexer.generate_dates(@solr_doc)
|
68
|
+
solr_result.should be_kind_of Solr::Document
|
69
|
+
solr_result[:date_t].should == "1978-4-1"
|
70
|
+
solr_result[:month_facet].should == "04"
|
71
|
+
solr_result[:day_facet].should == '01'
|
72
|
+
|
73
|
+
end
|
74
|
+
|
75
|
+
end
|
76
|
+
|
77
|
+
|
78
|
+
|
79
|
+
describe "#solrize" do
|
80
|
+
it "should convert a hash to a solr doc" do
|
81
|
+
example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
|
82
|
+
|
83
|
+
example_result = Shelver::Indexer.solrize( example_hash )
|
84
|
+
example_result.should be_kind_of Solr::Document
|
85
|
+
example_hash.each_pair do |key,values|
|
86
|
+
if values.class == String
|
87
|
+
example_result["#{key}_facet"].should == values
|
88
|
+
else
|
89
|
+
values.each do |v|
|
90
|
+
example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
|
91
|
+
example_result.inspect.include?("@value=\"#{v}\"").should be_true
|
92
|
+
end
|
93
|
+
end
|
94
|
+
end
|
95
|
+
end
|
96
|
+
|
97
|
+
it "should handle hashes with facets listed in a sub-hash" do
|
98
|
+
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
99
|
+
result = Shelver::Indexer.solrize( simple_hash )
|
100
|
+
result.should be_kind_of Solr::Document
|
101
|
+
result["technology_facet"].should == "t1"
|
102
|
+
result.inspect.include?('@boost=nil').should be_true
|
103
|
+
result.inspect.include?('@name="technology_facet"').should be_true
|
104
|
+
result.inspect.include?('@value="t2"').should be_true
|
105
|
+
result["company_facet"].should == "c1"
|
106
|
+
result["person_facet"].should == "p1"
|
107
|
+
result.inspect.include?('@name="person_facet"').should be_true
|
108
|
+
result.inspect.include?('@value="p2"').should be_true
|
109
|
+
|
110
|
+
end
|
111
|
+
|
112
|
+
it "should create symbols from the :symbols subhash" do
|
113
|
+
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
114
|
+
result = Shelver::Indexer.solrize( simple_hash )
|
115
|
+
result.should be_kind_of Solr::Document
|
116
|
+
result["technology_s"].should == "t1"
|
117
|
+
result.inspect.include?('@name="technology_s"').should be_true
|
118
|
+
result.inspect.include?('@value="t2"').should be_true
|
119
|
+
|
120
|
+
result["company_s"].should == "c1"
|
121
|
+
result["person_s"].should == "p1"
|
122
|
+
result.inspect.include?('@name="person_s"').should be_true
|
123
|
+
result.inspect.include?('@value="p2"').should be_true
|
124
|
+
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
@@ -0,0 +1,42 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'shelver'
|
3
|
+
|
4
|
+
describe Shelver::Shelver do
|
5
|
+
|
6
|
+
before(:each) do
|
7
|
+
@shelver = Shelver::Shelver.new
|
8
|
+
end
|
9
|
+
|
10
|
+
describe "shelve_object" do
|
11
|
+
it "should trigger the indexer for the provided object" do
|
12
|
+
# sample_obj = ActiveFedora::Base.new
|
13
|
+
mock_object = mock("my object")
|
14
|
+
mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
|
15
|
+
mock_object.stubs(:pid)
|
16
|
+
mock_object.stubs(:label)
|
17
|
+
mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
|
18
|
+
ActiveFedora::Base.expects(:load_instance).never
|
19
|
+
@shelver.indexer.expects(:index).with( mock_object )
|
20
|
+
@shelver.shelve_object( mock_object )
|
21
|
+
end
|
22
|
+
it "should still load the object if only a pid is provided" do
|
23
|
+
mock_object = mock("my object")
|
24
|
+
mock_object.stubs(:pid)
|
25
|
+
mock_object.stubs(:label)
|
26
|
+
mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
|
27
|
+
|
28
|
+
ActiveFedora::Base.expects(:load_instance).with( "_PID_" ).returns(mock_object)
|
29
|
+
@shelver.indexer.expects(:index).with(mock_object)
|
30
|
+
@shelver.shelve_object("_PID_")
|
31
|
+
end
|
32
|
+
end
|
33
|
+
|
34
|
+
describe "shelve_objects" do
|
35
|
+
it "should call shelve_object for each pid returned by solr" do
|
36
|
+
pids = [["pid1"], ["pid2"], ["pid3"]]
|
37
|
+
Shelver::Repository.expects(:get_pids).returns(pids)
|
38
|
+
pids.each {|pid| @shelver.expects(:shelve_object).with( pid ) }
|
39
|
+
@shelver.shelve_objects
|
40
|
+
end
|
41
|
+
end
|
42
|
+
end
|