solrizer 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/config/solr_mappings.yml +14 -0
- data/lib/solrizer/field_name_mapper.rb +62 -0
- data/lib/solrizer/xml/terminology_based_solrizer.rb +104 -0
- data/lib/solrizer/xml.rb +4 -0
- data/lib/solrizer.rb +18 -2
- data/solrizer.gemspec +17 -4
- data/spec/fixtures/mods_articles/hydrangea_article1.xml +90 -0
- data/spec/fixtures/solr_mappings_af_0.1.yml +16 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/units/field_name_mapper_spec.rb +41 -0
- data/spec/units/shelver_spec.rb +13 -13
- data/spec/units/xml_terminology_based_solrizer_spec.rb +88 -0
- metadata +33 -7
data/Rakefile
CHANGED
@@ -11,6 +11,7 @@ begin
|
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
13
|
gem.add_dependency "active-fedora", ">= 1.1.5"
|
14
|
+
gem.add_dependency "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
|
14
15
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
17
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
0.2.0
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require "logger"
require "yaml"
|
2
|
+
|
3
|
+
module Solrizer
  # Builds solr field names by appending a type-specific suffix to a base
  # field name. The suffixes come from a table loaded from a solr_mappings
  # YAML file.
  #
  # Call the module methods directly, or include the module to get an
  # instance-level +solr_name+ that delegates to the module method.
  module FieldNameMapper

    # Module Methods & Attributes

    # Suffix table looked up by field type name (as a String).
    # Replaced wholesale by load_mappings and mappings=.
    @@mappings = {}

    # Generates solr field names from settings in solr_mappings
    #
    # @param field_name [String, Symbol] base name of the field
    # @param field_type [String, Symbol] type whose suffix should be appended;
    #   a type with no entry in the mappings contributes no suffix
    # @return [String, Symbol] a Symbol when +field_name+ is a Symbol,
    #   otherwise a String
    def self.solr_name(field_name, field_type)
      name = field_name.to_s + self.mappings[field_type.to_s].to_s
      field_name.kind_of?(Symbol) ? name.to_sym : name
    end

    # @return the currently loaded mappings table
    def self.mappings
      @@mappings
    end

    # Replaces the currently loaded mappings table with +mappings+.
    def self.mappings=(mappings)
      @@mappings = mappings
    end

    # Instance Methods

    # Instance-level convenience wrapper around FieldNameMapper.solr_name.
    def solr_name(field_name, field_type)
      ::Solrizer::FieldNameMapper.solr_name(field_name, field_type)
    end

    # Logger used to report which mappings file gets loaded.
    # Uses RAILS_DEFAULT_LOGGER when that constant is defined, otherwise a
    # Logger writing to STDOUT. The choice is memoized.
    def self.logger
      @logger ||= defined?(RAILS_DEFAULT_LOGGER) ? RAILS_DEFAULT_LOGGER : Logger.new(STDOUT)
    end

    # Loads solr mappings from a YAML file and makes them the active mappings.
    #
    # @param config_path [String, nil] path to the mappings YAML file.
    #   When nil, RAILS_ROOT/config/solr_mappings.yml is used if RAILS_ROOT is
    #   defined and that file exists; otherwise the solr_mappings.yml in the
    #   config directory two levels above this file is used.
    def self.load_mappings( config_path=nil )

      if config_path.nil?
        if defined?(RAILS_ROOT)
          config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
        end
        # Default to using the config file within the gem
        unless File.exist?(config_path.to_s)
          config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
        end
      end

      logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")

      # Block form of File.open closes the handle as soon as parsing finishes,
      # instead of leaving it open until garbage collection.
      @@mappings = File.open(config_path) { |file| YAML::load(file) }

      # Guarantee an "id" entry even when the file does not define one.
      mappings["id"] = "id" unless mappings["id"]
    end

    # This ensures that some mappings will always be loaded
    self.load_mappings
  end #FieldNameMapper
end #Solrizer
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# Populates Solr::Document objects from XML documents by walking the
# terminology declared on the document's class.
#
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
module Solrizer::XML::TerminologyBasedSolrizer

  # Module Methods

  # Build a solr document from +doc+ based on its terminology
  # @doc OM::XML::Document
  # @solr_doc (optional) Solr::Document to populate
  # Returns +solr_doc+ after every top-level term of the terminology has been
  # handed to +doc.solrize_term+. A nil terminology leaves +solr_doc+ untouched.
  def self.solrize(doc, solr_doc=Solr::Document.new)
    unless doc.class.terminology.nil?
      doc.class.terminology.terms.each_pair do |term_name,term|
        doc.solrize_term(term, solr_doc)
      end
    end

    return solr_doc
  end

  # Populate a solr document with fields based on the nodes in +doc+ that
  # correspond to +term+, then recurse into the term's children.
  # @doc OM::XML::Document that responds to find_by_terms and solrize_term
  # @term the term to solrize; must respond to name and children
  # @solr_doc (optional) Solr::Document to populate
  # @opts :parents => Array of pointer segments leading to +term+ (defaults to [])
  # Returns +solr_doc+.
  def self.solrize_term(doc, term, solr_doc = Solr::Document.new, opts={})
    parents = opts.fetch(:parents, [])

    term_pointer = parents+[term.name]

    nodeset = doc.find_by_terms(*term_pointer)

    # NOTE(review): assumes nodeset is Enumerable with no repeated nodes (e.g. a
    # Nokogiri::XML::NodeSet from an xpath query), so the position supplied by
    # each_with_index equals nodeset.index(node) -- confirm against find_by_terms.
    nodeset.each_with_index do |node, node_index|
      # create solr fields
      self.solrize_node(node, doc, term_pointer, term, solr_doc)

      # Child terms are addressed relative to this particular node, so the
      # pointer segment records the node's position within the nodeset.
      child_parents = parents+[{term.name=>node_index}]
      term.children.each_pair do |child_term_name, child_term|
        doc.solrize_term(child_term, solr_doc, :parents=>child_parents)
      end
    end
    solr_doc
  end

  # Populate a solr document with solr fields corresponding to the given xml node
  # Field names are generated from +term_pointer+ and the data_type of +term+
  # @node the xml node to read; node.value is used when term.path is a Hash
  #   containing :attribute, node.text otherwise
  # @doc OM::XML::Document or Nokogiri::XML::Node (kept for interface compatibility)
  # @term_pointer Array pointing to the desired term in +terminology+
  # @term the term that +term_pointer+ refers to
  # @solr_doc (optional) Solr::Document to populate
  # Returns +solr_doc+.
  def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new)
    if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
      node_value = node.value
    else
      node_value = node.text
    end

    generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
    generic_field_name = self.generate_solr_symbol(generic_field_name_base, term.data_type)

    solr_doc << Solr::Field.new(generic_field_name => node_value)

    # Nested terms additionally get a field under their hierarchical name.
    if term_pointer.length > 1
      hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
      hierarchical_field_name = self.generate_solr_symbol(hierarchical_field_name_base, term.data_type)
      solr_doc << Solr::Field.new(hierarchical_field_name => node_value)
    end
    solr_doc
  end

  # Use Solrizer::FieldNameMapper to generate an appropriate solr field name +field_name+ and +field_type+
  def self.generate_solr_symbol(field_name, field_type) # :nodoc:
    Solrizer::FieldNameMapper.solr_name(field_name, field_type)
  end

  # Instance Methods

  # Solrizes the receiver; see TerminologyBasedSolrizer.solrize.
  def to_solr(solr_doc = Solr::Document.new) # :nodoc:
    Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc)
  end

  # Solrizes +term+ within the receiver; see TerminologyBasedSolrizer.solrize_term.
  def solrize_term(term, solr_doc = Solr::Document.new, opts={})
    Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, opts)
  end

  # Solrizes a single node of the receiver; see TerminologyBasedSolrizer.solrize_node.
  # +term+ must be forwarded ahead of +solr_doc+: the module method takes
  # (node, doc, term_pointer, term, solr_doc).
  def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new)
    Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, term, solr_doc)
  end

  protected

  def generate_solr_symbol(field_name, field_type) # :nodoc:
    Solrizer::XML::TerminologyBasedSolrizer.generate_solr_symbol(field_name, field_type)
  end

end
|
data/lib/solrizer/xml.rb
ADDED
data/lib/solrizer.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'solrizer/indexer.rb'
|
3
|
+
require 'solrizer/field_name_mapper.rb'
|
4
|
+
|
5
|
+
# Let people explicitly require xml support if they want it ...
|
6
|
+
# require 'solrizer/xml.rb'
|
7
|
+
|
3
8
|
# require 'fastercsv'
|
4
9
|
require "ruby-debug"
|
5
10
|
|
6
11
|
|
7
|
-
|
8
12
|
module Solrizer
|
9
13
|
class Solrizer
|
10
14
|
|
@@ -34,7 +38,19 @@ class Solrizer
|
|
34
38
|
|
35
39
|
start = Time.now
|
36
40
|
print "Retrieving object #{obj} ..."
|
37
|
-
|
41
|
+
|
42
|
+
case obj
|
43
|
+
when ActiveFedora::Base
|
44
|
+
# do nothing
|
45
|
+
when Fedora::FedoraObject
|
46
|
+
obj = Repository.get_object( obj.pid )
|
47
|
+
when String
|
48
|
+
obj = Repository.get_object( obj )
|
49
|
+
else
|
50
|
+
raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
|
51
|
+
end
|
52
|
+
|
53
|
+
# obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
|
38
54
|
|
39
55
|
obj_done = Time.now
|
40
56
|
obj_done_elapse = obj_done - start
|
data/solrizer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{solrizer}
|
8
|
-
s.version = "0.
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matt Zumwalt"]
|
12
|
-
s.date = %q{2010-09-
|
12
|
+
s.date = %q{2010-09-15}
|
13
13
|
s.description = %q{Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener}
|
14
14
|
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,13 +26,17 @@ Gem::Specification.new do |s|
|
|
26
26
|
"config/fedora.yml",
|
27
27
|
"config/hydra_types.yml",
|
28
28
|
"config/solr.yml",
|
29
|
+
"config/solr_mappings.yml",
|
29
30
|
"lib/solrizer.rb",
|
30
31
|
"lib/solrizer/configuration.rb",
|
31
32
|
"lib/solrizer/extractor.rb",
|
33
|
+
"lib/solrizer/field_name_mapper.rb",
|
32
34
|
"lib/solrizer/indexer.rb",
|
33
35
|
"lib/solrizer/main.rb",
|
34
36
|
"lib/solrizer/replicator.rb",
|
35
37
|
"lib/solrizer/repository.rb",
|
38
|
+
"lib/solrizer/xml.rb",
|
39
|
+
"lib/solrizer/xml/terminology_based_solrizer.rb",
|
36
40
|
"lib/tasks/solrizer.rake",
|
37
41
|
"solrizer.gemspec",
|
38
42
|
"spec/fixtures/druid-bv448hq0314-descMetadata.xml",
|
@@ -40,14 +44,18 @@ Gem::Specification.new do |s|
|
|
40
44
|
"spec/fixtures/druid-cm234kq4672-extProperties.xml",
|
41
45
|
"spec/fixtures/druid-cm234kq4672-stories.xml",
|
42
46
|
"spec/fixtures/druid-hc513kw4806-descMetadata.xml",
|
47
|
+
"spec/fixtures/mods_articles/hydrangea_article1.xml",
|
43
48
|
"spec/fixtures/rels_ext_cmodel.xml",
|
49
|
+
"spec/fixtures/solr_mappings_af_0.1.yml",
|
44
50
|
"spec/integration/indexer_spec.rb",
|
45
51
|
"spec/rcov.opts",
|
46
52
|
"spec/spec.opts",
|
47
53
|
"spec/spec_helper.rb",
|
48
54
|
"spec/units/extractor_spec.rb",
|
55
|
+
"spec/units/field_name_mapper_spec.rb",
|
49
56
|
"spec/units/indexer_spec.rb",
|
50
|
-
"spec/units/shelver_spec.rb"
|
57
|
+
"spec/units/shelver_spec.rb",
|
58
|
+
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
51
59
|
]
|
52
60
|
s.homepage = %q{http://github.com/projecthydra/solrizer}
|
53
61
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -58,8 +66,10 @@ Gem::Specification.new do |s|
|
|
58
66
|
"spec/integration/indexer_spec.rb",
|
59
67
|
"spec/spec_helper.rb",
|
60
68
|
"spec/units/extractor_spec.rb",
|
69
|
+
"spec/units/field_name_mapper_spec.rb",
|
61
70
|
"spec/units/indexer_spec.rb",
|
62
|
-
"spec/units/shelver_spec.rb"
|
71
|
+
"spec/units/shelver_spec.rb",
|
72
|
+
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
63
73
|
]
|
64
74
|
|
65
75
|
if s.respond_to? :specification_version then
|
@@ -68,13 +78,16 @@ Gem::Specification.new do |s|
|
|
68
78
|
|
69
79
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
70
80
|
s.add_runtime_dependency(%q<active-fedora>, [">= 1.1.5"])
|
81
|
+
s.add_runtime_dependency(%q<om>, [">= 1.0.0"])
|
71
82
|
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
72
83
|
else
|
73
84
|
s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
|
85
|
+
s.add_dependency(%q<om>, [">= 1.0.0"])
|
74
86
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
75
87
|
end
|
76
88
|
else
|
77
89
|
s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
|
90
|
+
s.add_dependency(%q<om>, [">= 1.0.0"])
|
78
91
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
79
92
|
end
|
80
93
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
<mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
|
2
|
+
http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
3
|
+
|
4
|
+
<titleInfo>
|
5
|
+
<nonSort>THE</nonSort>
|
6
|
+
<title>ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
|
7
|
+
<subTitle>SUBTITLE</subTitle>
|
8
|
+
</titleInfo>
|
9
|
+
<titleInfo lang="finnish">
|
10
|
+
<title>Artikkelin otsikko Hydrangea artiklan 1</title>
|
11
|
+
</titleInfo>
|
12
|
+
|
13
|
+
<name type="personal">
|
14
|
+
<namePart type="family">FAMILY NAME</namePart>
|
15
|
+
<namePart type="given">GIVEN NAMES</namePart>
|
16
|
+
<namePart type="termsOfAddress">DR.</namePart>
|
17
|
+
<displayForm>NAME AS IT APPEARS</displayForm>
|
18
|
+
<affiliation>FACULTY, UNIVERSITY</affiliation>
|
19
|
+
<role>
|
20
|
+
<roleTerm authority="marcrelator" type="text">creator</roleTerm>
|
21
|
+
</role>
|
22
|
+
<role>
|
23
|
+
<roleTerm type="text">submitter</roleTerm>
|
24
|
+
</role>
|
25
|
+
</name>
|
26
|
+
|
27
|
+
<name type="personal">
|
28
|
+
<namePart type="family">Gautama</namePart>
|
29
|
+
<namePart type="given">Siddartha</namePart>
|
30
|
+
<namePart type="termsOfAddress">Prince</namePart>
|
31
|
+
<affiliation>Nirvana</affiliation>
|
32
|
+
<role>
|
33
|
+
<roleTerm authority="marcrelator" type="text">teacher</roleTerm>
|
34
|
+
</role>
|
35
|
+
</name>
|
36
|
+
|
37
|
+
<typeOfResource>text</typeOfResource>
|
38
|
+
<genre authority="local">journal article</genre>
|
39
|
+
|
40
|
+
<abstract>ABSTRACT</abstract>
|
41
|
+
<subject>
|
42
|
+
<topic>TOPIC 1</topic>
|
43
|
+
<topic>TOPIC 2</topic>
|
44
|
+
</subject>
|
45
|
+
<subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
|
46
|
+
<topic>CONTROLLED TERM</topic>
|
47
|
+
</subject>
|
48
|
+
|
49
|
+
<language>
|
50
|
+
<languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
|
51
|
+
</language>
|
52
|
+
|
53
|
+
<physicalDescription>
|
54
|
+
<internetMediaType>application/pdf</internetMediaType>
|
55
|
+
<extent>36 p.</extent>
|
56
|
+
</physicalDescription>
|
57
|
+
|
58
|
+
<relatedItem type="host">
|
59
|
+
<titleInfo>
|
60
|
+
<title>TITLE OF HOST JOURNAL</title>
|
61
|
+
</titleInfo>
|
62
|
+
<originInfo>
|
63
|
+
<publisher>PUBLISHER</publisher>
|
64
|
+
<dateIssued>DATE</dateIssued>
|
65
|
+
</originInfo>
|
66
|
+
<identifier type="issn">0013-8908</identifier>
|
67
|
+
<part>
|
68
|
+
<detail type="volume">
|
69
|
+
<number>2</number>
|
70
|
+
</detail>
|
71
|
+
<detail type="level">
|
72
|
+
<number>2</number>
|
73
|
+
</detail>
|
74
|
+
<extent unit="pages">
|
75
|
+
<start>195</start>
|
76
|
+
<end>230</end>
|
77
|
+
</extent>
|
78
|
+
<date>FEB. 2007</date>
|
79
|
+
</part>
|
80
|
+
</relatedItem>
|
81
|
+
|
82
|
+
<identifier type="uri">http://URL.edu.au/</identifier>
|
83
|
+
<identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
|
84
|
+
<location>
|
85
|
+
<url>http://URL.edu.au/</url>
|
86
|
+
</location>
|
87
|
+
<accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
|
88
|
+
<accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
|
89
|
+
|
90
|
+
</mods>
|
@@ -0,0 +1,16 @@
|
|
1
|
+
id: id
|
2
|
+
date: _date
|
3
|
+
string: _field
|
4
|
+
text: _field
|
5
|
+
symbol: _field
|
6
|
+
integer: _field
|
7
|
+
long: _field
|
8
|
+
boolean: _field
|
9
|
+
float: _field
|
10
|
+
double: _field
|
11
|
+
facet: _facet
|
12
|
+
display: _display
|
13
|
+
sort: _sort
|
14
|
+
unstemmed_search: _unstem_search
|
15
|
+
|
16
|
+
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "..", "spec_helper" )
|
2
|
+
|
3
|
+
# require 'solrizer'
|
4
|
+
# require 'solrizer/field_name_mapper'
|
5
|
+
|
6
|
+
# Bare host class that mixes in Solrizer::FieldNameMapper so the examples
# can call the instance-level solr_name on a real object.
class FieldNameMapperTest
|
7
|
+
include Solrizer::FieldNameMapper
|
8
|
+
end
|
9
|
+
|
10
|
+
# Shorthand for the object under test: returns @test_instance, which the
# before(:each) hook in the describe block below assigns.
def helper
|
11
|
+
@test_instance
|
12
|
+
end
|
13
|
+
|
14
|
+
describe Solrizer::FieldNameMapper do
|
15
|
+
|
16
|
+
before(:each) do
|
17
|
+
@test_instance = FieldNameMapperTest.new
|
18
|
+
end
|
19
|
+
|
20
|
+
after(:all) do
|
21
|
+
# Revert to default mappings after running tests
|
22
|
+
Solrizer::FieldNameMapper.load_mappings
|
23
|
+
end
|
24
|
+
|
25
|
+
describe ".solr_name" do
|
26
|
+
it "should generate solr field names from settings in solr_mappings" do
|
27
|
+
helper.solr_name(:system_create, :date).should == :system_create_dt
|
28
|
+
end
|
29
|
+
it "should format the response based on the class of the input" do
|
30
|
+
helper.solr_name(:system_create, :date).should == :system_create_dt
|
31
|
+
helper.solr_name("system_create", :date).should == "system_create_dt"
|
32
|
+
end
|
33
|
+
it "should rely on whichever mappings have been loaded into the SolrService" do
|
34
|
+
helper.solr_name(:system_create, :date).should == :system_create_dt
|
35
|
+
helper.solr_name(:foo, :text).should == :foo_t
|
36
|
+
Solrizer::FieldNameMapper.load_mappings(File.join(File.dirname(__FILE__), "..", "fixtures", "solr_mappings_af_0.1.yml"))
|
37
|
+
helper.solr_name(:system_create, :date).should == :system_create_date
|
38
|
+
helper.solr_name(:foo, :text).should == :foo_field
|
39
|
+
end
|
40
|
+
end
|
41
|
+
end
|
data/spec/units/shelver_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
-
require 'solrizer'
|
3
2
|
|
4
3
|
describe Solrizer::Solrizer do
|
5
4
|
|
@@ -9,17 +8,17 @@ describe Solrizer::Solrizer do
|
|
9
8
|
|
10
9
|
describe "solrize" do
|
11
10
|
it "should trigger the indexer for the provided object" do
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
mock_object.
|
18
|
-
ActiveFedora::Base.expects(:load_instance).
|
11
|
+
sample_obj = ActiveFedora::Base.new
|
12
|
+
@solrizer.indexer.expects(:index).with( sample_obj )
|
13
|
+
@solrizer.solrize( sample_obj )
|
14
|
+
end
|
15
|
+
it "should work with Fedora::FedoraObject objects" do
|
16
|
+
mock_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
|
17
|
+
ActiveFedora::Base.expects(:load_instance).with( mock_object.pid ).returns(mock_object)
|
19
18
|
@solrizer.indexer.expects(:index).with( mock_object )
|
20
19
|
@solrizer.solrize( mock_object )
|
21
20
|
end
|
22
|
-
it "should
|
21
|
+
it "should load the object if only a pid is provided" do
|
23
22
|
mock_object = mock("my object")
|
24
23
|
mock_object.stubs(:pid)
|
25
24
|
mock_object.stubs(:label)
|
@@ -29,13 +28,14 @@ describe Solrizer::Solrizer do
|
|
29
28
|
@solrizer.indexer.expects(:index).with(mock_object)
|
30
29
|
@solrizer.solrize("_PID_")
|
31
30
|
end
|
31
|
+
|
32
32
|
end
|
33
33
|
|
34
34
|
describe "solrize_objects" do
|
35
|
-
it "should call solrize for each
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
it "should call solrize for each object returned by Fedora::Repository.find_objects" do
|
36
|
+
objects = [["pid1"], ["pid2"], ["pid3"]]
|
37
|
+
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
38
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object ) }
|
39
39
|
@solrizer.solrize_objects
|
40
40
|
end
|
41
41
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'solrizer'
|
3
|
+
require 'solrizer/xml'
|
4
|
+
|
5
|
+
describe Solrizer::XML::TerminologyBasedSolrizer do
|
6
|
+
|
7
|
+
before(:all) do
|
8
|
+
OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
|
9
|
+
end
|
10
|
+
|
11
|
+
before(:each) do
|
12
|
+
article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
|
13
|
+
@mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
|
14
|
+
end
|
15
|
+
|
16
|
+
describe ".to_solr" do
|
17
|
+
|
18
|
+
# after(:all) do
|
19
|
+
# # Revert to default mappings after running tests
|
20
|
+
# ActiveFedora::SolrService.load_mappings
|
21
|
+
# end
|
22
|
+
|
23
|
+
it "should provide .to_solr and return a SolrDocument" do
|
24
|
+
@mods_article.should respond_to(:to_solr)
|
25
|
+
@mods_article.to_solr.should be_kind_of(Solr::Document)
|
26
|
+
end
|
27
|
+
|
28
|
+
it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
|
29
|
+
doc = Solr::Document.new
|
30
|
+
@mods_article.to_solr(doc).should equal(doc)
|
31
|
+
end
|
32
|
+
|
33
|
+
it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
|
34
|
+
# mock_terms = {:name1=>:term1, :name2=>:term2}
|
35
|
+
# ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
|
36
|
+
solr_doc = Solr::Document.new
|
37
|
+
OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
38
|
+
@mods_article.expects(:solrize_term).with(v, solr_doc)
|
39
|
+
end
|
40
|
+
@mods_article.to_solr(solr_doc)
|
41
|
+
end
|
42
|
+
|
43
|
+
it "should use Solr mappings to generate field names" do
|
44
|
+
|
45
|
+
solr_doc = @mods_article.to_solr
|
46
|
+
#should have these
|
47
|
+
|
48
|
+
solr_doc[:abstract].should be_nil
|
49
|
+
solr_doc[:abstract_t].should == "ABSTRACT"
|
50
|
+
solr_doc[:title_info_1_language_t].should == "finnish"
|
51
|
+
solr_doc[:person_1_role_0_text_t].should == "teacher"
|
52
|
+
|
53
|
+
# These are a holdover from an old version of OM
|
54
|
+
# solr_doc[:finnish_title_info_language_t].should == "finnish"
|
55
|
+
# solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"
|
56
|
+
|
57
|
+
# solr_doc[:mydate_date].should == "fake-date"
|
58
|
+
#
|
59
|
+
# solr_doc[:publisher_t].should be_nil
|
60
|
+
# solr_doc[:coverage_t].should be_nil
|
61
|
+
# solr_doc[:creation_date_dt].should be_nil
|
62
|
+
# solr_doc.should == ""
|
63
|
+
|
64
|
+
end
|
65
|
+
|
66
|
+
end
|
67
|
+
|
68
|
+
describe ".solrize_term" do
|
69
|
+
|
70
|
+
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
71
|
+
solr_doc = Solr::Document.new
|
72
|
+
result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
73
|
+
result.should == solr_doc
|
74
|
+
# @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
|
75
|
+
end
|
76
|
+
|
77
|
+
end
|
78
|
+
|
79
|
+
describe ".solrize_node" do
|
80
|
+
it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
|
81
|
+
doc = Solr::Document.new
|
82
|
+
# @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
|
83
|
+
end
|
84
|
+
it "should create a solr field containing node.text"
|
85
|
+
it "should create hierarchical field entries if parents is not empty"
|
86
|
+
it "should only create one node if parents is empty"
|
87
|
+
end
|
88
|
+
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-15 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -35,9 +35,25 @@ dependencies:
|
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
38
|
+
name: om
|
39
39
|
prerelease: false
|
40
40
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 23
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 0
|
49
|
+
- 0
|
50
|
+
version: 1.0.0
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rspec
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
57
|
none: false
|
42
58
|
requirements:
|
43
59
|
- - ">="
|
@@ -49,7 +65,7 @@ dependencies:
|
|
49
65
|
- 9
|
50
66
|
version: 1.2.9
|
51
67
|
type: :development
|
52
|
-
version_requirements: *
|
68
|
+
version_requirements: *id003
|
53
69
|
description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
|
54
70
|
email: matt.zumwalt@yourmediashelf.com
|
55
71
|
executables: []
|
@@ -69,13 +85,17 @@ files:
|
|
69
85
|
- config/fedora.yml
|
70
86
|
- config/hydra_types.yml
|
71
87
|
- config/solr.yml
|
88
|
+
- config/solr_mappings.yml
|
72
89
|
- lib/solrizer.rb
|
73
90
|
- lib/solrizer/configuration.rb
|
74
91
|
- lib/solrizer/extractor.rb
|
92
|
+
- lib/solrizer/field_name_mapper.rb
|
75
93
|
- lib/solrizer/indexer.rb
|
76
94
|
- lib/solrizer/main.rb
|
77
95
|
- lib/solrizer/replicator.rb
|
78
96
|
- lib/solrizer/repository.rb
|
97
|
+
- lib/solrizer/xml.rb
|
98
|
+
- lib/solrizer/xml/terminology_based_solrizer.rb
|
79
99
|
- lib/tasks/solrizer.rake
|
80
100
|
- solrizer.gemspec
|
81
101
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
@@ -83,14 +103,18 @@ files:
|
|
83
103
|
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
84
104
|
- spec/fixtures/druid-cm234kq4672-stories.xml
|
85
105
|
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
106
|
+
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
86
107
|
- spec/fixtures/rels_ext_cmodel.xml
|
108
|
+
- spec/fixtures/solr_mappings_af_0.1.yml
|
87
109
|
- spec/integration/indexer_spec.rb
|
88
110
|
- spec/rcov.opts
|
89
111
|
- spec/spec.opts
|
90
112
|
- spec/spec_helper.rb
|
91
113
|
- spec/units/extractor_spec.rb
|
114
|
+
- spec/units/field_name_mapper_spec.rb
|
92
115
|
- spec/units/indexer_spec.rb
|
93
116
|
- spec/units/shelver_spec.rb
|
117
|
+
- spec/units/xml_terminology_based_solrizer_spec.rb
|
94
118
|
has_rdoc: true
|
95
119
|
homepage: http://github.com/projecthydra/solrizer
|
96
120
|
licenses: []
|
@@ -129,5 +153,7 @@ test_files:
|
|
129
153
|
- spec/integration/indexer_spec.rb
|
130
154
|
- spec/spec_helper.rb
|
131
155
|
- spec/units/extractor_spec.rb
|
156
|
+
- spec/units/field_name_mapper_spec.rb
|
132
157
|
- spec/units/indexer_spec.rb
|
133
158
|
- spec/units/shelver_spec.rb
|
159
|
+
- spec/units/xml_terminology_based_solrizer_spec.rb
|