solrizer 0.1.3 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Rakefile +1 -0
- data/VERSION +1 -1
- data/config/solr_mappings.yml +14 -0
- data/lib/solrizer/field_name_mapper.rb +62 -0
- data/lib/solrizer/xml/terminology_based_solrizer.rb +104 -0
- data/lib/solrizer/xml.rb +4 -0
- data/lib/solrizer.rb +18 -2
- data/solrizer.gemspec +17 -4
- data/spec/fixtures/mods_articles/hydrangea_article1.xml +90 -0
- data/spec/fixtures/solr_mappings_af_0.1.yml +16 -0
- data/spec/spec_helper.rb +2 -0
- data/spec/units/field_name_mapper_spec.rb +41 -0
- data/spec/units/shelver_spec.rb +13 -13
- data/spec/units/xml_terminology_based_solrizer_spec.rb +88 -0
- metadata +33 -7
data/Rakefile
CHANGED
@@ -11,6 +11,7 @@ begin
|
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
13
|
gem.add_dependency "active-fedora", ">= 1.1.5"
|
14
|
+
gem.add_dependency "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
|
14
15
|
gem.add_development_dependency "rspec", ">= 1.2.9"
|
15
16
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
16
17
|
end
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.1.3
|
1
|
+
0.2.0
|
@@ -0,0 +1,62 @@
|
|
1
|
+
require "logger"
require "yaml"
|
2
|
+
|
3
|
+
module Solrizer
  # Translates generic field names into solr field names by appending the
  # suffix configured for the field's type in a solr_mappings.yml file.
  #
  # Call the module-level methods directly, or include the module to get an
  # instance-level +solr_name+ that delegates to the module-level one.
  module FieldNameMapper

    # Module Methods & Attributes

    # Field type (String key) => suffix appended to field names of that type.
    @@mappings = {}

    # Generates solr field names from settings in solr_mappings.
    #
    # @param field_name [String, Symbol] base name of the field
    # @param field_type [String, Symbol] key looked up (as a String) in the mappings
    # @return [String, Symbol] +field_name+ followed by the mapped suffix, or by
    #   nothing when the type has no mapping. A Symbol is returned when
    #   +field_name+ is a Symbol; otherwise a String.
    def self.solr_name(field_name, field_type)
      name = "#{field_name}#{mappings[field_type.to_s]}"
      field_name.kind_of?(Symbol) ? name.to_sym : name
    end

    # @return [Hash] the currently loaded mappings
    def self.mappings
      @@mappings
    end

    # Replaces the currently loaded mappings.
    #
    # @param mappings [Hash] field type => suffix
    def self.mappings=(mappings)
      @@mappings = mappings
    end

    # Instance Methods

    # Instance-level convenience that delegates to FieldNameMapper.solr_name.
    def solr_name(field_name, field_type)
      ::Solrizer::FieldNameMapper.solr_name(field_name, field_type)
    end

    # Logger used while loading mappings: RAILS_DEFAULT_LOGGER when that
    # constant is defined, otherwise a Logger writing to STDOUT.
    def self.logger
      @logger ||= defined?(RAILS_DEFAULT_LOGGER) ? RAILS_DEFAULT_LOGGER : Logger.new(STDOUT)
    end

    # Loads solr mappings from a YAML file and guarantees an "id" mapping.
    #
    # @param config_path [String, nil] full path to the mappings YAML file.
    #   When nil, RAILS_ROOT/config/solr_mappings.yml is used if RAILS_ROOT is
    #   defined and that file exists; otherwise the solr_mappings.yml in the
    #   gem's own config directory is used.
    # @return [String, nil] "id" when the default id mapping had to be added,
    #   nil when the file already supplied one
    # @raise [Errno::ENOENT] if the resolved file does not exist
    def self.load_mappings( config_path=nil )
      if config_path.nil?
        if defined?(RAILS_ROOT)
          config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
        end
        # Default to using the config file within the gem
        unless File.exist?(config_path.to_s)
          config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
        end
      end

      logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")

      # Block form closes the file handle as soon as parsing finishes.
      loaded = File.open(config_path) { |file| YAML.load(file) }

      # An empty YAML document parses to a falsy value; fall back to an empty
      # Hash so the "id" default below can still be applied.
      @@mappings = loaded || {}

      mappings["id"] = "id" unless mappings["id"]
    end

    # This ensures that some mappings will always be loaded
    self.load_mappings

  end #FieldNameMapper
end #Solrizer
|
@@ -0,0 +1,104 @@
|
|
1
|
+
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
module Solrizer::XML::TerminologyBasedSolrizer

  # Module Methods

  # Build a solr document from +doc+ based on its terminology.
  # Every top-level term is handed to +doc.solrize_term+ together with +solr_doc+.
  #
  # @param doc [OM::XML::Document] document whose class exposes +terminology+
  # @param solr_doc [Solr::Document] (optional) document to populate
  # @return [Solr::Document] +solr_doc+
  def self.solrize(doc, solr_doc=Solr::Document.new)
    terminology = doc.class.terminology
    unless terminology.nil?
      terminology.terms.each_pair do |_term_name, term|
        doc.solrize_term(term, solr_doc)
      end
    end

    solr_doc
  end

  # Populate a solr document with fields based on the nodes in +doc+ that
  # correspond to +term+, then recurse into the term's children for each node.
  #
  # @param doc [OM::XML::Document] document to search with +find_by_terms+
  # @param term the term to solrize; must respond to +name+ and +children+
  # @param solr_doc [Solr::Document] (optional) document to populate
  # @param opts [Hash] :parents => Array of pointer segments leading to +term+
  #   (defaults to [])
  # @return [Solr::Document] +solr_doc+
  def self.solrize_term(doc, term, solr_doc = Solr::Document.new, opts={})
    parents = opts.fetch(:parents, [])
    term_pointer = parents + [term.name]

    # each_with_index supplies the node's position directly instead of
    # searching the node set for every node.
    doc.find_by_terms(*term_pointer).each_with_index do |node, node_index|
      # create solr fields
      solrize_node(node, doc, term_pointer, term, solr_doc)

      term.children.each_pair do |_child_term_name, child_term|
        # Children are addressed relative to this specific node, so the
        # pointer segment records the node's index.
        doc.solrize_term(child_term, solr_doc, :parents => parents + [{ term.name => node_index }])
      end
    end
    solr_doc
  end

  # Populate a solr document with solr fields corresponding to the given xml node.
  # Field names are generated from +term_pointer+ and the data type of +term+.
  # A hierarchical field is added as well when +term_pointer+ has more than
  # one segment.
  #
  # @param node the xml node; its +value+ is used when the term's path is a
  #   Hash containing :attribute, otherwise its +text+
  # @param doc OM::XML::Document or Nokogiri::XML::Node (not consulted here;
  #   kept so the signature stays stable for callers)
  # @param term_pointer [Array] pointer to the term within the terminology
  # @param term the term the node corresponds to; must respond to +path+ and +data_type+
  # @param solr_doc [Solr::Document] (optional) document to populate
  # @return [Solr::Document] +solr_doc+
  def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new)
    node_value =
      if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
        node.value
      else
        node.text
      end

    generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
    generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)

    solr_doc << Solr::Field.new(generic_field_name => node_value)

    if term_pointer.length > 1
      hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
      hierarchical_field_name = generate_solr_symbol(hierarchical_field_name_base, term.data_type)
      solr_doc << Solr::Field.new(hierarchical_field_name => node_value)
    end
    solr_doc
  end

  # Use Solrizer::FieldNameMapper to generate an appropriate solr field name +field_name+ and +field_type+
  def self.generate_solr_symbol(field_name, field_type) # :nodoc:
    Solrizer::FieldNameMapper.solr_name(field_name, field_type)
  end

  # Instance Methods

  # Solrize the including document itself; see TerminologyBasedSolrizer.solrize.
  def to_solr(solr_doc = Solr::Document.new) # :nodoc:
    Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc)
  end

  # Instance-level form of TerminologyBasedSolrizer.solrize_term with +self+ as the document.
  def solrize_term(term, solr_doc = Solr::Document.new, opts={})
    Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, opts)
  end

  # Instance-level form of TerminologyBasedSolrizer.solrize_node with +self+ as the document.
  # +term+ must be forwarded so that +solr_doc+ lands in the solr_doc position.
  def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new)
    Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, term, solr_doc)
  end

  protected

  # Instance-level form of TerminologyBasedSolrizer.generate_solr_symbol.
  def generate_solr_symbol(field_name, field_type) # :nodoc:
    Solrizer::XML::TerminologyBasedSolrizer.generate_solr_symbol(field_name, field_type)
  end

end
|
data/lib/solrizer/xml.rb
ADDED
data/lib/solrizer.rb
CHANGED
@@ -1,10 +1,14 @@
|
|
1
1
|
require 'rubygems'
|
2
2
|
require 'solrizer/indexer.rb'
|
3
|
+
require 'solrizer/field_name_mapper.rb'
|
4
|
+
|
5
|
+
# Let people explicitly require xml support if they want it ...
|
6
|
+
# require 'solrizer/xml.rb'
|
7
|
+
|
3
8
|
# require 'fastercsv'
|
4
9
|
require "ruby-debug"
|
5
10
|
|
6
11
|
|
7
|
-
|
8
12
|
module Solrizer
|
9
13
|
class Solrizer
|
10
14
|
|
@@ -34,7 +38,19 @@ class Solrizer
|
|
34
38
|
|
35
39
|
start = Time.now
|
36
40
|
print "Retrieving object #{obj} ..."
|
37
|
-
|
41
|
+
|
42
|
+
case obj
|
43
|
+
when ActiveFedora::Base
|
44
|
+
# do nothing
|
45
|
+
when Fedora::FedoraObject
|
46
|
+
obj = Repository.get_object( obj.pid )
|
47
|
+
when String
|
48
|
+
obj = Repository.get_object( obj )
|
49
|
+
else
|
50
|
+
raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
|
51
|
+
end
|
52
|
+
|
53
|
+
# obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
|
38
54
|
|
39
55
|
obj_done = Time.now
|
40
56
|
obj_done_elapse = obj_done - start
|
data/solrizer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{solrizer}
|
8
|
-
s.version = "0.1.3"
|
8
|
+
s.version = "0.2.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matt Zumwalt"]
|
12
|
-
s.date = %q{2010-09-
|
12
|
+
s.date = %q{2010-09-15}
|
13
13
|
s.description = %q{Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener}
|
14
14
|
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -26,13 +26,17 @@ Gem::Specification.new do |s|
|
|
26
26
|
"config/fedora.yml",
|
27
27
|
"config/hydra_types.yml",
|
28
28
|
"config/solr.yml",
|
29
|
+
"config/solr_mappings.yml",
|
29
30
|
"lib/solrizer.rb",
|
30
31
|
"lib/solrizer/configuration.rb",
|
31
32
|
"lib/solrizer/extractor.rb",
|
33
|
+
"lib/solrizer/field_name_mapper.rb",
|
32
34
|
"lib/solrizer/indexer.rb",
|
33
35
|
"lib/solrizer/main.rb",
|
34
36
|
"lib/solrizer/replicator.rb",
|
35
37
|
"lib/solrizer/repository.rb",
|
38
|
+
"lib/solrizer/xml.rb",
|
39
|
+
"lib/solrizer/xml/terminology_based_solrizer.rb",
|
36
40
|
"lib/tasks/solrizer.rake",
|
37
41
|
"solrizer.gemspec",
|
38
42
|
"spec/fixtures/druid-bv448hq0314-descMetadata.xml",
|
@@ -40,14 +44,18 @@ Gem::Specification.new do |s|
|
|
40
44
|
"spec/fixtures/druid-cm234kq4672-extProperties.xml",
|
41
45
|
"spec/fixtures/druid-cm234kq4672-stories.xml",
|
42
46
|
"spec/fixtures/druid-hc513kw4806-descMetadata.xml",
|
47
|
+
"spec/fixtures/mods_articles/hydrangea_article1.xml",
|
43
48
|
"spec/fixtures/rels_ext_cmodel.xml",
|
49
|
+
"spec/fixtures/solr_mappings_af_0.1.yml",
|
44
50
|
"spec/integration/indexer_spec.rb",
|
45
51
|
"spec/rcov.opts",
|
46
52
|
"spec/spec.opts",
|
47
53
|
"spec/spec_helper.rb",
|
48
54
|
"spec/units/extractor_spec.rb",
|
55
|
+
"spec/units/field_name_mapper_spec.rb",
|
49
56
|
"spec/units/indexer_spec.rb",
|
50
|
-
"spec/units/shelver_spec.rb"
|
57
|
+
"spec/units/shelver_spec.rb",
|
58
|
+
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
51
59
|
]
|
52
60
|
s.homepage = %q{http://github.com/projecthydra/solrizer}
|
53
61
|
s.rdoc_options = ["--charset=UTF-8"]
|
@@ -58,8 +66,10 @@ Gem::Specification.new do |s|
|
|
58
66
|
"spec/integration/indexer_spec.rb",
|
59
67
|
"spec/spec_helper.rb",
|
60
68
|
"spec/units/extractor_spec.rb",
|
69
|
+
"spec/units/field_name_mapper_spec.rb",
|
61
70
|
"spec/units/indexer_spec.rb",
|
62
|
-
"spec/units/shelver_spec.rb"
|
71
|
+
"spec/units/shelver_spec.rb",
|
72
|
+
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
63
73
|
]
|
64
74
|
|
65
75
|
if s.respond_to? :specification_version then
|
@@ -68,13 +78,16 @@ Gem::Specification.new do |s|
|
|
68
78
|
|
69
79
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
70
80
|
s.add_runtime_dependency(%q<active-fedora>, [">= 1.1.5"])
|
81
|
+
s.add_runtime_dependency(%q<om>, [">= 1.0.0"])
|
71
82
|
s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
|
72
83
|
else
|
73
84
|
s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
|
85
|
+
s.add_dependency(%q<om>, [">= 1.0.0"])
|
74
86
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
75
87
|
end
|
76
88
|
else
|
77
89
|
s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
|
90
|
+
s.add_dependency(%q<om>, [">= 1.0.0"])
|
78
91
|
s.add_dependency(%q<rspec>, [">= 1.2.9"])
|
79
92
|
end
|
80
93
|
end
|
@@ -0,0 +1,90 @@
|
|
1
|
+
<mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
|
2
|
+
http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
|
3
|
+
|
4
|
+
<titleInfo>
|
5
|
+
<nonSort>THE</nonSort>
|
6
|
+
<title>ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
|
7
|
+
<subTitle>SUBTITLE</subTitle>
|
8
|
+
</titleInfo>
|
9
|
+
<titleInfo lang="finnish">
|
10
|
+
<title>Artikkelin otsikko Hydrangea artiklan 1</title>
|
11
|
+
</titleInfo>
|
12
|
+
|
13
|
+
<name type="personal">
|
14
|
+
<namePart type="family">FAMILY NAME</namePart>
|
15
|
+
<namePart type="given">GIVEN NAMES</namePart>
|
16
|
+
<namePart type="termsOfAddress">DR.</namePart>
|
17
|
+
<displayForm>NAME AS IT APPEARS</displayForm>
|
18
|
+
<affiliation>FACULTY, UNIVERSITY</affiliation>
|
19
|
+
<role>
|
20
|
+
<roleTerm authority="marcrelator" type="text">creator</roleTerm>
|
21
|
+
</role>
|
22
|
+
<role>
|
23
|
+
<roleTerm type="text">submitter</roleTerm>
|
24
|
+
</role>
|
25
|
+
</name>
|
26
|
+
|
27
|
+
<name type="personal">
|
28
|
+
<namePart type="family">Gautama</namePart>
|
29
|
+
<namePart type="given">Siddartha</namePart>
|
30
|
+
<namePart type="termsOfAddress">Prince</namePart>
|
31
|
+
<affiliation>Nirvana</affiliation>
|
32
|
+
<role>
|
33
|
+
<roleTerm authority="marcrelator" type="text">teacher</roleTerm>
|
34
|
+
</role>
|
35
|
+
</name>
|
36
|
+
|
37
|
+
<typeOfResource>text</typeOfResource>
|
38
|
+
<genre authority="local">journal article</genre>
|
39
|
+
|
40
|
+
<abstract>ABSTRACT</abstract>
|
41
|
+
<subject>
|
42
|
+
<topic>TOPIC 1</topic>
|
43
|
+
<topic>TOPIC 2</topic>
|
44
|
+
</subject>
|
45
|
+
<subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
|
46
|
+
<topic>CONTROLLED TERM</topic>
|
47
|
+
</subject>
|
48
|
+
|
49
|
+
<language>
|
50
|
+
<languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
|
51
|
+
</language>
|
52
|
+
|
53
|
+
<physicalDescription>
|
54
|
+
<internetMediaType>application/pdf</internetMediaType>
|
55
|
+
<extent>36 p.</extent>
|
56
|
+
</physicalDescription>
|
57
|
+
|
58
|
+
<relatedItem type="host">
|
59
|
+
<titleInfo>
|
60
|
+
<title>TITLE OF HOST JOURNAL</title>
|
61
|
+
</titleInfo>
|
62
|
+
<originInfo>
|
63
|
+
<publisher>PUBLISHER</publisher>
|
64
|
+
<dateIssued>DATE</dateIssued>
|
65
|
+
</originInfo>
|
66
|
+
<identifier type="issn">0013-8908</identifier>
|
67
|
+
<part>
|
68
|
+
<detail type="volume">
|
69
|
+
<number>2</number>
|
70
|
+
</detail>
|
71
|
+
<detail type="level">
|
72
|
+
<number>2</number>
|
73
|
+
</detail>
|
74
|
+
<extent unit="pages">
|
75
|
+
<start>195</start>
|
76
|
+
<end>230</end>
|
77
|
+
</extent>
|
78
|
+
<date>FEB. 2007</date>
|
79
|
+
</part>
|
80
|
+
</relatedItem>
|
81
|
+
|
82
|
+
<identifier type="uri">http://URL.edu.au/</identifier>
|
83
|
+
<identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
|
84
|
+
<location>
|
85
|
+
<url>http://URL.edu.au/</url>
|
86
|
+
</location>
|
87
|
+
<accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
|
88
|
+
<accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
|
89
|
+
|
90
|
+
</mods>
|
@@ -0,0 +1,16 @@
|
|
1
|
+
id: id
|
2
|
+
date: _date
|
3
|
+
string: _field
|
4
|
+
text: _field
|
5
|
+
symbol: _field
|
6
|
+
integer: _field
|
7
|
+
long: _field
|
8
|
+
boolean: _field
|
9
|
+
float: _field
|
10
|
+
double: _field
|
11
|
+
facet: _facet
|
12
|
+
display: _display
|
13
|
+
sort: _sort
|
14
|
+
unstemmed_search: _unstem_search
|
15
|
+
|
16
|
+
|
data/spec/spec_helper.rb
CHANGED
@@ -0,0 +1,41 @@
|
|
1
|
+
require File.join( File.dirname(__FILE__), "..", "spec_helper" )
|
2
|
+
|
3
|
+
# require 'solrizer'
|
4
|
+
# require 'solrizer/field_name_mapper'
|
5
|
+
|
6
|
+
# Minimal host class that mixes in Solrizer::FieldNameMapper so the
# instance-level solr_name can be exercised by the examples below.
class FieldNameMapperTest
|
7
|
+
include Solrizer::FieldNameMapper
|
8
|
+
end
|
9
|
+
|
10
|
+
# Returns @test_instance, which the spec's before(:each) hook sets to a
# fresh FieldNameMapperTest.
def helper
|
11
|
+
@test_instance
|
12
|
+
end
|
13
|
+
|
14
|
+
describe Solrizer::FieldNameMapper do

  before(:each) do
    @test_instance = FieldNameMapperTest.new
  end

  # The mappings are module-wide state and one example below swaps them out.
  # Revert to the default mappings after every example so that no example
  # depends on the order in which the examples run.
  after(:each) do
    Solrizer::FieldNameMapper.load_mappings
  end

  describe ".solr_name" do
    it "should generate solr field names from settings in solr_mappings" do
      helper.solr_name(:system_create, :date).should == :system_create_dt
    end

    it "should format the response based on the class of the input" do
      helper.solr_name(:system_create, :date).should == :system_create_dt
      helper.solr_name("system_create", :date).should == "system_create_dt"
    end

    it "should rely on whichever mappings have been loaded into the FieldNameMapper" do
      helper.solr_name(:system_create, :date).should == :system_create_dt
      helper.solr_name(:foo, :text).should == :foo_t
      Solrizer::FieldNameMapper.load_mappings(File.join(File.dirname(__FILE__), "..", "fixtures", "solr_mappings_af_0.1.yml"))
      helper.solr_name(:system_create, :date).should == :system_create_date
      helper.solr_name(:foo, :text).should == :foo_field
    end
  end
end
|
data/spec/units/shelver_spec.rb
CHANGED
@@ -1,5 +1,4 @@
|
|
1
1
|
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
-
require 'solrizer'
|
3
2
|
|
4
3
|
describe Solrizer::Solrizer do
|
5
4
|
|
@@ -9,17 +8,17 @@ describe Solrizer::Solrizer do
|
|
9
8
|
|
10
9
|
describe "solrize" do
|
11
10
|
it "should trigger the indexer for the provided object" do
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
mock_object.
|
18
|
-
ActiveFedora::Base.expects(:load_instance).
|
11
|
+
sample_obj = ActiveFedora::Base.new
|
12
|
+
@solrizer.indexer.expects(:index).with( sample_obj )
|
13
|
+
@solrizer.solrize( sample_obj )
|
14
|
+
end
|
15
|
+
it "should work with Fedora::FedoraObject objects" do
|
16
|
+
mock_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
|
17
|
+
ActiveFedora::Base.expects(:load_instance).with( mock_object.pid ).returns(mock_object)
|
19
18
|
@solrizer.indexer.expects(:index).with( mock_object )
|
20
19
|
@solrizer.solrize( mock_object )
|
21
20
|
end
|
22
|
-
it "should
|
21
|
+
it "should load the object if only a pid is provided" do
|
23
22
|
mock_object = mock("my object")
|
24
23
|
mock_object.stubs(:pid)
|
25
24
|
mock_object.stubs(:label)
|
@@ -29,13 +28,14 @@ describe Solrizer::Solrizer do
|
|
29
28
|
@solrizer.indexer.expects(:index).with(mock_object)
|
30
29
|
@solrizer.solrize("_PID_")
|
31
30
|
end
|
31
|
+
|
32
32
|
end
|
33
33
|
|
34
34
|
describe "solrize_objects" do
|
35
|
-
it "should call solrize for each
|
36
|
-
|
37
|
-
|
38
|
-
|
35
|
+
it "should call solrize for each object returned by Fedora::Repository.find_objects" do
|
36
|
+
objects = [["pid1"], ["pid2"], ["pid3"]]
|
37
|
+
Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
|
38
|
+
objects.each {|object| @solrizer.expects(:solrize).with( object ) }
|
39
39
|
@solrizer.solrize_objects
|
40
40
|
end
|
41
41
|
end
|
@@ -0,0 +1,88 @@
|
|
1
|
+
require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
|
2
|
+
require 'solrizer'
|
3
|
+
require 'solrizer/xml'
|
4
|
+
|
5
|
+
describe Solrizer::XML::TerminologyBasedSolrizer do

  before(:all) do
    # Mix the solrizer into the OM sample class so its instances gain
    # to_solr / solrize_term / solrize_node.
    OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
  end

  before(:each) do
    article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
    @mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
  end

  describe ".to_solr" do

    # after(:all) do
    #   # Revert to default mappings after running tests
    #   ActiveFedora::SolrService.load_mappings
    # end

    it "should provide .to_solr and return a SolrDocument" do
      @mods_article.should respond_to(:to_solr)
      @mods_article.to_solr.should be_kind_of(Solr::Document)
    end

    it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
      doc = Solr::Document.new
      @mods_article.to_solr(doc).should equal(doc)
    end

    it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
      solr_doc = Solr::Document.new
      OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
        @mods_article.expects(:solrize_term).with(v, solr_doc)
      end
      @mods_article.to_solr(solr_doc)
    end

    it "should use Solr mappings to generate field names" do

      solr_doc = @mods_article.to_solr
      #should have these

      solr_doc[:abstract].should be_nil
      solr_doc[:abstract_t].should == "ABSTRACT"
      solr_doc[:title_info_1_language_t].should == "finnish"
      solr_doc[:person_1_role_0_text_t].should == "teacher"

      # These are a holdover from an old version of OM
      # solr_doc[:finnish_title_info_language_t].should == "finnish"
      # solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"

      # solr_doc[:mydate_date].should == "fake-date"
      #
      # solr_doc[:publisher_t].should be_nil
      # solr_doc[:coverage_t].should be_nil
      # solr_doc[:creation_date_dt].should be_nil
      # solr_doc.should == ""

    end

  end

  describe ".solrize_term" do

    it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
      solr_doc = Solr::Document.new
      result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
      result.should == solr_doc
      # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
    end

  end

  describe ".solrize_node" do
    # Left pending (no block) rather than passing without an assertion.
    # Intended check once a node, term_pointer and term are set up:
    #   @mods_article.solrize_node(node, term_pointer, term, doc).should equal(doc)
    it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done"
    it "should create a solr field containing node.text"
    it "should create hierarchical field entries if parents is not empty"
    it "should only create one node if parents is empty"
  end
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 23
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
-
|
9
|
-
-
|
10
|
-
version: 0.1.3
|
8
|
+
- 2
|
9
|
+
- 0
|
10
|
+
version: 0.2.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2010-09-
|
18
|
+
date: 2010-09-15 00:00:00 -05:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|
@@ -35,9 +35,25 @@ dependencies:
|
|
35
35
|
type: :runtime
|
36
36
|
version_requirements: *id001
|
37
37
|
- !ruby/object:Gem::Dependency
|
38
|
-
name:
|
38
|
+
name: om
|
39
39
|
prerelease: false
|
40
40
|
requirement: &id002 !ruby/object:Gem::Requirement
|
41
|
+
none: false
|
42
|
+
requirements:
|
43
|
+
- - ">="
|
44
|
+
- !ruby/object:Gem::Version
|
45
|
+
hash: 23
|
46
|
+
segments:
|
47
|
+
- 1
|
48
|
+
- 0
|
49
|
+
- 0
|
50
|
+
version: 1.0.0
|
51
|
+
type: :runtime
|
52
|
+
version_requirements: *id002
|
53
|
+
- !ruby/object:Gem::Dependency
|
54
|
+
name: rspec
|
55
|
+
prerelease: false
|
56
|
+
requirement: &id003 !ruby/object:Gem::Requirement
|
41
57
|
none: false
|
42
58
|
requirements:
|
43
59
|
- - ">="
|
@@ -49,7 +65,7 @@ dependencies:
|
|
49
65
|
- 9
|
50
66
|
version: 1.2.9
|
51
67
|
type: :development
|
52
|
-
version_requirements: *
|
68
|
+
version_requirements: *id003
|
53
69
|
description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
|
54
70
|
email: matt.zumwalt@yourmediashelf.com
|
55
71
|
executables: []
|
@@ -69,13 +85,17 @@ files:
|
|
69
85
|
- config/fedora.yml
|
70
86
|
- config/hydra_types.yml
|
71
87
|
- config/solr.yml
|
88
|
+
- config/solr_mappings.yml
|
72
89
|
- lib/solrizer.rb
|
73
90
|
- lib/solrizer/configuration.rb
|
74
91
|
- lib/solrizer/extractor.rb
|
92
|
+
- lib/solrizer/field_name_mapper.rb
|
75
93
|
- lib/solrizer/indexer.rb
|
76
94
|
- lib/solrizer/main.rb
|
77
95
|
- lib/solrizer/replicator.rb
|
78
96
|
- lib/solrizer/repository.rb
|
97
|
+
- lib/solrizer/xml.rb
|
98
|
+
- lib/solrizer/xml/terminology_based_solrizer.rb
|
79
99
|
- lib/tasks/solrizer.rake
|
80
100
|
- solrizer.gemspec
|
81
101
|
- spec/fixtures/druid-bv448hq0314-descMetadata.xml
|
@@ -83,14 +103,18 @@ files:
|
|
83
103
|
- spec/fixtures/druid-cm234kq4672-extProperties.xml
|
84
104
|
- spec/fixtures/druid-cm234kq4672-stories.xml
|
85
105
|
- spec/fixtures/druid-hc513kw4806-descMetadata.xml
|
106
|
+
- spec/fixtures/mods_articles/hydrangea_article1.xml
|
86
107
|
- spec/fixtures/rels_ext_cmodel.xml
|
108
|
+
- spec/fixtures/solr_mappings_af_0.1.yml
|
87
109
|
- spec/integration/indexer_spec.rb
|
88
110
|
- spec/rcov.opts
|
89
111
|
- spec/spec.opts
|
90
112
|
- spec/spec_helper.rb
|
91
113
|
- spec/units/extractor_spec.rb
|
114
|
+
- spec/units/field_name_mapper_spec.rb
|
92
115
|
- spec/units/indexer_spec.rb
|
93
116
|
- spec/units/shelver_spec.rb
|
117
|
+
- spec/units/xml_terminology_based_solrizer_spec.rb
|
94
118
|
has_rdoc: true
|
95
119
|
homepage: http://github.com/projecthydra/solrizer
|
96
120
|
licenses: []
|
@@ -129,5 +153,7 @@ test_files:
|
|
129
153
|
- spec/integration/indexer_spec.rb
|
130
154
|
- spec/spec_helper.rb
|
131
155
|
- spec/units/extractor_spec.rb
|
156
|
+
- spec/units/field_name_mapper_spec.rb
|
132
157
|
- spec/units/indexer_spec.rb
|
133
158
|
- spec/units/shelver_spec.rb
|
159
|
+
- spec/units/xml_terminology_based_solrizer_spec.rb
|