solrizer 1.0.0 → 1.0.1
Sign up to get free protection for your applications and to get access to all the features.
- data/Gemfile +1 -1
- data/Gemfile.lock +15 -18
- data/History.txt +15 -0
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/solrizer/extractor.rb +42 -30
- data/lib/solrizer/html/extractor.rb +5 -5
- data/lib/solrizer/xml/extractor.rb +20 -19
- data/lib/solrizer/xml/terminology_based_solrizer.rb +17 -21
- data/solrizer.gemspec +1 -1
- data/spec/units/extractor_spec.rb +4 -46
- data/spec/units/xml_extractor_spec.rb +5 -4
- data/spec/units/xml_terminology_based_solrizer_spec.rb +16 -19
- metadata +3 -3
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,33 +1,30 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
columnize (0.3.
|
5
|
-
facets (2.9.
|
6
|
-
gemcutter (0.6.1)
|
4
|
+
columnize (0.3.2)
|
5
|
+
facets (2.9.1)
|
7
6
|
git (1.2.5)
|
8
|
-
jeweler (1.
|
9
|
-
|
7
|
+
jeweler (1.5.2)
|
8
|
+
bundler (~> 1.0.0)
|
10
9
|
git (>= 1.2.5)
|
11
|
-
|
12
|
-
json_pure (1.4.6)
|
10
|
+
rake
|
13
11
|
linecache (0.43)
|
14
12
|
mediashelf-loggable (0.4.0)
|
15
|
-
mocha (0.9.
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
mocha (0.9.12)
|
14
|
+
nokogiri (1.4.4)
|
15
|
+
om (1.0.2)
|
16
|
+
facets
|
19
17
|
facets
|
18
|
+
nokogiri
|
20
19
|
nokogiri (>= 1.4.2)
|
21
20
|
rake (0.8.7)
|
22
21
|
rspec (1.3.1)
|
23
|
-
ruby-debug (0.10.
|
22
|
+
ruby-debug (0.10.4)
|
24
23
|
columnize (>= 0.1)
|
25
|
-
ruby-debug-base (~> 0.10.
|
26
|
-
ruby-debug-base (0.10.
|
24
|
+
ruby-debug-base (~> 0.10.4.0)
|
25
|
+
ruby-debug-base (0.10.4)
|
27
26
|
linecache (>= 0.3)
|
28
|
-
|
29
|
-
json_pure (>= 1.1.7)
|
30
|
-
solr-ruby (0.0.8)
|
27
|
+
xml-simple (1.0.14)
|
31
28
|
|
32
29
|
PLATFORMS
|
33
30
|
ruby
|
@@ -41,4 +38,4 @@ DEPENDENCIES
|
|
41
38
|
rspec (< 2.0.0)
|
42
39
|
ruby-debug
|
43
40
|
ruby-debug-base
|
44
|
-
|
41
|
+
xml-simple
|
data/History.txt
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
h2. 1.0.0
|
2
|
+
|
3
|
+
Deprecated
|
4
|
+
|
5
|
+
* extract_tag
|
6
|
+
* extract_tags
|
7
|
+
* extract_hash
|
8
|
+
|
9
|
+
|
10
|
+
All solrize and extract methods now accept and return a _Hash_ rather than a Solr::Document
|
11
|
+
Removed dependency on outdated "solr" gem, replaced it with RSolr
|
12
|
+
As part of this switch, *all field names are stored as Strings, not Symbols*. If you previously accessed something as solr_doc[:title_t], now you need to access it as solr_doc["title_t"]
|
13
|
+
|
14
|
+
Moved #format_node_value and #insert_solr_field_value from TerminologyBasedSolrizer to Solrizer::Extractor
|
15
|
+
|
1
16
|
h2. 0.3.1
|
2
17
|
|
3
18
|
fix in require statements
|
data/Rakefile
CHANGED
@@ -10,7 +10,6 @@ begin
|
|
10
10
|
gem.email = "matt.zumwalt@yourmediashelf.com"
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
|
-
gem.add_dependency "solr-ruby"
|
14
13
|
gem.add_dependency "nokogiri"
|
15
14
|
gem.add_dependency "om"
|
16
15
|
gem.add_dependency "nokogiri"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.1
|
data/lib/solrizer/extractor.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'solr'
|
2
|
-
require 'rexml/document'
|
3
1
|
require "nokogiri"
|
4
2
|
require 'yaml'
|
5
3
|
|
@@ -12,37 +10,51 @@ module Solrizer
|
|
12
10
|
# with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
|
13
11
|
#
|
14
12
|
class Extractor
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
|
29
|
-
when "Array"
|
30
|
-
value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
if input_hash.has_key?(:symbols)
|
35
|
-
input_hash[:symbols].each do |symbol_name, value|
|
36
|
-
case value.class.to_s
|
37
|
-
when "String"
|
38
|
-
solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
|
39
|
-
when "Array"
|
40
|
-
value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
|
41
|
-
end
|
42
|
-
end
|
13
|
+
|
14
|
+
# Insert +field_value+ for +field_name+ into +solr_doc+
|
15
|
+
# Ensures that field values are always appended to arrays within the values hash.
|
16
|
+
# Also ensures that values are run through format_node_value
|
17
|
+
# @param [Hash] solr_doc
|
18
|
+
# @param [String] field_name
|
19
|
+
# @param [String] field_value
|
20
|
+
def self.insert_solr_field_value(solr_doc, field_name, field_value)
|
21
|
+
formatted_value = self.format_node_value(field_value)
|
22
|
+
if solr_doc.has_key?(field_name)
|
23
|
+
solr_doc[field_name] << formatted_value
|
24
|
+
else
|
25
|
+
solr_doc.merge!( {field_name => [formatted_value]} )
|
43
26
|
end
|
44
27
|
return solr_doc
|
45
28
|
end
|
46
29
|
|
30
|
+
# Strips the majority of whitespace from the values array and then joins them with a single blank delimiter
|
31
|
+
# @param [Array] values Array of strings representing the values returned
|
32
|
+
def self.format_node_value values
|
33
|
+
values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
|
34
|
+
end
|
35
|
+
|
36
|
+
# Instance Methods
|
37
|
+
|
38
|
+
# Alias for Solrizer::Extractor#insert_solr_field_value
|
39
|
+
def insert_solr_field_value(solr_doc, field_name, field_value)
|
40
|
+
Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Alias for Solrizer::Extractor#format_node_value
|
44
|
+
def format_node_value values
|
45
|
+
Solrizer::Extractor.format_node_value(values)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Deprecated.
|
49
|
+
# merges input_hash into solr_hash
|
50
|
+
# @param [Hash] input_hash the input hash of values
|
51
|
+
# @param [Hash] solr_hash the solr values hash to add the values into
|
52
|
+
# @return [Hash] the populated Solr values hash
|
53
|
+
#
|
54
|
+
def extract_hash( input_hash, solr_hash=Hash.new )
|
55
|
+
warn "[DEPRECATION] `extract_hash` is deprecated. Just pass values directly into your solr values hash"
|
56
|
+
return solr_hash.merge!(input_hash)
|
57
|
+
end
|
58
|
+
|
47
59
|
end
|
48
60
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'solr'
|
2
|
-
require 'rexml/document'
|
3
1
|
require "nokogiri"
|
4
2
|
require 'yaml'
|
5
3
|
|
@@ -8,7 +6,9 @@ module Solrizer::HTML::Extractor
|
|
8
6
|
#
|
9
7
|
# This method strips html tags out and returns content to be indexed in solr
|
10
8
|
#
|
11
|
-
|
9
|
+
# @param [Datastream] ds object that responds to .content with HTML content
|
10
|
+
# @param [Hash] solr_doc hash of values to be inserted into solr as a solr document
|
11
|
+
def html_to_solr( ds, solr_doc=Hash.new )
|
12
12
|
|
13
13
|
text = CGI.unescapeHTML(ds.content)
|
14
14
|
doc = Nokogiri::HTML(text)
|
@@ -17,7 +17,7 @@ module Solrizer::HTML::Extractor
|
|
17
17
|
stories = doc.xpath('//story')
|
18
18
|
|
19
19
|
stories.each do |story|
|
20
|
-
solr_doc
|
20
|
+
solr_doc.merge!({:story_display => story.children.to_xml})
|
21
21
|
end
|
22
22
|
|
23
23
|
#strip out text and put in story_t
|
@@ -28,7 +28,7 @@ module Solrizer::HTML::Extractor
|
|
28
28
|
text << text_node.content
|
29
29
|
end
|
30
30
|
|
31
|
-
solr_doc
|
31
|
+
solr_doc.merge!({:story_t => text})
|
32
32
|
|
33
33
|
return solr_doc
|
34
34
|
end
|
@@ -1,28 +1,29 @@
|
|
1
|
-
require
|
2
|
-
require 'rexml/document'
|
3
|
-
require "nokogiri"
|
4
|
-
require 'yaml'
|
1
|
+
require "xmlsimple"
|
5
2
|
|
6
3
|
module Solrizer::XML::Extractor
|
7
|
-
|
8
|
-
def extract_tags(text)
|
9
|
-
doc = REXML::Document.new( text )
|
10
|
-
extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
|
11
|
-
end
|
12
|
-
|
13
|
-
def extract_tag(doc, type)
|
14
|
-
tags = doc.elements["/fields/#{type}"]
|
15
|
-
return {} unless tags
|
16
|
-
{type => tags.text.split(/,/).map {|t| t.strip}}
|
17
|
-
end
|
18
4
|
|
19
5
|
#
|
20
6
|
# This method extracts solr fields from simple xml
|
7
|
+
# If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer instead.
|
21
8
|
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
9
|
+
# @param [xml] text xml content to index
|
10
|
+
# @param [Hash] solr_doc
|
11
|
+
def xml_to_solr( text, solr_doc=Hash.new )
|
12
|
+
doc = XmlSimple.xml_in( text )
|
13
|
+
|
14
|
+
doc.each_pair do |name, value|
|
15
|
+
if value.kind_of?(Array)
|
16
|
+
if value.first.kind_of?(Hash)
|
17
|
+
# This deals with the way xml-simple handles nodes with attributes
|
18
|
+
solr_doc.merge!({:"#{name}_t" => "#{value.first["content"]}"})
|
19
|
+
elsif value.length > 1
|
20
|
+
solr_doc.merge!({:"#{name}_t" => value})
|
21
|
+
else
|
22
|
+
solr_doc.merge!({:"#{name}_t" => "#{value}"})
|
23
|
+
end
|
24
|
+
else
|
25
|
+
solr_doc.merge!({:"#{name}_t" => "#{value}"})
|
26
|
+
end
|
26
27
|
end
|
27
28
|
|
28
29
|
return solr_doc
|
@@ -8,9 +8,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
8
8
|
# Module Methods
|
9
9
|
|
10
10
|
# Build a solr document from +doc+ based on its terminology
|
11
|
-
# @
|
12
|
-
# @
|
13
|
-
def self.solrize(doc, solr_doc=
|
11
|
+
# @param [OM::XML::Document] doc
|
12
|
+
# @param [Hash] (optional) solr_doc (values hash) to populate
|
13
|
+
def self.solrize(doc, solr_doc=Hash.new, field_mapper = nil)
|
14
14
|
unless doc.class.terminology.nil?
|
15
15
|
doc.class.terminology.terms.each_pair do |term_name,term|
|
16
16
|
doc.solrize_term(term, solr_doc, field_mapper)
|
@@ -23,9 +23,10 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
23
23
|
|
24
24
|
# Populate a solr document with fields based on nodes in +xml+ corresponding to the
|
25
25
|
# term identified by +term_pointer+ within +terminology+
|
26
|
-
# @
|
27
|
-
# @
|
28
|
-
|
26
|
+
# @param [OM::XML::Document] doc xml document to extract values from
|
27
|
+
# @param [OM::XML::Term] term corresponding to desired xml values
|
28
|
+
# @param [Hash] (optional) solr_doc (values hash) to populate
|
29
|
+
def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
29
30
|
terminology = doc.class.terminology
|
30
31
|
parents = opts.fetch(:parents, [])
|
31
32
|
|
@@ -52,10 +53,11 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
52
53
|
|
53
54
|
# Populate a solr document with solr fields corresponding to the given xml node
|
54
55
|
# Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
|
55
|
-
# @
|
56
|
-
# @
|
57
|
-
# @
|
58
|
-
|
56
|
+
# @param [Nokogiri::XML::Node] node to solrize
|
57
|
+
# @param [OM::XML::Document] doc document the node came from
|
58
|
+
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
|
59
|
+
# @param [Hash] (optional) solr_doc (values hash) to populate
|
60
|
+
def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
61
|
field_mapper ||= self.default_field_mapper
|
60
62
|
terminology = doc.class.terminology
|
61
63
|
# term = terminology.retrieve_term(*term_pointer)
|
@@ -70,7 +72,7 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
70
72
|
|
71
73
|
field_mapper.solr_names_and_values(generic_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
72
74
|
unless field_value.join("").strip.blank?
|
73
|
-
|
75
|
+
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
@@ -78,32 +80,26 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
78
80
|
hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
|
79
81
|
field_mapper.solr_names_and_values(hierarchical_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
80
82
|
unless field_value.join("").strip.blank?
|
81
|
-
|
83
|
+
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
82
84
|
end
|
83
85
|
end
|
84
86
|
end
|
85
87
|
solr_doc
|
86
88
|
end
|
87
|
-
|
88
|
-
# Strips the majority of whitespace from the values array and then joins them with a single blank delimiter
|
89
|
-
# @values Array of strings representing the values returned
|
90
|
-
def self.format_node_value values
|
91
|
-
values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
|
92
|
-
end
|
93
89
|
|
94
90
|
# Instance Methods
|
95
91
|
|
96
92
|
attr_accessor :field_mapper
|
97
93
|
|
98
|
-
def to_solr(solr_doc =
|
94
|
+
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
|
99
95
|
Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc, field_mapper)
|
100
96
|
end
|
101
97
|
|
102
|
-
def solrize_term(term, solr_doc =
|
98
|
+
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
103
99
|
Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
|
104
100
|
end
|
105
101
|
|
106
|
-
def solrize_node(node, term_pointer, term, solr_doc =
|
102
|
+
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
107
103
|
Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc, field_mapper, opts)
|
108
104
|
end
|
109
105
|
|
data/solrizer.gemspec
CHANGED
@@ -7,52 +7,10 @@ describe Solrizer::Extractor do
|
|
7
7
|
@extractor = Solrizer::Extractor.new
|
8
8
|
end
|
9
9
|
|
10
|
-
describe "
|
11
|
-
it "should
|
12
|
-
|
13
|
-
|
14
|
-
example_result = @extractor.extract_hash( example_hash )
|
15
|
-
example_result.should be_kind_of Solr::Document
|
16
|
-
example_hash.each_pair do |key,values|
|
17
|
-
if values.class == String
|
18
|
-
example_result["#{key}_facet"].should == values
|
19
|
-
else
|
20
|
-
values.each do |v|
|
21
|
-
example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
|
22
|
-
example_result.inspect.include?("@value=\"#{v}\"").should be_true
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should handle hashes with facets listed in a sub-hash" do
|
29
|
-
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
30
|
-
result = @extractor.extract_hash( simple_hash )
|
31
|
-
result.should be_kind_of Solr::Document
|
32
|
-
result["technology_facet"].should == "t1"
|
33
|
-
result.inspect.include?('@boost=nil').should be_true
|
34
|
-
result.inspect.include?('@name="technology_facet"').should be_true
|
35
|
-
result.inspect.include?('@value="t2"').should be_true
|
36
|
-
result["company_facet"].should == "c1"
|
37
|
-
result["person_facet"].should == "p1"
|
38
|
-
result.inspect.include?('@name="person_facet"').should be_true
|
39
|
-
result.inspect.include?('@value="p2"').should be_true
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should create symbols from the :symbols subhash" do
|
44
|
-
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
45
|
-
result = @extractor.extract_hash( simple_hash )
|
46
|
-
result.should be_kind_of Solr::Document
|
47
|
-
result["technology_s"].should == "t1"
|
48
|
-
result.inspect.include?('@name="technology_s"').should be_true
|
49
|
-
result.inspect.include?('@value="t2"').should be_true
|
50
|
-
|
51
|
-
result["company_s"].should == "c1"
|
52
|
-
result["person_s"].should == "p1"
|
53
|
-
result.inspect.include?('@name="person_s"').should be_true
|
54
|
-
result.inspect.include?('@value="p2"').should be_true
|
55
|
-
|
10
|
+
describe ".format_node_value" do
|
11
|
+
it "should strip white space out of the array and join it with a single blank" do
|
12
|
+
Solrizer::Extractor.format_node_value([" test \n node \t value \t"]).should == "test node value"
|
13
|
+
Solrizer::Extractor.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
|
56
14
|
end
|
57
15
|
end
|
58
16
|
|
@@ -10,18 +10,19 @@ describe Solrizer::XML::Extractor do
|
|
10
10
|
describe ".xml_to_solr" do
|
11
11
|
it "should turn simple xml into a solr document" do
|
12
12
|
desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
|
13
|
+
|
13
14
|
result = @extractor.xml_to_solr(desc_meta)
|
14
15
|
result[:type_t].should == "text"
|
15
16
|
result[:medium_t].should == "Paper Document"
|
16
17
|
result[:rights_t].should == "Presumed under copyright. Do not publish."
|
17
18
|
result[:date_t].should == "1985-12-30"
|
18
|
-
result[:format_t].should
|
19
|
+
result[:format_t].should be_kind_of(Array)
|
20
|
+
result[:format_t].should include("application/tiff")
|
21
|
+
result[:format_t].should include("application/pdf")
|
22
|
+
result[:format_t].should include("application/jp2000")
|
19
23
|
result[:title_t].should == "This is a Sample Title"
|
20
24
|
result[:publisher_t].should == "Sample Unversity"
|
21
25
|
|
22
|
-
# ... and a hacky way of making sure that it added a field for each of the dc:medium values
|
23
|
-
result.inspect.include?('@value="application/tiff"').should be_true
|
24
|
-
result.inspect.include?('@value="application/pdf"').should be_true
|
25
26
|
end
|
26
27
|
end
|
27
28
|
|
@@ -22,18 +22,18 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
22
22
|
|
23
23
|
it "should provide .to_solr and return a SolrDocument" do
|
24
24
|
@mods_article.should respond_to(:to_solr)
|
25
|
-
@mods_article.to_solr.should be_kind_of(
|
25
|
+
@mods_article.to_solr.should be_kind_of(Hash)
|
26
26
|
end
|
27
27
|
|
28
|
-
it "should optionally allow you to provide the
|
29
|
-
doc =
|
28
|
+
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
29
|
+
doc = Hash.new
|
30
30
|
@mods_article.to_solr(doc).should equal(doc)
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
|
34
34
|
# mock_terms = {:name1=>:term1, :name2=>:term2}
|
35
35
|
# ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
|
36
|
-
solr_doc =
|
36
|
+
solr_doc = Hash.new
|
37
37
|
@mods_article.field_mapper = Solrizer::FieldMapper::Default.new
|
38
38
|
OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
39
39
|
@mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
|
@@ -46,10 +46,13 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
46
46
|
solr_doc = @mods_article.to_solr
|
47
47
|
#should have these
|
48
48
|
|
49
|
-
solr_doc[
|
50
|
-
solr_doc[
|
51
|
-
solr_doc[
|
52
|
-
solr_doc[
|
49
|
+
solr_doc["abstract"].should be_nil
|
50
|
+
solr_doc["abstract_t"].should == ["ABSTRACT"]
|
51
|
+
solr_doc["title_info_1_language_t"].should == ["finnish"]
|
52
|
+
solr_doc["person_1_role_0_text_t"].should == ["teacher"]
|
53
|
+
solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
|
54
|
+
# This next line will fail until om > 1.0.2 is released
|
55
|
+
# solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
|
53
56
|
|
54
57
|
# These are a holdover from an old version of OM
|
55
58
|
# solr_doc[:finnish_title_info_language_t].should == "finnish"
|
@@ -69,19 +72,19 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
69
72
|
describe ".solrize_term" do
|
70
73
|
|
71
74
|
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
72
|
-
solr_doc =
|
75
|
+
solr_doc = Hash.new
|
73
76
|
result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
74
77
|
result.should == solr_doc
|
75
78
|
# @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
|
76
79
|
end
|
77
80
|
|
78
81
|
it "should add multiple fields based on index_as" do
|
79
|
-
fake_solr_doc =
|
82
|
+
fake_solr_doc = {}
|
80
83
|
@mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:name), fake_solr_doc)
|
81
84
|
|
82
85
|
expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
83
86
|
%w(_t _display _facet).each do |suffix|
|
84
|
-
actual_names = fake_solr_doc
|
87
|
+
actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
|
85
88
|
{suffix => actual_names}.should == {suffix => expected_names}
|
86
89
|
end
|
87
90
|
end
|
@@ -89,8 +92,8 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
89
92
|
end
|
90
93
|
|
91
94
|
describe ".solrize_node" do
|
92
|
-
it "should optionally allow you to provide the
|
93
|
-
doc =
|
95
|
+
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
96
|
+
doc = Hash.new
|
94
97
|
# @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
|
95
98
|
end
|
96
99
|
|
@@ -99,10 +102,4 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
99
102
|
it "should only create one node if parents is empty"
|
100
103
|
end
|
101
104
|
|
102
|
-
describe ".format_node_value" do
|
103
|
-
it "should strip white space out of the array and join it with a single blank" do
|
104
|
-
Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test \n node \t value \t"]).should == "test node value"
|
105
|
-
Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
|
106
|
-
end
|
107
|
-
end
|
108
105
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 1.0.
|
9
|
+
- 1
|
10
|
+
version: 1.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|