solrizer 1.0.0 → 1.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/Gemfile +1 -1
- data/Gemfile.lock +15 -18
- data/History.txt +15 -0
- data/Rakefile +0 -1
- data/VERSION +1 -1
- data/lib/solrizer/extractor.rb +42 -30
- data/lib/solrizer/html/extractor.rb +5 -5
- data/lib/solrizer/xml/extractor.rb +20 -19
- data/lib/solrizer/xml/terminology_based_solrizer.rb +17 -21
- data/solrizer.gemspec +1 -1
- data/spec/units/extractor_spec.rb +4 -46
- data/spec/units/xml_extractor_spec.rb +5 -4
- data/spec/units/xml_terminology_based_solrizer_spec.rb +16 -19
- metadata +3 -3
data/Gemfile
CHANGED
data/Gemfile.lock
CHANGED
@@ -1,33 +1,30 @@
|
|
1
1
|
GEM
|
2
2
|
remote: http://rubygems.org/
|
3
3
|
specs:
|
4
|
-
columnize (0.3.
|
5
|
-
facets (2.9.
|
6
|
-
gemcutter (0.6.1)
|
4
|
+
columnize (0.3.2)
|
5
|
+
facets (2.9.1)
|
7
6
|
git (1.2.5)
|
8
|
-
jeweler (1.
|
9
|
-
|
7
|
+
jeweler (1.5.2)
|
8
|
+
bundler (~> 1.0.0)
|
10
9
|
git (>= 1.2.5)
|
11
|
-
|
12
|
-
json_pure (1.4.6)
|
10
|
+
rake
|
13
11
|
linecache (0.43)
|
14
12
|
mediashelf-loggable (0.4.0)
|
15
|
-
mocha (0.9.
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
mocha (0.9.12)
|
14
|
+
nokogiri (1.4.4)
|
15
|
+
om (1.0.2)
|
16
|
+
facets
|
19
17
|
facets
|
18
|
+
nokogiri
|
20
19
|
nokogiri (>= 1.4.2)
|
21
20
|
rake (0.8.7)
|
22
21
|
rspec (1.3.1)
|
23
|
-
ruby-debug (0.10.
|
22
|
+
ruby-debug (0.10.4)
|
24
23
|
columnize (>= 0.1)
|
25
|
-
ruby-debug-base (~> 0.10.
|
26
|
-
ruby-debug-base (0.10.
|
24
|
+
ruby-debug-base (~> 0.10.4.0)
|
25
|
+
ruby-debug-base (0.10.4)
|
27
26
|
linecache (>= 0.3)
|
28
|
-
|
29
|
-
json_pure (>= 1.1.7)
|
30
|
-
solr-ruby (0.0.8)
|
27
|
+
xml-simple (1.0.14)
|
31
28
|
|
32
29
|
PLATFORMS
|
33
30
|
ruby
|
@@ -41,4 +38,4 @@ DEPENDENCIES
|
|
41
38
|
rspec (< 2.0.0)
|
42
39
|
ruby-debug
|
43
40
|
ruby-debug-base
|
44
|
-
|
41
|
+
xml-simple
|
data/History.txt
CHANGED
@@ -1,3 +1,18 @@
|
|
1
|
+
h2. 1.0.0
|
2
|
+
|
3
|
+
Deprecated
|
4
|
+
|
5
|
+
* extract_tag
|
6
|
+
* extract_tags
|
7
|
+
* extract_hash
|
8
|
+
|
9
|
+
|
10
|
+
All solrize and extract methods now accept and return a _Hash_ rather than a Solr::Document
|
11
|
+
Removed dependency on outdated "solr" gem, replaced it with RSolr
|
12
|
+
As part of this switch, *all field names are stored as Strings, not Symbols*. If you previously accessed something as solr_doc[:title_t], now you need to access it as solr_doc["title_t"]
|
13
|
+
|
14
|
+
Moved #format_node_value and #insert_solr_field_value from TerminologyBasedSolrizer to Solrizer::Extractor
|
15
|
+
|
1
16
|
h2. 0.3.1
|
2
17
|
|
3
18
|
fix in require statements
|
data/Rakefile
CHANGED
@@ -10,7 +10,6 @@ begin
|
|
10
10
|
gem.email = "matt.zumwalt@yourmediashelf.com"
|
11
11
|
gem.homepage = "http://github.com/projecthydra/solrizer"
|
12
12
|
gem.authors = ["Matt Zumwalt"]
|
13
|
-
gem.add_dependency "solr-ruby"
|
14
13
|
gem.add_dependency "nokogiri"
|
15
14
|
gem.add_dependency "om"
|
16
15
|
gem.add_dependency "nokogiri"
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.
|
1
|
+
1.0.1
|
data/lib/solrizer/extractor.rb
CHANGED
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'solr'
|
2
|
-
require 'rexml/document'
|
3
1
|
require "nokogiri"
|
4
2
|
require 'yaml'
|
5
3
|
|
@@ -12,37 +10,51 @@ module Solrizer
|
|
12
10
|
# with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
|
13
11
|
#
|
14
12
|
class Extractor
|
15
|
-
|
16
|
-
#
|
17
|
-
#
|
18
|
-
#
|
19
|
-
#
|
20
|
-
#
|
21
|
-
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
|
29
|
-
when "Array"
|
30
|
-
value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
if input_hash.has_key?(:symbols)
|
35
|
-
input_hash[:symbols].each do |symbol_name, value|
|
36
|
-
case value.class.to_s
|
37
|
-
when "String"
|
38
|
-
solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
|
39
|
-
when "Array"
|
40
|
-
value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
|
41
|
-
end
|
42
|
-
end
|
13
|
+
|
14
|
+
# Insert +field_value+ for +field_name+ into +solr_doc+
|
15
|
+
# Ensures that field values are always appended to arrays within the values hash.
|
16
|
+
# Also ensures that values are run through format_node_value
|
17
|
+
# @param [Hash] solr_doc
|
18
|
+
# @param [String] field_name
|
19
|
+
# @param [String] field_value
|
20
|
+
def self.insert_solr_field_value(solr_doc, field_name, field_value)
|
21
|
+
formatted_value = self.format_node_value(field_value)
|
22
|
+
if solr_doc.has_key?(field_name)
|
23
|
+
solr_doc[field_name] << formatted_value
|
24
|
+
else
|
25
|
+
solr_doc.merge!( {field_name => [formatted_value]} )
|
43
26
|
end
|
44
27
|
return solr_doc
|
45
28
|
end
|
46
29
|
|
30
|
+
# Strips the majority of whitespace from the values array and then joins them with a single blank delimiter
|
31
|
+
# @param [Array] values Array of strings representing the values returned
|
32
|
+
def self.format_node_value values
|
33
|
+
values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
|
34
|
+
end
|
35
|
+
|
36
|
+
# Instance Methods
|
37
|
+
|
38
|
+
# Instance-level convenience wrapper that delegates to the class method Solrizer::Extractor.insert_solr_field_value
|
39
|
+
def insert_solr_field_value(solr_doc, field_name, field_value)
|
40
|
+
Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
41
|
+
end
|
42
|
+
|
43
|
+
# Instance-level convenience wrapper that delegates to the class method Solrizer::Extractor.format_node_value
|
44
|
+
def format_node_value values
|
45
|
+
Solrizer::Extractor.format_node_value(values)
|
46
|
+
end
|
47
|
+
|
48
|
+
# Deprecated.
|
49
|
+
# merges input_hash into solr_hash
|
50
|
+
# @param [Hash] input_hash the input hash of values
|
51
|
+
# @param [Hash] solr_hash the solr values hash to add the values into
|
52
|
+
# @return [Hash] the populated Solr values hash
|
53
|
+
#
|
54
|
+
def extract_hash( input_hash, solr_hash=Hash.new )
|
55
|
+
warn "[DEPRECATION] `extract_hash` is deprecated. Just pass values directly into your solr values hash"
|
56
|
+
return solr_hash.merge!(input_hash)
|
57
|
+
end
|
58
|
+
|
47
59
|
end
|
48
60
|
end
|
@@ -1,5 +1,3 @@
|
|
1
|
-
require 'solr'
|
2
|
-
require 'rexml/document'
|
3
1
|
require "nokogiri"
|
4
2
|
require 'yaml'
|
5
3
|
|
@@ -8,7 +6,9 @@ module Solrizer::HTML::Extractor
|
|
8
6
|
#
|
9
7
|
# This method strips html tags out and returns content to be indexed in solr
|
10
8
|
#
|
11
|
-
|
9
|
+
# @param [Datastream] ds object that responds to .content with HTML content
|
10
|
+
# @param [Hash] solr_doc hash of values to be inserted into solr as a solr document
|
11
|
+
def html_to_solr( ds, solr_doc=Hash.new )
|
12
12
|
|
13
13
|
text = CGI.unescapeHTML(ds.content)
|
14
14
|
doc = Nokogiri::HTML(text)
|
@@ -17,7 +17,7 @@ module Solrizer::HTML::Extractor
|
|
17
17
|
stories = doc.xpath('//story')
|
18
18
|
|
19
19
|
stories.each do |story|
|
20
|
-
solr_doc
|
20
|
+
solr_doc.merge!({:story_display => story.children.to_xml})
|
21
21
|
end
|
22
22
|
|
23
23
|
#strip out text and put in story_t
|
@@ -28,7 +28,7 @@ module Solrizer::HTML::Extractor
|
|
28
28
|
text << text_node.content
|
29
29
|
end
|
30
30
|
|
31
|
-
solr_doc
|
31
|
+
solr_doc.merge!({:story_t => text})
|
32
32
|
|
33
33
|
return solr_doc
|
34
34
|
end
|
@@ -1,28 +1,29 @@
|
|
1
|
-
require
|
2
|
-
require 'rexml/document'
|
3
|
-
require "nokogiri"
|
4
|
-
require 'yaml'
|
1
|
+
require "xmlsimple"
|
5
2
|
|
6
3
|
module Solrizer::XML::Extractor
|
7
|
-
|
8
|
-
def extract_tags(text)
|
9
|
-
doc = REXML::Document.new( text )
|
10
|
-
extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
|
11
|
-
end
|
12
|
-
|
13
|
-
def extract_tag(doc, type)
|
14
|
-
tags = doc.elements["/fields/#{type}"]
|
15
|
-
return {} unless tags
|
16
|
-
{type => tags.text.split(/,/).map {|t| t.strip}}
|
17
|
-
end
|
18
4
|
|
19
5
|
#
|
20
6
|
# This method extracts solr fields from simple xml
|
7
|
+
# If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer instead.
|
21
8
|
#
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
9
|
+
# @param [xml] text xml content to index
|
10
|
+
# @param [Hash] solr_doc
|
11
|
+
def xml_to_solr( text, solr_doc=Hash.new )
|
12
|
+
doc = XmlSimple.xml_in( text )
|
13
|
+
|
14
|
+
doc.each_pair do |name, value|
|
15
|
+
if value.kind_of?(Array)
|
16
|
+
if value.first.kind_of?(Hash)
|
17
|
+
# This deals with the way xml-simple handles nodes with attributes
|
18
|
+
solr_doc.merge!({:"#{name}_t" => "#{value.first["content"]}"})
|
19
|
+
elsif value.length > 1
|
20
|
+
solr_doc.merge!({:"#{name}_t" => value})
|
21
|
+
else
|
22
|
+
solr_doc.merge!({:"#{name}_t" => "#{value}"})
|
23
|
+
end
|
24
|
+
else
|
25
|
+
solr_doc.merge!({:"#{name}_t" => "#{value}"})
|
26
|
+
end
|
26
27
|
end
|
27
28
|
|
28
29
|
return solr_doc
|
@@ -8,9 +8,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
8
8
|
# Module Methods
|
9
9
|
|
10
10
|
# Build a solr document from +doc+ based on its terminology
|
11
|
-
# @
|
12
|
-
# @
|
13
|
-
def self.solrize(doc, solr_doc=
|
11
|
+
# @param [OM::XML::Document] doc
|
12
|
+
# @param [Hash] solr_doc (optional) values hash to populate
|
13
|
+
def self.solrize(doc, solr_doc=Hash.new, field_mapper = nil)
|
14
14
|
unless doc.class.terminology.nil?
|
15
15
|
doc.class.terminology.terms.each_pair do |term_name,term|
|
16
16
|
doc.solrize_term(term, solr_doc, field_mapper)
|
@@ -23,9 +23,10 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
23
23
|
|
24
24
|
# Populate a solr document with fields based on nodes in +xml+ corresponding to the
|
25
25
|
# term identified by +term_pointer+ within +terminology+
|
26
|
-
# @
|
27
|
-
# @
|
28
|
-
|
26
|
+
# @param [OM::XML::Document] doc xml document to extract values from
|
27
|
+
# @param [OM::XML::Term] term corresponding to desired xml values
|
28
|
+
# @param [Hash] solr_doc (optional) values hash to populate
|
29
|
+
def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
29
30
|
terminology = doc.class.terminology
|
30
31
|
parents = opts.fetch(:parents, [])
|
31
32
|
|
@@ -52,10 +53,11 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
52
53
|
|
53
54
|
# Populate a solr document with solr fields corresponding to the given xml node
|
54
55
|
# Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
|
55
|
-
# @
|
56
|
-
# @
|
57
|
-
# @
|
58
|
-
|
56
|
+
# @param [Nokogiri::XML::Node] node to solrize
|
57
|
+
# @param [OM::XML::Document] doc document the node came from
|
58
|
+
# @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
|
59
|
+
# @param [Hash] solr_doc (optional) values hash to populate
|
60
|
+
def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
61
|
field_mapper ||= self.default_field_mapper
|
60
62
|
terminology = doc.class.terminology
|
61
63
|
# term = terminology.retrieve_term(*term_pointer)
|
@@ -70,7 +72,7 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
70
72
|
|
71
73
|
field_mapper.solr_names_and_values(generic_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
72
74
|
unless field_value.join("").strip.blank?
|
73
|
-
|
75
|
+
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
74
76
|
end
|
75
77
|
end
|
76
78
|
|
@@ -78,32 +80,26 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
78
80
|
hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
|
79
81
|
field_mapper.solr_names_and_values(hierarchical_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
80
82
|
unless field_value.join("").strip.blank?
|
81
|
-
|
83
|
+
::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
|
82
84
|
end
|
83
85
|
end
|
84
86
|
end
|
85
87
|
solr_doc
|
86
88
|
end
|
87
|
-
|
88
|
-
# Strips the majority of whitespace from the values array and then joins them with a single blank delimiter
|
89
|
-
# @values Array of strings representing the values returned
|
90
|
-
def self.format_node_value values
|
91
|
-
values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
|
92
|
-
end
|
93
89
|
|
94
90
|
# Instance Methods
|
95
91
|
|
96
92
|
attr_accessor :field_mapper
|
97
93
|
|
98
|
-
def to_solr(solr_doc =
|
94
|
+
def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
|
99
95
|
Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc, field_mapper)
|
100
96
|
end
|
101
97
|
|
102
|
-
def solrize_term(term, solr_doc =
|
98
|
+
def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
103
99
|
Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
|
104
100
|
end
|
105
101
|
|
106
|
-
def solrize_node(node, term_pointer, term, solr_doc =
|
102
|
+
def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
|
107
103
|
Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc, field_mapper, opts)
|
108
104
|
end
|
109
105
|
|
data/solrizer.gemspec
CHANGED
@@ -7,52 +7,10 @@ describe Solrizer::Extractor do
|
|
7
7
|
@extractor = Solrizer::Extractor.new
|
8
8
|
end
|
9
9
|
|
10
|
-
describe "
|
11
|
-
it "should
|
12
|
-
|
13
|
-
|
14
|
-
example_result = @extractor.extract_hash( example_hash )
|
15
|
-
example_result.should be_kind_of Solr::Document
|
16
|
-
example_hash.each_pair do |key,values|
|
17
|
-
if values.class == String
|
18
|
-
example_result["#{key}_facet"].should == values
|
19
|
-
else
|
20
|
-
values.each do |v|
|
21
|
-
example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
|
22
|
-
example_result.inspect.include?("@value=\"#{v}\"").should be_true
|
23
|
-
end
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
|
28
|
-
it "should handle hashes with facets listed in a sub-hash" do
|
29
|
-
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
30
|
-
result = @extractor.extract_hash( simple_hash )
|
31
|
-
result.should be_kind_of Solr::Document
|
32
|
-
result["technology_facet"].should == "t1"
|
33
|
-
result.inspect.include?('@boost=nil').should be_true
|
34
|
-
result.inspect.include?('@name="technology_facet"').should be_true
|
35
|
-
result.inspect.include?('@value="t2"').should be_true
|
36
|
-
result["company_facet"].should == "c1"
|
37
|
-
result["person_facet"].should == "p1"
|
38
|
-
result.inspect.include?('@name="person_facet"').should be_true
|
39
|
-
result.inspect.include?('@value="p2"').should be_true
|
40
|
-
|
41
|
-
end
|
42
|
-
|
43
|
-
it "should create symbols from the :symbols subhash" do
|
44
|
-
simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
|
45
|
-
result = @extractor.extract_hash( simple_hash )
|
46
|
-
result.should be_kind_of Solr::Document
|
47
|
-
result["technology_s"].should == "t1"
|
48
|
-
result.inspect.include?('@name="technology_s"').should be_true
|
49
|
-
result.inspect.include?('@value="t2"').should be_true
|
50
|
-
|
51
|
-
result["company_s"].should == "c1"
|
52
|
-
result["person_s"].should == "p1"
|
53
|
-
result.inspect.include?('@name="person_s"').should be_true
|
54
|
-
result.inspect.include?('@value="p2"').should be_true
|
55
|
-
|
10
|
+
describe ".format_node_value" do
|
11
|
+
it "should strip white space out of the array and join it with a single blank" do
|
12
|
+
Solrizer::Extractor.format_node_value([" test \n node \t value \t"]).should == "test node value"
|
13
|
+
Solrizer::Extractor.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
|
56
14
|
end
|
57
15
|
end
|
58
16
|
|
@@ -10,18 +10,19 @@ describe Solrizer::XML::Extractor do
|
|
10
10
|
describe ".xml_to_solr" do
|
11
11
|
it "should turn simple xml into a solr document" do
|
12
12
|
desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
|
13
|
+
|
13
14
|
result = @extractor.xml_to_solr(desc_meta)
|
14
15
|
result[:type_t].should == "text"
|
15
16
|
result[:medium_t].should == "Paper Document"
|
16
17
|
result[:rights_t].should == "Presumed under copyright. Do not publish."
|
17
18
|
result[:date_t].should == "1985-12-30"
|
18
|
-
result[:format_t].should
|
19
|
+
result[:format_t].should be_kind_of(Array)
|
20
|
+
result[:format_t].should include("application/tiff")
|
21
|
+
result[:format_t].should include("application/pdf")
|
22
|
+
result[:format_t].should include("application/jp2000")
|
19
23
|
result[:title_t].should == "This is a Sample Title"
|
20
24
|
result[:publisher_t].should == "Sample Unversity"
|
21
25
|
|
22
|
-
# ... and a hacky way of making sure that it added a field for each of the dc:medium values
|
23
|
-
result.inspect.include?('@value="application/tiff"').should be_true
|
24
|
-
result.inspect.include?('@value="application/pdf"').should be_true
|
25
26
|
end
|
26
27
|
end
|
27
28
|
|
@@ -22,18 +22,18 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
22
22
|
|
23
23
|
it "should provide .to_solr and return a SolrDocument" do
|
24
24
|
@mods_article.should respond_to(:to_solr)
|
25
|
-
@mods_article.to_solr.should be_kind_of(
|
25
|
+
@mods_article.to_solr.should be_kind_of(Hash)
|
26
26
|
end
|
27
27
|
|
28
|
-
it "should optionally allow you to provide the
|
29
|
-
doc =
|
28
|
+
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
29
|
+
doc = Hash.new
|
30
30
|
@mods_article.to_solr(doc).should equal(doc)
|
31
31
|
end
|
32
32
|
|
33
33
|
it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
|
34
34
|
# mock_terms = {:name1=>:term1, :name2=>:term2}
|
35
35
|
# ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
|
36
|
-
solr_doc =
|
36
|
+
solr_doc = Hash.new
|
37
37
|
@mods_article.field_mapper = Solrizer::FieldMapper::Default.new
|
38
38
|
OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
|
39
39
|
@mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
|
@@ -46,10 +46,13 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
46
46
|
solr_doc = @mods_article.to_solr
|
47
47
|
#should have these
|
48
48
|
|
49
|
-
solr_doc[
|
50
|
-
solr_doc[
|
51
|
-
solr_doc[
|
52
|
-
solr_doc[
|
49
|
+
solr_doc["abstract"].should be_nil
|
50
|
+
solr_doc["abstract_t"].should == ["ABSTRACT"]
|
51
|
+
solr_doc["title_info_1_language_t"].should == ["finnish"]
|
52
|
+
solr_doc["person_1_role_0_text_t"].should == ["teacher"]
|
53
|
+
solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
|
54
|
+
# This next line will fail until om > 1.0.2 is released
|
55
|
+
# solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
|
53
56
|
|
54
57
|
# These are a holdover from an old version of OM
|
55
58
|
# solr_doc[:finnish_title_info_language_t].should == "finnish"
|
@@ -69,19 +72,19 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
69
72
|
describe ".solrize_term" do
|
70
73
|
|
71
74
|
it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
|
72
|
-
solr_doc =
|
75
|
+
solr_doc = Hash.new
|
73
76
|
result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
|
74
77
|
result.should == solr_doc
|
75
78
|
# @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
|
76
79
|
end
|
77
80
|
|
78
81
|
it "should add multiple fields based on index_as" do
|
79
|
-
fake_solr_doc =
|
82
|
+
fake_solr_doc = {}
|
80
83
|
@mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:name), fake_solr_doc)
|
81
84
|
|
82
85
|
expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
|
83
86
|
%w(_t _display _facet).each do |suffix|
|
84
|
-
actual_names = fake_solr_doc
|
87
|
+
actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
|
85
88
|
{suffix => actual_names}.should == {suffix => expected_names}
|
86
89
|
end
|
87
90
|
end
|
@@ -89,8 +92,8 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
89
92
|
end
|
90
93
|
|
91
94
|
describe ".solrize_node" do
|
92
|
-
it "should optionally allow you to provide the
|
93
|
-
doc =
|
95
|
+
it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
|
96
|
+
doc = Hash.new
|
94
97
|
# @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
|
95
98
|
end
|
96
99
|
|
@@ -99,10 +102,4 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
99
102
|
it "should only create one node if parents is empty"
|
100
103
|
end
|
101
104
|
|
102
|
-
describe ".format_node_value" do
|
103
|
-
it "should strip white space out of the array and join it with a single blank" do
|
104
|
-
Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test \n node \t value \t"]).should == "test node value"
|
105
|
-
Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
|
106
|
-
end
|
107
|
-
end
|
108
105
|
end
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 21
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 1
|
8
8
|
- 0
|
9
|
-
-
|
10
|
-
version: 1.0.
|
9
|
+
- 1
|
10
|
+
version: 1.0.1
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- Matt Zumwalt
|