solrizer 1.0.0 → 1.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Gemfile CHANGED
@@ -1,7 +1,7 @@
1
1
  source "http://rubygems.org"
2
2
 
3
- gem "solr-ruby"
4
3
  gem "nokogiri"
4
+ gem "xml-simple"
5
5
  gem "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
6
6
  gem "mediashelf-loggable"
7
7
 
data/Gemfile.lock CHANGED
@@ -1,33 +1,30 @@
1
1
  GEM
2
2
  remote: http://rubygems.org/
3
3
  specs:
4
- columnize (0.3.1)
5
- facets (2.9.0)
6
- gemcutter (0.6.1)
4
+ columnize (0.3.2)
5
+ facets (2.9.1)
7
6
  git (1.2.5)
8
- jeweler (1.4.0)
9
- gemcutter (>= 0.1.0)
7
+ jeweler (1.5.2)
8
+ bundler (~> 1.0.0)
10
9
  git (>= 1.2.5)
11
- rubyforge (>= 2.0.0)
12
- json_pure (1.4.6)
10
+ rake
13
11
  linecache (0.43)
14
12
  mediashelf-loggable (0.4.0)
15
- mocha (0.9.9)
16
- rake
17
- nokogiri (1.4.3.1)
18
- om (1.0.0)
13
+ mocha (0.9.12)
14
+ nokogiri (1.4.4)
15
+ om (1.0.2)
16
+ facets
19
17
  facets
18
+ nokogiri
20
19
  nokogiri (>= 1.4.2)
21
20
  rake (0.8.7)
22
21
  rspec (1.3.1)
23
- ruby-debug (0.10.3)
22
+ ruby-debug (0.10.4)
24
23
  columnize (>= 0.1)
25
- ruby-debug-base (~> 0.10.3.0)
26
- ruby-debug-base (0.10.3)
24
+ ruby-debug-base (~> 0.10.4.0)
25
+ ruby-debug-base (0.10.4)
27
26
  linecache (>= 0.3)
28
- rubyforge (2.0.4)
29
- json_pure (>= 1.1.7)
30
- solr-ruby (0.0.8)
27
+ xml-simple (1.0.14)
31
28
 
32
29
  PLATFORMS
33
30
  ruby
@@ -41,4 +38,4 @@ DEPENDENCIES
41
38
  rspec (< 2.0.0)
42
39
  ruby-debug
43
40
  ruby-debug-base
44
- solr-ruby
41
+ xml-simple
data/History.txt CHANGED
@@ -1,3 +1,18 @@
1
+ h2. 1.0.0
2
+
3
+ Deprecated
4
+
5
+ * extract_tag
6
+ * extract_tags
7
+ * extract_hash
8
+
9
+
10
+ All solrize and extract methods now accept and return a _Hash_ rather than a Solr::Document
11
+ Removed dependency on outdated "solr" gem, replaced it with RSolr
12
+ As part of this switch, *all field names are stored as Strings, not Symbols*. If you previously accessed something as solr_doc[:title_t], now you need to access it as solr_doc["title_t"]
13
+
14
+ Moved #format_node_value and #insert_solr_field_value from TerminologyBasedSolrizer to Solrizer::Extractor
15
+
1
16
  h2. 0.3.1
2
17
 
3
18
  fix in require statements
data/Rakefile CHANGED
@@ -10,7 +10,6 @@ begin
10
10
  gem.email = "matt.zumwalt@yourmediashelf.com"
11
11
  gem.homepage = "http://github.com/projecthydra/solrizer"
12
12
  gem.authors = ["Matt Zumwalt"]
13
- gem.add_dependency "solr-ruby"
14
13
  gem.add_dependency "nokogiri"
15
14
  gem.add_dependency "om"
16
15
  gem.add_dependency "nokogiri"
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.3.2
1
+ 1.0.1
@@ -1,5 +1,3 @@
1
- require 'solr'
2
- require 'rexml/document'
3
1
  require "nokogiri"
4
2
  require 'yaml'
5
3
 
@@ -12,37 +10,51 @@ module Solrizer
12
10
  # with methods specific to that implementation (ie. extract_tag, extract_rels_ext, xml_to_solr, html_to_solr)
13
11
  #
14
12
  class Extractor
15
-
16
- # Populates a solr doc with values from a hash.
17
- # Accepts two forms of hashes:
18
- # => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}
19
- # or
20
- # => {:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]} }
21
- #
22
- # Note that values for individual fields can be a single string or an array of strings.
23
- def extract_hash( input_hash, solr_doc=Solr::Document.new )
24
- facets = input_hash.has_key?(:facets) ? input_hash[:facets] : input_hash
25
- facets.each_pair do |facet_name, value|
26
- case value.class.to_s
27
- when "String"
28
- solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{value}" )
29
- when "Array"
30
- value.each { |v| solr_doc << Solr::Field.new( :"#{facet_name}_facet" => "#{v}" ) }
31
- end
32
- end
33
-
34
- if input_hash.has_key?(:symbols)
35
- input_hash[:symbols].each do |symbol_name, value|
36
- case value.class.to_s
37
- when "String"
38
- solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{value}" )
39
- when "Array"
40
- value.each { |v| solr_doc << Solr::Field.new( :"#{symbol_name}_s" => "#{v}" ) }
41
- end
42
- end
13
+
14
+ # Insert +field_value+ for +field_name+ into +solr_doc+
15
+ # Ensures that field values are always appended to arrays within the values hash.
16
+ # Also ensures that values are run through format_node_value
17
+ # @param [Hash] solr_doc
18
+ # @param [String] field_name
19
+ # @param [String] field_value
20
+ def self.insert_solr_field_value(solr_doc, field_name, field_value)
21
+ formatted_value = self.format_node_value(field_value)
22
+ if solr_doc.has_key?(field_name)
23
+ solr_doc[field_name] << formatted_value
24
+ else
25
+ solr_doc.merge!( {field_name => [formatted_value]} )
43
26
  end
44
27
  return solr_doc
45
28
  end
46
29
 
30
+ # Strips the majority of whitespace from the values array and then joins them with a single blank delimiter
31
+ # @param [Array] values Array of strings representing the values returned
32
+ def self.format_node_value values
33
+ values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
34
+ end
35
+
36
+ # Instance Methods
37
+
38
+ # Alias for Solrizer::Extractor#insert_solr_field_value
39
+ def insert_solr_field_value(solr_doc, field_name, field_value)
40
+ Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
41
+ end
42
+
43
+ # Alias for Solrizer::Extractor#format_node_value
44
+ def format_node_value values
45
+ Solrizer::Extractor.format_node_value(values)
46
+ end
47
+
48
+ # Deprecated.
49
+ # merges input_hash into solr_hash
50
+ # @param [Hash] input_hash the input hash of values
51
+ # @param [Hash] solr_hash the solr values hash to add the values into
52
+ # @return [Hash] the populated Solr values hash
53
+ #
54
+ def extract_hash( input_hash, solr_hash=Hash.new )
55
+ warn "[DEPRECATION] `extract_hash` is deprecated. Just pass values directly into your solr values hash"
56
+ return solr_hash.merge!(input_hash)
57
+ end
58
+
47
59
  end
48
60
  end
@@ -1,5 +1,3 @@
1
- require 'solr'
2
- require 'rexml/document'
3
1
  require "nokogiri"
4
2
  require 'yaml'
5
3
 
@@ -8,7 +6,9 @@ module Solrizer::HTML::Extractor
8
6
  #
9
7
  # This method strips html tags out and returns content to be indexed in solr
10
8
  #
11
- def html_to_solr( ds, solr_doc=Solr::Document.new )
9
+ # @param [Datastream] ds object that responds to .content with HTML content
10
+ # @param [Hash] solr_doc hash of values to be inserted into solr as a solr document
11
+ def html_to_solr( ds, solr_doc=Hash.new )
12
12
 
13
13
  text = CGI.unescapeHTML(ds.content)
14
14
  doc = Nokogiri::HTML(text)
@@ -17,7 +17,7 @@ module Solrizer::HTML::Extractor
17
17
  stories = doc.xpath('//story')
18
18
 
19
19
  stories.each do |story|
20
- solr_doc << Solr::Field.new(:story_display => story.children.to_xml)
20
+ solr_doc.merge!({:story_display => story.children.to_xml})
21
21
  end
22
22
 
23
23
  #strip out text and put in story_t
@@ -28,7 +28,7 @@ module Solrizer::HTML::Extractor
28
28
  text << text_node.content
29
29
  end
30
30
 
31
- solr_doc << Solr::Field.new(:story_t => text)
31
+ solr_doc.merge!({:story_t => text})
32
32
 
33
33
  return solr_doc
34
34
  end
@@ -1,28 +1,29 @@
1
- require 'solr'
2
- require 'rexml/document'
3
- require "nokogiri"
4
- require 'yaml'
1
+ require "xmlsimple"
5
2
 
6
3
  module Solrizer::XML::Extractor
7
-
8
- def extract_tags(text)
9
- doc = REXML::Document.new( text )
10
- extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
11
- end
12
-
13
- def extract_tag(doc, type)
14
- tags = doc.elements["/fields/#{type}"]
15
- return {} unless tags
16
- {type => tags.text.split(/,/).map {|t| t.strip}}
17
- end
18
4
 
19
5
  #
20
6
  # This method extracts solr fields from simple xml
7
+ # If you want to do anything more nuanced with the xml, use TerminologyBasedSolrizer instead.
21
8
  #
22
- def xml_to_solr( text, solr_doc=Solr::Document.new )
23
- doc = REXML::Document.new( text )
24
- doc.root.elements.each do |element|
25
- solr_doc << Solr::Field.new( :"#{element.name}_t" => "#{element.text}" )
9
+ # @param [xml] text xml content to index
10
+ # @param [Hash] solr_doc
11
+ def xml_to_solr( text, solr_doc=Hash.new )
12
+ doc = XmlSimple.xml_in( text )
13
+
14
+ doc.each_pair do |name, value|
15
+ if value.kind_of?(Array)
16
+ if value.first.kind_of?(Hash)
17
+ # This deals with the way xml-simple handles nodes with attributes
18
+ solr_doc.merge!({:"#{name}_t" => "#{value.first["content"]}"})
19
+ elsif value.length > 1
20
+ solr_doc.merge!({:"#{name}_t" => value})
21
+ else
22
+ solr_doc.merge!({:"#{name}_t" => "#{value}"})
23
+ end
24
+ else
25
+ solr_doc.merge!({:"#{name}_t" => "#{value}"})
26
+ end
26
27
  end
27
28
 
28
29
  return solr_doc
@@ -8,9 +8,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
8
8
  # Module Methods
9
9
 
10
10
  # Build a solr document from +doc+ based on its terminology
11
- # @doc OM::XML::Document
12
- # @solr_doc (optional) Solr::Document to populate
13
- def self.solrize(doc, solr_doc=Solr::Document.new, field_mapper = nil)
11
+ # @param [OM::XML::Document] doc
12
+ # @param [Hash] (optional) solr_doc (values hash) to populate
13
+ def self.solrize(doc, solr_doc=Hash.new, field_mapper = nil)
14
14
  unless doc.class.terminology.nil?
15
15
  doc.class.terminology.terms.each_pair do |term_name,term|
16
16
  doc.solrize_term(term, solr_doc, field_mapper)
@@ -23,9 +23,10 @@ module Solrizer::XML::TerminologyBasedSolrizer
23
23
 
24
24
  # Populate a solr document with fields based on nodes in +xml+ corresponding to the
25
25
  # term identified by +term_pointer+ within +terminology+
26
- # @doc OM::XML::Document or Nokogiri::XML::Node
27
- # @term_pointer Array pointing to the desired term in +terminology+
28
- def self.solrize_term(doc, term, solr_doc = Solr::Document.new, field_mapper = nil, opts={})
26
+ # @param [OM::XML::Document] doc xml document to extract values from
27
+ # @param [OM::XML::Term] term corresponding to desired xml values
28
+ # @param [Hash] (optional) solr_doc (values hash) to populate
29
+ def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
29
30
  terminology = doc.class.terminology
30
31
  parents = opts.fetch(:parents, [])
31
32
 
@@ -52,10 +53,11 @@ module Solrizer::XML::TerminologyBasedSolrizer
52
53
 
53
54
  # Populate a solr document with solr fields corresponding to the given xml node
54
55
  # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
55
- # @doc OM::XML::Document or Nokogiri::XML::Node
56
- # @term_pointer Array pointing to the desired term in +terminology+
57
- # @solr_doc (optional) Solr::Document to populate
58
- def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new, field_mapper = nil, opts = {})
56
+ # @param [Nokogiri::XML::Node] node to solrize
57
+ # @param [OM::XML::Document] doc document the node came from
58
+ # @param [Array] term_pointer Array pointing to the term that should be used for solrization settings
59
+ # @param [Hash] (optional) solr_doc (values hash) to populate
60
+ def self.solrize_node(node, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
59
61
  field_mapper ||= self.default_field_mapper
60
62
  terminology = doc.class.terminology
61
63
  # term = terminology.retrieve_term(*term_pointer)
@@ -70,7 +72,7 @@ module Solrizer::XML::TerminologyBasedSolrizer
70
72
 
71
73
  field_mapper.solr_names_and_values(generic_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
72
74
  unless field_value.join("").strip.blank?
73
- solr_doc << Solr::Field.new(field_name => self.format_node_value(field_value))
75
+ ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
74
76
  end
75
77
  end
76
78
 
@@ -78,32 +80,26 @@ module Solrizer::XML::TerminologyBasedSolrizer
78
80
  hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
79
81
  field_mapper.solr_names_and_values(hierarchical_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
80
82
  unless field_value.join("").strip.blank?
81
- solr_doc << Solr::Field.new(field_name => self.format_node_value(field_value))
83
+ ::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value)
82
84
  end
83
85
  end
84
86
  end
85
87
  solr_doc
86
88
  end
87
-
88
- # Strips the majority of whitespace from the values array and then joins them with a single blank delimitter
89
- # @values Array of strings representing the values returned
90
- def self.format_node_value values
91
- values.map{|val| val.gsub(/\s+/,' ').strip}.join(" ")
92
- end
93
89
 
94
90
  # Instance Methods
95
91
 
96
92
  attr_accessor :field_mapper
97
93
 
98
- def to_solr(solr_doc = Solr::Document.new, field_mapper = self.field_mapper) # :nodoc:
94
+ def to_solr(solr_doc = Hash.new, field_mapper = self.field_mapper) # :nodoc:
99
95
  Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc, field_mapper)
100
96
  end
101
97
 
102
- def solrize_term(term, solr_doc = Solr::Document.new, field_mapper = self.field_mapper, opts={})
98
+ def solrize_term(term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
103
99
  Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
104
100
  end
105
101
 
106
- def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new, field_mapper = self.field_mapper, opts={})
102
+ def solrize_node(node, term_pointer, term, solr_doc = Hash.new, field_mapper = self.field_mapper, opts={})
107
103
  Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc, field_mapper, opts)
108
104
  end
109
105
 
data/solrizer.gemspec CHANGED
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{solrizer}
8
- s.version = "1.0.0"
8
+ s.version = "1.0.1"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matt Zumwalt"]
@@ -7,52 +7,10 @@ describe Solrizer::Extractor do
7
7
  @extractor = Solrizer::Extractor.new
8
8
  end
9
9
 
10
- describe "extract_hash" do
11
- it "should convert a hash to a solr doc" do
12
- example_hash = {"box"=>"Box 51A", "city"=>["Ann Arbor", "Hyderabad", "Palo Alto"], "person"=>["ELLIE ENGELMORE", "Reddy", "EDWARD FEIGENBAUM"], "title"=>"Letter from Ellie Engelmore to Professor K. C. Reddy", "series"=>"eaf7000", "folder"=>"Folder 15", "technology"=>["artificial intelligence"], "year"=>"1985", "organization"=>["Heuristic Programming Project", "Mathematics and Computer/Information Sciences University of Hyderabad Central University P. O. Hyder", "Professor K. C. Reddy School of Mathematics and Computer/Information Sciences"], "collection"=>"e-a-feigenbaum-collection", "state"=>["Michigan", "California"]}
13
-
14
- example_result = @extractor.extract_hash( example_hash )
15
- example_result.should be_kind_of Solr::Document
16
- example_hash.each_pair do |key,values|
17
- if values.class == String
18
- example_result["#{key}_facet"].should == values
19
- else
20
- values.each do |v|
21
- example_result.inspect.include?("@name=\"#{key}_facet\"").should be_true
22
- example_result.inspect.include?("@value=\"#{v}\"").should be_true
23
- end
24
- end
25
- end
26
- end
27
-
28
- it "should handle hashes with facets listed in a sub-hash" do
29
- simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
30
- result = @extractor.extract_hash( simple_hash )
31
- result.should be_kind_of Solr::Document
32
- result["technology_facet"].should == "t1"
33
- result.inspect.include?('@boost=nil').should be_true
34
- result.inspect.include?('@name="technology_facet"').should be_true
35
- result.inspect.include?('@value="t2"').should be_true
36
- result["company_facet"].should == "c1"
37
- result["person_facet"].should == "p1"
38
- result.inspect.include?('@name="person_facet"').should be_true
39
- result.inspect.include?('@value="p2"').should be_true
40
-
41
- end
42
-
43
- it "should create symbols from the :symbols subhash" do
44
- simple_hash = Hash[:facets => {'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}, :symbols=>{'technology'=>["t1", "t2"], 'company'=>"c1", "person"=>["p1", "p2"]}]
45
- result = @extractor.extract_hash( simple_hash )
46
- result.should be_kind_of Solr::Document
47
- result["technology_s"].should == "t1"
48
- result.inspect.include?('@name="technology_s"').should be_true
49
- result.inspect.include?('@value="t2"').should be_true
50
-
51
- result["company_s"].should == "c1"
52
- result["person_s"].should == "p1"
53
- result.inspect.include?('@name="person_s"').should be_true
54
- result.inspect.include?('@value="p2"').should be_true
55
-
10
+ describe ".format_node_value" do
11
+ it "should strip white space out of the array and join it with a single blank" do
12
+ Solrizer::Extractor.format_node_value([" test \n node \t value \t"]).should == "test node value"
13
+ Solrizer::Extractor.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
56
14
  end
57
15
  end
58
16
 
@@ -10,18 +10,19 @@ describe Solrizer::XML::Extractor do
10
10
  describe ".xml_to_solr" do
11
11
  it "should turn simple xml into a solr document" do
12
12
  desc_meta = fixture("druid-bv448hq0314-descMetadata.xml")
13
+
13
14
  result = @extractor.xml_to_solr(desc_meta)
14
15
  result[:type_t].should == "text"
15
16
  result[:medium_t].should == "Paper Document"
16
17
  result[:rights_t].should == "Presumed under copyright. Do not publish."
17
18
  result[:date_t].should == "1985-12-30"
18
- result[:format_t].should == "application/tiff"
19
+ result[:format_t].should be_kind_of(Array)
20
+ result[:format_t].should include("application/tiff")
21
+ result[:format_t].should include("application/pdf")
22
+ result[:format_t].should include("application/jp2000")
19
23
  result[:title_t].should == "This is a Sample Title"
20
24
  result[:publisher_t].should == "Sample Unversity"
21
25
 
22
- # ... and a hacky way of making sure that it added a field for each of the dc:medium values
23
- result.inspect.include?('@value="application/tiff"').should be_true
24
- result.inspect.include?('@value="application/pdf"').should be_true
25
26
  end
26
27
  end
27
28
 
@@ -22,18 +22,18 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
22
22
 
23
23
  it "should provide .to_solr and return a SolrDocument" do
24
24
  @mods_article.should respond_to(:to_solr)
25
- @mods_article.to_solr.should be_kind_of(Solr::Document)
25
+ @mods_article.to_solr.should be_kind_of(Hash)
26
26
  end
27
27
 
28
- it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
29
- doc = Solr::Document.new
28
+ it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
29
+ doc = Hash.new
30
30
  @mods_article.to_solr(doc).should equal(doc)
31
31
  end
32
32
 
33
33
  it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
34
34
  # mock_terms = {:name1=>:term1, :name2=>:term2}
35
35
  # ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
36
- solr_doc = Solr::Document.new
36
+ solr_doc = Hash.new
37
37
  @mods_article.field_mapper = Solrizer::FieldMapper::Default.new
38
38
  OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
39
39
  @mods_article.expects(:solrize_term).with(v, solr_doc, @mods_article.field_mapper)
@@ -46,10 +46,13 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
46
46
  solr_doc = @mods_article.to_solr
47
47
  #should have these
48
48
 
49
- solr_doc[:abstract].should be_nil
50
- solr_doc[:abstract_t].should == "ABSTRACT"
51
- solr_doc[:title_info_1_language_t].should == "finnish"
52
- solr_doc[:person_1_role_0_text_t].should == "teacher"
49
+ solr_doc["abstract"].should be_nil
50
+ solr_doc["abstract_t"].should == ["ABSTRACT"]
51
+ solr_doc["title_info_1_language_t"].should == ["finnish"]
52
+ solr_doc["person_1_role_0_text_t"].should == ["teacher"]
53
+ solr_doc["person_last_name_t"].sort.should == ["FAMILY NAME", "Gautama"]
54
+ # This next line will fail until om > 1.0.2 is released
55
+ # solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC1", "TOPIC2"]
53
56
 
54
57
  # These are a holdover from an old version of OM
55
58
  # solr_doc[:finnish_title_info_language_t].should == "finnish"
@@ -69,19 +72,19 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
69
72
  describe ".solrize_term" do
70
73
 
71
74
  it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
72
- solr_doc = Solr::Document.new
75
+ solr_doc = Hash.new
73
76
  result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
74
77
  result.should == solr_doc
75
78
  # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
76
79
  end
77
80
 
78
81
  it "should add multiple fields based on index_as" do
79
- fake_solr_doc = [] # duck typing cheat: test will break if solrize_term decides to use methods other than <<
82
+ fake_solr_doc = {}
80
83
  @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:name), fake_solr_doc)
81
84
 
82
85
  expected_names = ["DR.", "FAMILY NAME", "GIVEN NAMES"]
83
86
  %w(_t _display _facet).each do |suffix|
84
- actual_names = fake_solr_doc.select { |field| field.name == 'name_0_namePart' + suffix }.map { |field| field.value }.sort
87
+ actual_names = fake_solr_doc["name_0_namePart#{suffix}"].sort
85
88
  {suffix => actual_names}.should == {suffix => expected_names}
86
89
  end
87
90
  end
@@ -89,8 +92,8 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
89
92
  end
90
93
 
91
94
  describe ".solrize_node" do
92
- it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
93
- doc = Solr::Document.new
95
+ it "should optionally allow you to provide the Hash to add fields to and return that document when done" do
96
+ doc = Hash.new
94
97
  # @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
95
98
  end
96
99
 
@@ -99,10 +102,4 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
99
102
  it "should only create one node if parents is empty"
100
103
  end
101
104
 
102
- describe ".format_node_value" do
103
- it "should strip white space out of the array and join it with a single blank" do
104
- Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test \n node \t value \t"]).should == "test node value"
105
- Solrizer::XML::TerminologyBasedSolrizer.format_node_value([" test ", " \n node ", " \t value \t"]).should == "test node value"
106
- end
107
- end
108
105
  end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 23
4
+ hash: 21
5
5
  prerelease: false
6
6
  segments:
7
7
  - 1
8
8
  - 0
9
- - 0
10
- version: 1.0.0
9
+ - 1
10
+ version: 1.0.1
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Zumwalt