solrizer 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/Rakefile CHANGED
@@ -11,6 +11,7 @@ begin
11
11
  gem.homepage = "http://github.com/projecthydra/solrizer"
12
12
  gem.authors = ["Matt Zumwalt"]
13
13
  gem.add_dependency "active-fedora", ">= 1.1.5"
14
+ gem.add_dependency "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
14
15
  gem.add_development_dependency "rspec", ">= 1.2.9"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
17
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
@@ -0,0 +1,14 @@
1
+ id: id
2
+ date: _dt
3
+ string: _t
4
+ text: _t
5
+ symbol: _s
6
+ integer: _i
7
+ long: _l
8
+ boolean: _b
9
+ float: _f
10
+ double: _d
11
+ facet: _facet
12
+ display: _display
13
+ sort: _sort
14
+ unstemmed_search: _unstem_search
@@ -0,0 +1,62 @@
1
+ require "logger"
2
+ require "yaml"
3
+ module Solrizer
4
+ module FieldNameMapper
5
+
6
+ # Module Methods & Attributes
7
+ @@mappings = {}
8
+
9
+ # Generates solr field names from settings in solr_mappings
10
+ def self.solr_name(field_name, field_type)
11
+ name = field_name.to_s + self.mappings[field_type.to_s].to_s
12
+ if field_name.kind_of?(Symbol)
13
+ return name.to_sym
14
+ else
15
+ return name.to_s
16
+ end
17
+ end
18
+ # Returns the field type => solr suffix mappings currently held in @@mappings
19
+ def self.mappings
20
+ @@mappings
21
+ end
22
+ # Replaces the module-wide mappings (@@mappings) with the given object
23
+ def self.mappings=(mappings)
24
+ @@mappings = mappings
25
+ end
26
+
27
+ # Instance Methods
28
+ # Mixin convenience: forwards to the module-level Solrizer::FieldNameMapper.solr_name
29
+ def solr_name(field_name, field_type)
30
+ ::Solrizer::FieldNameMapper.solr_name(field_name, field_type)
31
+ end
32
+ # Memoized logger: RAILS_DEFAULT_LOGGER when that constant is defined, otherwise a new Logger writing to STDOUT
33
+ def self.logger
34
+ @logger ||= defined?(RAILS_DEFAULT_LOGGER) ? RAILS_DEFAULT_LOGGER : Logger.new(STDOUT)
35
+ end
36
+
37
+ # Loads solr mappings from yml file.
38
+ # @config_path This is the path to the directory where your mappings file is stored. @default "RAILS_ROOT/config/solr_mappings.yml"
39
+ # @mappings_file This is the filename for your solr mappings YAML file. @default solr_mappings.yml
40
+ def self.load_mappings( config_path=nil )
41
+
42
+ if config_path.nil?
43
+ if defined?(RAILS_ROOT)
44
+ config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
45
+ end
46
+ # Default to using the config file within the gem
47
+ if !File.exist?(config_path.to_s)
48
+ config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
49
+ end
50
+ end
51
+
52
+ logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")
53
+
54
+ @@mappings = YAML::load(File.open(config_path))
55
+
56
+ mappings["id"] = "id" unless mappings["id"]
57
+ end
58
+
59
+ # This ensures that some mappings will always be loaded
60
+ self.load_mappings
61
+ end #FieldNameMapper
62
+ end #Solrizer
@@ -0,0 +1,104 @@
1
+ # This module is only suitable to mix into Classes that use the OM::XML::Document Module
2
+ module Solrizer::XML::TerminologyBasedSolrizer
3
+
4
+ # Module Methods
5
+
6
+ # Build a solr document from +doc+ based on its terminology
7
+ # @doc OM::XML::Document
8
+ # @solr_doc (optional) Solr::Document to populate
9
+ def self.solrize(doc, solr_doc=Solr::Document.new)
10
+ unless doc.class.terminology.nil?
11
+ doc.class.terminology.terms.each_pair do |term_name,term|
12
+ doc.solrize_term(term, solr_doc)
13
+ # self.solrize_by_term(accessor_name, accessor_info, :solr_doc=>solr_doc)
14
+ end
15
+ end
16
+
17
+ return solr_doc
18
+ end
19
+
20
+ # Populate a solr document with fields based on nodes in +xml+ corresponding to the
21
+ # term identified by +term_pointer+ within +terminology+
22
+ # @doc OM::XML::Document or Nokogiri::XML::Node
23
+ # @term_pointer Array pointing to the desired term in +terminology+
24
+ def self.solrize_term(doc, term, solr_doc = Solr::Document.new, opts={})
25
+ terminology = doc.class.terminology
26
+ parents = opts.fetch(:parents, [])
27
+
28
+ term_pointer = parents+[term.name]
29
+
30
+ # term = terminology.retrieve_term(term_pointer)
31
+
32
+ # prep children hash
33
+ # child_accessors = accessor_info.fetch(:children, {})
34
+ # xpath = term.xpath_for(*term_pointer)
35
+ nodeset = doc.find_by_terms(*term_pointer)
36
+
37
+ nodeset.each do |node|
38
+ # create solr fields
39
+
40
+ self.solrize_node(node, doc, term_pointer, term, solr_doc)
41
+ term.children.each_pair do |child_term_name, child_term|
42
+ doc.solrize_term(child_term, solr_doc, opts={:parents=>parents+[{term.name=>nodeset.index(node)}]})
43
+ # self.solrize_term(doc, child_term_name, child_term, opts={:solr_doc=>solr_doc, :parents=>parents+[{accessor_name=>nodeset.index(node)}] })
44
+ end
45
+ end
46
+ solr_doc
47
+ end
48
+
49
+ # Populate a solr document with solr fields corresponding to the given xml node
50
+ # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
+ # @doc OM::XML::Document or Nokogiri::XML::Node
52
+ # @term_pointer Array pointing to the desired term in +terminology+
53
+ # @solr_doc (optional) Solr::Document to populate
54
+ def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new)
55
+ terminology = doc.class.terminology
56
+ # term = terminology.retrieve_term(*term_pointer)
57
+
58
+ if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
59
+ node_value = node.value
60
+ else
61
+ node_value = node.text
62
+ end
63
+
64
+ generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
65
+ generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)
66
+
67
+ solr_doc << Solr::Field.new(generic_field_name => node_value)
68
+
69
+ if term_pointer.length > 1
70
+ hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
71
+ hierarchical_field_name = self.generate_solr_symbol(hierarchical_field_name_base, term.data_type)
72
+ solr_doc << Solr::Field.new(hierarchical_field_name => node_value)
73
+ end
74
+ solr_doc
75
+ end
76
+
77
+ # Use Solrizer::FieldNameMapper to generate an appropriate solr field name from +field_name+ and +field_type+
78
+ def self.generate_solr_symbol(field_name, field_type) # :nodoc:
79
+ Solrizer::FieldNameMapper.solr_name(field_name, field_type)
80
+ end
81
+
82
+ # Instance Methods
83
+
84
+ # Solrizes this document via TerminologyBasedSolrizer.solrize, adding to +solr_doc+ (a new Solr::Document by default)
85
+ def to_solr(solr_doc = Solr::Document.new) # :nodoc:
86
+ Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc)
87
+ end
88
+
89
+ # Instance form of TerminologyBasedSolrizer.solrize_term, with this document supplied as +doc+
90
+ def solrize_term(term, solr_doc = Solr::Document.new, opts={})
91
+ Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, opts)
92
+ end
93
+
94
+ def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new)
95
+ Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc)
96
+ end
97
+
98
+ protected
99
+ # Protected instance form of TerminologyBasedSolrizer.generate_solr_symbol
100
+ def generate_solr_symbol(field_name, field_type) # :nodoc:
101
+ Solrizer::XML::TerminologyBasedSolrizer.generate_solr_symbol(field_name, field_type)
102
+ end
103
+
104
+ end
@@ -0,0 +1,4 @@
1
+ module Solrizer::XML
2
+ end
3
+
4
+ require "solrizer/xml/terminology_based_solrizer"
data/lib/solrizer.rb CHANGED
@@ -1,10 +1,14 @@
1
1
  require 'rubygems'
2
2
  require 'solrizer/indexer.rb'
3
+ require 'solrizer/field_name_mapper.rb'
4
+
5
+ # Let people explicitly require xml support if they want it ...
6
+ # require 'solrizer/xml.rb'
7
+
3
8
  # require 'fastercsv'
4
9
  require "ruby-debug"
5
10
 
6
11
 
7
-
8
12
  module Solrizer
9
13
  class Solrizer
10
14
 
@@ -34,7 +38,19 @@ class Solrizer
34
38
 
35
39
  start = Time.now
36
40
  print "Retrieving object #{obj} ..."
37
- obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
41
+
42
+ case obj
43
+ when ActiveFedora::Base
44
+ # do nothing
45
+ when Fedora::FedoraObject
46
+ obj = Repository.get_object( obj.pid )
47
+ when String
48
+ obj = Repository.get_object( obj )
49
+ else
50
+ raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
51
+ end
52
+
53
+ # obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
38
54
 
39
55
  obj_done = Time.now
40
56
  obj_done_elapse = obj_done - start
data/solrizer.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{solrizer}
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matt Zumwalt"]
12
- s.date = %q{2010-09-10}
12
+ s.date = %q{2010-09-15}
13
13
  s.description = %q{Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener}
14
14
  s.email = %q{matt.zumwalt@yourmediashelf.com}
15
15
  s.extra_rdoc_files = [
@@ -26,13 +26,17 @@ Gem::Specification.new do |s|
26
26
  "config/fedora.yml",
27
27
  "config/hydra_types.yml",
28
28
  "config/solr.yml",
29
+ "config/solr_mappings.yml",
29
30
  "lib/solrizer.rb",
30
31
  "lib/solrizer/configuration.rb",
31
32
  "lib/solrizer/extractor.rb",
33
+ "lib/solrizer/field_name_mapper.rb",
32
34
  "lib/solrizer/indexer.rb",
33
35
  "lib/solrizer/main.rb",
34
36
  "lib/solrizer/replicator.rb",
35
37
  "lib/solrizer/repository.rb",
38
+ "lib/solrizer/xml.rb",
39
+ "lib/solrizer/xml/terminology_based_solrizer.rb",
36
40
  "lib/tasks/solrizer.rake",
37
41
  "solrizer.gemspec",
38
42
  "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
@@ -40,14 +44,18 @@ Gem::Specification.new do |s|
40
44
  "spec/fixtures/druid-cm234kq4672-extProperties.xml",
41
45
  "spec/fixtures/druid-cm234kq4672-stories.xml",
42
46
  "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
47
+ "spec/fixtures/mods_articles/hydrangea_article1.xml",
43
48
  "spec/fixtures/rels_ext_cmodel.xml",
49
+ "spec/fixtures/solr_mappings_af_0.1.yml",
44
50
  "spec/integration/indexer_spec.rb",
45
51
  "spec/rcov.opts",
46
52
  "spec/spec.opts",
47
53
  "spec/spec_helper.rb",
48
54
  "spec/units/extractor_spec.rb",
55
+ "spec/units/field_name_mapper_spec.rb",
49
56
  "spec/units/indexer_spec.rb",
50
- "spec/units/shelver_spec.rb"
57
+ "spec/units/shelver_spec.rb",
58
+ "spec/units/xml_terminology_based_solrizer_spec.rb"
51
59
  ]
52
60
  s.homepage = %q{http://github.com/projecthydra/solrizer}
53
61
  s.rdoc_options = ["--charset=UTF-8"]
@@ -58,8 +66,10 @@ Gem::Specification.new do |s|
58
66
  "spec/integration/indexer_spec.rb",
59
67
  "spec/spec_helper.rb",
60
68
  "spec/units/extractor_spec.rb",
69
+ "spec/units/field_name_mapper_spec.rb",
61
70
  "spec/units/indexer_spec.rb",
62
- "spec/units/shelver_spec.rb"
71
+ "spec/units/shelver_spec.rb",
72
+ "spec/units/xml_terminology_based_solrizer_spec.rb"
63
73
  ]
64
74
 
65
75
  if s.respond_to? :specification_version then
@@ -68,13 +78,16 @@ Gem::Specification.new do |s|
68
78
 
69
79
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
70
80
  s.add_runtime_dependency(%q<active-fedora>, [">= 1.1.5"])
81
+ s.add_runtime_dependency(%q<om>, [">= 1.0.0"])
71
82
  s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
72
83
  else
73
84
  s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
85
+ s.add_dependency(%q<om>, [">= 1.0.0"])
74
86
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
75
87
  end
76
88
  else
77
89
  s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
90
+ s.add_dependency(%q<om>, [">= 1.0.0"])
78
91
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
79
92
  end
80
93
  end
@@ -0,0 +1,90 @@
1
+ <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
2
+ http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
+
4
+ <titleInfo>
5
+ <nonSort>THE</nonSort>
6
+ <title>ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
7
+ <subTitle>SUBTITLE</subTitle>
8
+ </titleInfo>
9
+ <titleInfo lang="finnish">
10
+ <title>Artikkelin otsikko Hydrangea artiklan 1</title>
11
+ </titleInfo>
12
+
13
+ <name type="personal">
14
+ <namePart type="family">FAMILY NAME</namePart>
15
+ <namePart type="given">GIVEN NAMES</namePart>
16
+ <namePart type="termsOfAddress">DR.</namePart>
17
+ <displayForm>NAME AS IT APPEARS</displayForm>
18
+ <affiliation>FACULTY, UNIVERSITY</affiliation>
19
+ <role>
20
+ <roleTerm authority="marcrelator" type="text">creator</roleTerm>
21
+ </role>
22
+ <role>
23
+ <roleTerm type="text">submitter</roleTerm>
24
+ </role>
25
+ </name>
26
+
27
+ <name type="personal">
28
+ <namePart type="family">Gautama</namePart>
29
+ <namePart type="given">Siddartha</namePart>
30
+ <namePart type="termsOfAddress">Prince</namePart>
31
+ <affiliation>Nirvana</affiliation>
32
+ <role>
33
+ <roleTerm authority="marcrelator" type="text">teacher</roleTerm>
34
+ </role>
35
+ </name>
36
+
37
+ <typeOfResource>text</typeOfResource>
38
+ <genre authority="local">journal article</genre>
39
+
40
+ <abstract>ABSTRACT</abstract>
41
+ <subject>
42
+ <topic>TOPIC 1</topic>
43
+ <topic>TOPIC 2</topic>
44
+ </subject>
45
+ <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
46
+ <topic>CONTROLLED TERM</topic>
47
+ </subject>
48
+
49
+ <language>
50
+ <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
51
+ </language>
52
+
53
+ <physicalDescription>
54
+ <internetMediaType>application/pdf</internetMediaType>
55
+ <extent>36 p.</extent>
56
+ </physicalDescription>
57
+
58
+ <relatedItem type="host">
59
+ <titleInfo>
60
+ <title>TITLE OF HOST JOURNAL</title>
61
+ </titleInfo>
62
+ <originInfo>
63
+ <publisher>PUBLISHER</publisher>
64
+ <dateIssued>DATE</dateIssued>
65
+ </originInfo>
66
+ <identifier type="issn">0013-8908</identifier>
67
+ <part>
68
+ <detail type="volume">
69
+ <number>2</number>
70
+ </detail>
71
+ <detail type="level">
72
+ <number>2</number>
73
+ </detail>
74
+ <extent unit="pages">
75
+ <start>195</start>
76
+ <end>230</end>
77
+ </extent>
78
+ <date>FEB. 2007</date>
79
+ </part>
80
+ </relatedItem>
81
+
82
+ <identifier type="uri">http://URL.edu.au/</identifier>
83
+ <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
84
+ <location>
85
+ <url>http://URL.edu.au/</url>
86
+ </location>
87
+ <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
88
+ <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
89
+
90
+ </mods>
@@ -0,0 +1,16 @@
1
+ id: id
2
+ date: _date
3
+ string: _field
4
+ text: _field
5
+ symbol: _field
6
+ integer: _field
7
+ long: _field
8
+ boolean: _field
9
+ float: _field
10
+ double: _field
11
+ facet: _facet
12
+ display: _display
13
+ sort: _sort
14
+ unstemmed_search: _unstem_search
15
+
16
+
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,8 @@ require 'solrizer'
4
4
  require 'spec'
5
5
  require 'spec/autorun'
6
6
 
7
+ require 'solrizer'
8
+
7
9
  Spec::Runner.configure do |config|
8
10
 
9
11
  config.mock_with :mocha
@@ -0,0 +1,41 @@
1
+ require File.join( File.dirname(__FILE__), "..", "spec_helper" )
2
+
3
+ # require 'solrizer'
4
+ # require 'solrizer/field_name_mapper'
5
+
6
+ class FieldNameMapperTest
7
+ include Solrizer::FieldNameMapper
8
+ end
9
+
10
+ def helper
11
+ @test_instance
12
+ end
13
+
14
+ describe Solrizer::FieldNameMapper do
15
+
16
+ before(:each) do
17
+ @test_instance = FieldNameMapperTest.new
18
+ end
19
+
20
+ after(:all) do
21
+ # Revert to default mappings after running tests
22
+ Solrizer::FieldNameMapper.load_mappings
23
+ end
24
+
25
+ describe ".solr_name" do
26
+ it "should generate solr field names from settings in solr_mappings" do
27
+ helper.solr_name(:system_create, :date).should == :system_create_dt
28
+ end
29
+ it "should format the response based on the class of the input" do
30
+ helper.solr_name(:system_create, :date).should == :system_create_dt
31
+ helper.solr_name("system_create", :date).should == "system_create_dt"
32
+ end
33
+ it "should rely on whichever mappings have been loaded into the SolrService" do
34
+ helper.solr_name(:system_create, :date).should == :system_create_dt
35
+ helper.solr_name(:foo, :text).should == :foo_t
36
+ Solrizer::FieldNameMapper.load_mappings(File.join(File.dirname(__FILE__), "..", "fixtures", "solr_mappings_af_0.1.yml"))
37
+ helper.solr_name(:system_create, :date).should == :system_create_date
38
+ helper.solr_name(:foo, :text).should == :foo_field
39
+ end
40
+ end
41
+ end
@@ -1,5 +1,4 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
2
 
4
3
  describe Solrizer::Solrizer do
5
4
 
@@ -9,17 +8,17 @@ describe Solrizer::Solrizer do
9
8
 
10
9
  describe "solrize" do
11
10
  it "should trigger the indexer for the provided object" do
12
- # sample_obj = ActiveFedora::Base.new
13
- mock_object = mock("my object")
14
- mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
15
- mock_object.stubs(:pid)
16
- mock_object.stubs(:label)
17
- mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
18
- ActiveFedora::Base.expects(:load_instance).never
11
+ sample_obj = ActiveFedora::Base.new
12
+ @solrizer.indexer.expects(:index).with( sample_obj )
13
+ @solrizer.solrize( sample_obj )
14
+ end
15
+ it "should work with Fedora::FedoraObject objects" do
16
+ mock_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
17
+ ActiveFedora::Base.expects(:load_instance).with( mock_object.pid ).returns(mock_object)
19
18
  @solrizer.indexer.expects(:index).with( mock_object )
20
19
  @solrizer.solrize( mock_object )
21
20
  end
22
- it "should still load the object if only a pid is provided" do
21
+ it "should load the object if only a pid is provided" do
23
22
  mock_object = mock("my object")
24
23
  mock_object.stubs(:pid)
25
24
  mock_object.stubs(:label)
@@ -29,13 +28,14 @@ describe Solrizer::Solrizer do
29
28
  @solrizer.indexer.expects(:index).with(mock_object)
30
29
  @solrizer.solrize("_PID_")
31
30
  end
31
+
32
32
  end
33
33
 
34
34
  describe "solrize_objects" do
35
- it "should call solrize for each pid returned by solr" do
36
- pids = [["pid1"], ["pid2"], ["pid3"]]
37
- Solrizer::Repository.expects(:get_pids).returns(pids)
38
- pids.each {|pid| @solrizer.expects(:solrize).with( pid ) }
35
+ it "should call solrize for each object returned by Fedora::Repository.find_objects" do
36
+ objects = [["pid1"], ["pid2"], ["pid3"]]
37
+ Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
38
+ objects.each {|object| @solrizer.expects(:solrize).with( object ) }
39
39
  @solrizer.solrize_objects
40
40
  end
41
41
  end
@@ -0,0 +1,88 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+ require 'solrizer/xml'
4
+
5
+ describe Solrizer::XML::TerminologyBasedSolrizer do
6
+
7
+ before(:all) do
8
+ OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
9
+ end
10
+
11
+ before(:each) do
12
+ article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
13
+ @mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
14
+ end
15
+
16
+ describe ".to_solr" do
17
+
18
+ # after(:all) do
19
+ # # Revert to default mappings after running tests
20
+ # ActiveFedora::SolrService.load_mappings
21
+ # end
22
+
23
+ it "should provide .to_solr and return a SolrDocument" do
24
+ @mods_article.should respond_to(:to_solr)
25
+ @mods_article.to_solr.should be_kind_of(Solr::Document)
26
+ end
27
+
28
+ it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
29
+ doc = Solr::Document.new
30
+ @mods_article.to_solr(doc).should equal(doc)
31
+ end
32
+
33
+ it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
34
+ # mock_terms = {:name1=>:term1, :name2=>:term2}
35
+ # ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
36
+ solr_doc = Solr::Document.new
37
+ OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
38
+ @mods_article.expects(:solrize_term).with(v, solr_doc)
39
+ end
40
+ @mods_article.to_solr(solr_doc)
41
+ end
42
+
43
+ it "should use Solr mappings to generate field names" do
44
+
45
+ solr_doc = @mods_article.to_solr
46
+ #should have these
47
+
48
+ solr_doc[:abstract].should be_nil
49
+ solr_doc[:abstract_t].should == "ABSTRACT"
50
+ solr_doc[:title_info_1_language_t].should == "finnish"
51
+ solr_doc[:person_1_role_0_text_t].should == "teacher"
52
+
53
+ # These are a holdover from an old version of OM
54
+ # solr_doc[:finnish_title_info_language_t].should == "finnish"
55
+ # solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"
56
+
57
+ # solr_doc[:mydate_date].should == "fake-date"
58
+ #
59
+ # solr_doc[:publisher_t].should be_nil
60
+ # solr_doc[:coverage_t].should be_nil
61
+ # solr_doc[:creation_date_dt].should be_nil
62
+ # solr_doc.should == ""
63
+
64
+ end
65
+
66
+ end
67
+
68
+ describe ".solrize_term" do
69
+
70
+ it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
71
+ solr_doc = Solr::Document.new
72
+ result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
73
+ result.should == solr_doc
74
+ # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
75
+ end
76
+
77
+ end
78
+
79
+ describe ".solrize_node" do
80
+ it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
81
+ doc = Solr::Document.new
82
+ # @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
83
+ end
84
+ it "should create a solr field containing node.text"
85
+ it "should create hierarchical field entries if parents is not empty"
86
+ it "should only create one node if parents is empty"
87
+ end
88
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 3
10
- version: 0.1.3
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Zumwalt
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-10 00:00:00 -05:00
18
+ date: 2010-09-15 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -35,9 +35,25 @@ dependencies:
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
38
+ name: om
39
39
  prerelease: false
40
40
  requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 23
46
+ segments:
47
+ - 1
48
+ - 0
49
+ - 0
50
+ version: 1.0.0
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: rspec
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
41
57
  none: false
42
58
  requirements:
43
59
  - - ">="
@@ -49,7 +65,7 @@ dependencies:
49
65
  - 9
50
66
  version: 1.2.9
51
67
  type: :development
52
- version_requirements: *id002
68
+ version_requirements: *id003
53
69
  description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
54
70
  email: matt.zumwalt@yourmediashelf.com
55
71
  executables: []
@@ -69,13 +85,17 @@ files:
69
85
  - config/fedora.yml
70
86
  - config/hydra_types.yml
71
87
  - config/solr.yml
88
+ - config/solr_mappings.yml
72
89
  - lib/solrizer.rb
73
90
  - lib/solrizer/configuration.rb
74
91
  - lib/solrizer/extractor.rb
92
+ - lib/solrizer/field_name_mapper.rb
75
93
  - lib/solrizer/indexer.rb
76
94
  - lib/solrizer/main.rb
77
95
  - lib/solrizer/replicator.rb
78
96
  - lib/solrizer/repository.rb
97
+ - lib/solrizer/xml.rb
98
+ - lib/solrizer/xml/terminology_based_solrizer.rb
79
99
  - lib/tasks/solrizer.rake
80
100
  - solrizer.gemspec
81
101
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
@@ -83,14 +103,18 @@ files:
83
103
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
84
104
  - spec/fixtures/druid-cm234kq4672-stories.xml
85
105
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
106
+ - spec/fixtures/mods_articles/hydrangea_article1.xml
86
107
  - spec/fixtures/rels_ext_cmodel.xml
108
+ - spec/fixtures/solr_mappings_af_0.1.yml
87
109
  - spec/integration/indexer_spec.rb
88
110
  - spec/rcov.opts
89
111
  - spec/spec.opts
90
112
  - spec/spec_helper.rb
91
113
  - spec/units/extractor_spec.rb
114
+ - spec/units/field_name_mapper_spec.rb
92
115
  - spec/units/indexer_spec.rb
93
116
  - spec/units/shelver_spec.rb
117
+ - spec/units/xml_terminology_based_solrizer_spec.rb
94
118
  has_rdoc: true
95
119
  homepage: http://github.com/projecthydra/solrizer
96
120
  licenses: []
@@ -129,5 +153,7 @@ test_files:
129
153
  - spec/integration/indexer_spec.rb
130
154
  - spec/spec_helper.rb
131
155
  - spec/units/extractor_spec.rb
156
+ - spec/units/field_name_mapper_spec.rb
132
157
  - spec/units/indexer_spec.rb
133
158
  - spec/units/shelver_spec.rb
159
+ - spec/units/xml_terminology_based_solrizer_spec.rb