solrizer 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
data/Rakefile CHANGED
@@ -11,6 +11,7 @@ begin
11
11
  gem.homepage = "http://github.com/projecthydra/solrizer"
12
12
  gem.authors = ["Matt Zumwalt"]
13
13
  gem.add_dependency "active-fedora", ">= 1.1.5"
14
+ gem.add_dependency "om", ">= 1.0.0" # only required by xml/terminology_based_solrizer ...
14
15
  gem.add_development_dependency "rspec", ">= 1.2.9"
15
16
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
16
17
  end
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.1.3
1
+ 0.2.0
@@ -0,0 +1,14 @@
1
+ id: id
2
+ date: _dt
3
+ string: _t
4
+ text: _t
5
+ symbol: _s
6
+ integer: _i
7
+ long: _l
8
+ boolean: _b
9
+ float: _f
10
+ double: _d
11
+ facet: _facet
12
+ display: _display
13
+ sort: _sort
14
+ unstemmed_search: _unstem_search
@@ -0,0 +1,62 @@
1
+ require "yaml"
2
+
3
+ module Solrizer
4
+ module FieldNameMapper
5
+
6
+ # Module Methods & Attributes
7
+ @@mappings = {}
8
+
9
# Builds the solr field name for +field_name+ by appending the suffix that the
# loaded mappings associate with +field_type+ (nothing is appended when the
# type has no mapping).
# The result mirrors the input: a Symbol +field_name+ yields a Symbol, any
# other input yields a String.
def self.solr_name(field_name, field_type)
  suffix = self.mappings[field_type.to_s]
  solr_field = "#{field_name}#{suffix}"
  field_name.kind_of?(Symbol) ? solr_field.to_sym : solr_field
end
18
+
19
+ def self.mappings
20
+ @@mappings
21
+ end
22
+
23
+ def self.mappings=(mappings)
24
+ @@mappings = mappings
25
+ end
26
+
27
+ # Instance Methods
28
+
29
# Instance-level convenience for objects that mix in this module; forwards to
# the module-level solr_name.
# The constant is anchored with a leading :: -- presumably because the gem also
# defines a Solrizer class nested inside the Solrizer module, which an
# unanchored lookup from inside that module could pick up instead.
def solr_name(field_name, field_type)
  mapper = ::Solrizer::FieldNameMapper
  mapper.solr_name(field_name, field_type)
end
32
+
33
# Returns the logger used for Solrizer messages, memoized after the first call.
# RAILS_DEFAULT_LOGGER is used when that constant is defined; otherwise a
# Logger writing to STDOUT is created.
def self.logger
  @logger ||= if defined?(RAILS_DEFAULT_LOGGER)
    RAILS_DEFAULT_LOGGER
  else
    # Logger is standard library, but this file only requires yaml; load it
    # here so the fallback cannot fail with an uninitialized constant.
    require "logger"
    Logger.new(STDOUT)
  end
end
36
+
37
# Loads the field-type => suffix mappings from a YAML file and makes them the
# active mappings.
#
# @param config_path [String, nil] full path to the mappings YAML file (a file,
#   not a directory). When nil, RAILS_ROOT/config/solr_mappings.yml is used if
#   RAILS_ROOT is defined and that file exists; otherwise the
#   config/solr_mappings.yml found two directories above this file is used.
#
# An "id" => "id" mapping is added whenever the loaded file does not supply one.
def self.load_mappings( config_path=nil )
  if config_path.nil?
    config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml") if defined?(RAILS_ROOT)
    # Default to using the config file within the gem
    unless File.exist?(config_path.to_s)
      config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
    end
  end

  logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")

  # Block form closes the file handle as soon as parsing is done; the result is
  # stored through the module's own writer rather than by touching the class
  # variable directly.
  self.mappings = File.open(config_path) { |file| YAML::load(file) }

  mappings["id"] = "id" unless mappings["id"]
end
58
+
59
+ # This ensures that some mappings will always be loaded
60
+ self.load_mappings
61
+ end #FieldNameMapper
62
+ end #Solrizer
@@ -0,0 +1,104 @@
1
+ # This module is only suitable to mix into Classes that use the OM::XML::Document Module
2
+ module Solrizer::XML::TerminologyBasedSolrizer
3
+
4
+ # Module Methods
5
+
6
# Populates +solr_doc+ from +doc+ by asking +doc+ to solrize every top-level
# term of its class's terminology.
# @doc OM::XML::Document whose class exposes .terminology
# @solr_doc (optional) Solr::Document to populate; returned when done
# When the class has no terminology, +solr_doc+ is returned untouched.
def self.solrize(doc, solr_doc=Solr::Document.new)
  terminology = doc.class.terminology
  return solr_doc if terminology.nil?

  terminology.terms.each_pair do |_term_name, term|
    doc.solrize_term(term, solr_doc)
  end

  solr_doc
end
19
+
20
# Populates +solr_doc+ with fields for every node in +doc+ that matches +term+,
# then recurses into the term's children.
# @doc OM::XML::Document (must respond to find_by_terms and solrize_term)
# @term the term to solrize (must respond to name and children)
# @solr_doc (optional) Solr::Document to populate; returned when done
# @opts :parents => Array pointing at the ancestors of +term+ (default [])
def self.solrize_term(doc, term, solr_doc = Solr::Document.new, opts={})
  parents = opts.fetch(:parents, [])
  term_pointer = parents + [term.name]

  nodeset = doc.find_by_terms(*term_pointer)

  # each_with_index supplies the node's position directly instead of
  # re-searching the nodeset for every node.
  nodeset.each_with_index do |node, node_index|
    # create solr fields
    self.solrize_node(node, doc, term_pointer, term, solr_doc)

    # Children are addressed through this specific node: {term name => position}
    child_opts = { :parents => parents + [{ term.name => node_index }] }
    term.children.each_pair do |_child_term_name, child_term|
      doc.solrize_term(child_term, solr_doc, child_opts)
    end
  end
  solr_doc
end
48
+
49
+ # Populate a solr document with solr fields corresponding to the given xml node
50
+ # Field names are generated using settings from the term in the +doc+'s terminology corresponding to +term_pointer+
51
+ # @doc OM::XML::Document or Nokogiri::XML::Node
52
+ # @term_pointer Array pointing to the desired term in +terminology+
53
+ # @solr_doc (optional) Solr::Document to populate
54
+ def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new)
55
+ terminology = doc.class.terminology
56
+ # term = terminology.retrieve_term(*term_pointer)
57
+
58
+ if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
59
+ node_value = node.value
60
+ else
61
+ node_value = node.text
62
+ end
63
+
64
+ generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
65
+ generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)
66
+
67
+ solr_doc << Solr::Field.new(generic_field_name => node_value)
68
+
69
+ if term_pointer.length > 1
70
+ hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
71
+ hierarchical_field_name = self.generate_solr_symbol(hierarchical_field_name_base, term.data_type)
72
+ solr_doc << Solr::Field.new(hierarchical_field_name => node_value)
73
+ end
74
+ solr_doc
75
+ end
76
+
77
+ # Use Solrizer::FieldNameMapper to generate an appropriate solr field name +field_name+ and +field_type+
78
+ def self.generate_solr_symbol(field_name, field_type) # :nodoc:
79
+ Solrizer::FieldNameMapper.solr_name(field_name, field_type)
80
+ end
81
+
82
+ # Instance Methods
83
+
84
+
85
+ def to_solr(solr_doc = Solr::Document.new) # :nodoc:
86
+ Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc)
87
+ end
88
+
89
+
90
+ def solrize_term(term, solr_doc = Solr::Document.new, opts={})
91
+ Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, opts)
92
+ end
93
+
94
# Instance-level convenience: solrizes +node+ using this object as the document.
# Forwards to the module-level solrize_node, whose parameter order is
# (node, doc, term_pointer, term, solr_doc); +term+ must be passed along so
# that +solr_doc+ lands in the solr_doc slot.
def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new)
  Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, term, solr_doc)
end
97
+
98
+ protected
99
+
100
+ def generate_solr_symbol(field_name, field_type) # :nodoc:
101
+ Solrizer::XML::TerminologyBasedSolrizer.generate_solr_symbol(field_name, field_type)
102
+ end
103
+
104
+ end
@@ -0,0 +1,4 @@
1
+ module Solrizer::XML
2
+ end
3
+
4
+ require "solrizer/xml/terminology_based_solrizer"
data/lib/solrizer.rb CHANGED
@@ -1,10 +1,14 @@
1
1
  require 'rubygems'
2
2
  require 'solrizer/indexer.rb'
3
+ require 'solrizer/field_name_mapper.rb'
4
+
5
+ # Let people explicitly require xml support if they want it ...
6
+ # require 'solrizer/xml.rb'
7
+
3
8
  # require 'fastercsv'
4
9
  require "ruby-debug"
5
10
 
6
11
 
7
-
8
12
  module Solrizer
9
13
  class Solrizer
10
14
 
@@ -34,7 +38,19 @@ class Solrizer
34
38
 
35
39
  start = Time.now
36
40
  print "Retrieving object #{obj} ..."
37
- obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
41
+
42
+ case obj
43
+ when ActiveFedora::Base
44
+ # do nothing
45
+ when Fedora::FedoraObject
46
+ obj = Repository.get_object( obj.pid )
47
+ when String
48
+ obj = Repository.get_object( obj )
49
+ else
50
+ raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
51
+ end
52
+
53
+ # obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
38
54
 
39
55
  obj_done = Time.now
40
56
  obj_done_elapse = obj_done - start
data/solrizer.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{solrizer}
8
- s.version = "0.1.3"
8
+ s.version = "0.2.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Matt Zumwalt"]
12
- s.date = %q{2010-09-10}
12
+ s.date = %q{2010-09-15}
13
13
  s.description = %q{Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener}
14
14
  s.email = %q{matt.zumwalt@yourmediashelf.com}
15
15
  s.extra_rdoc_files = [
@@ -26,13 +26,17 @@ Gem::Specification.new do |s|
26
26
  "config/fedora.yml",
27
27
  "config/hydra_types.yml",
28
28
  "config/solr.yml",
29
+ "config/solr_mappings.yml",
29
30
  "lib/solrizer.rb",
30
31
  "lib/solrizer/configuration.rb",
31
32
  "lib/solrizer/extractor.rb",
33
+ "lib/solrizer/field_name_mapper.rb",
32
34
  "lib/solrizer/indexer.rb",
33
35
  "lib/solrizer/main.rb",
34
36
  "lib/solrizer/replicator.rb",
35
37
  "lib/solrizer/repository.rb",
38
+ "lib/solrizer/xml.rb",
39
+ "lib/solrizer/xml/terminology_based_solrizer.rb",
36
40
  "lib/tasks/solrizer.rake",
37
41
  "solrizer.gemspec",
38
42
  "spec/fixtures/druid-bv448hq0314-descMetadata.xml",
@@ -40,14 +44,18 @@ Gem::Specification.new do |s|
40
44
  "spec/fixtures/druid-cm234kq4672-extProperties.xml",
41
45
  "spec/fixtures/druid-cm234kq4672-stories.xml",
42
46
  "spec/fixtures/druid-hc513kw4806-descMetadata.xml",
47
+ "spec/fixtures/mods_articles/hydrangea_article1.xml",
43
48
  "spec/fixtures/rels_ext_cmodel.xml",
49
+ "spec/fixtures/solr_mappings_af_0.1.yml",
44
50
  "spec/integration/indexer_spec.rb",
45
51
  "spec/rcov.opts",
46
52
  "spec/spec.opts",
47
53
  "spec/spec_helper.rb",
48
54
  "spec/units/extractor_spec.rb",
55
+ "spec/units/field_name_mapper_spec.rb",
49
56
  "spec/units/indexer_spec.rb",
50
- "spec/units/shelver_spec.rb"
57
+ "spec/units/shelver_spec.rb",
58
+ "spec/units/xml_terminology_based_solrizer_spec.rb"
51
59
  ]
52
60
  s.homepage = %q{http://github.com/projecthydra/solrizer}
53
61
  s.rdoc_options = ["--charset=UTF-8"]
@@ -58,8 +66,10 @@ Gem::Specification.new do |s|
58
66
  "spec/integration/indexer_spec.rb",
59
67
  "spec/spec_helper.rb",
60
68
  "spec/units/extractor_spec.rb",
69
+ "spec/units/field_name_mapper_spec.rb",
61
70
  "spec/units/indexer_spec.rb",
62
- "spec/units/shelver_spec.rb"
71
+ "spec/units/shelver_spec.rb",
72
+ "spec/units/xml_terminology_based_solrizer_spec.rb"
63
73
  ]
64
74
 
65
75
  if s.respond_to? :specification_version then
@@ -68,13 +78,16 @@ Gem::Specification.new do |s|
68
78
 
69
79
  if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
70
80
  s.add_runtime_dependency(%q<active-fedora>, [">= 1.1.5"])
81
+ s.add_runtime_dependency(%q<om>, [">= 1.0.0"])
71
82
  s.add_development_dependency(%q<rspec>, [">= 1.2.9"])
72
83
  else
73
84
  s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
85
+ s.add_dependency(%q<om>, [">= 1.0.0"])
74
86
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
75
87
  end
76
88
  else
77
89
  s.add_dependency(%q<active-fedora>, [">= 1.1.5"])
90
+ s.add_dependency(%q<om>, [">= 1.0.0"])
78
91
  s.add_dependency(%q<rspec>, [">= 1.2.9"])
79
92
  end
80
93
  end
@@ -0,0 +1,90 @@
1
+ <mods version="3.0" xsi:schemaLocation="http://www.loc.gov/mods/v3
2
+ http://www.loc.gov/standards/mods/v3/mods-3-0.xsd" xmlns="http://www.loc.gov/mods/v3" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance">
3
+
4
+ <titleInfo>
5
+ <nonSort>THE</nonSort>
6
+ <title>ARTICLE TITLE HYDRANGEA ARTICLE 1</title>
7
+ <subTitle>SUBTITLE</subTitle>
8
+ </titleInfo>
9
+ <titleInfo lang="finnish">
10
+ <title>Artikkelin otsikko Hydrangea artiklan 1</title>
11
+ </titleInfo>
12
+
13
+ <name type="personal">
14
+ <namePart type="family">FAMILY NAME</namePart>
15
+ <namePart type="given">GIVEN NAMES</namePart>
16
+ <namePart type="termsOfAddress">DR.</namePart>
17
+ <displayForm>NAME AS IT APPEARS</displayForm>
18
+ <affiliation>FACULTY, UNIVERSITY</affiliation>
19
+ <role>
20
+ <roleTerm authority="marcrelator" type="text">creator</roleTerm>
21
+ </role>
22
+ <role>
23
+ <roleTerm type="text">submitter</roleTerm>
24
+ </role>
25
+ </name>
26
+
27
+ <name type="personal">
28
+ <namePart type="family">Gautama</namePart>
29
+ <namePart type="given">Siddartha</namePart>
30
+ <namePart type="termsOfAddress">Prince</namePart>
31
+ <affiliation>Nirvana</affiliation>
32
+ <role>
33
+ <roleTerm authority="marcrelator" type="text">teacher</roleTerm>
34
+ </role>
35
+ </name>
36
+
37
+ <typeOfResource>text</typeOfResource>
38
+ <genre authority="local">journal article</genre>
39
+
40
+ <abstract>ABSTRACT</abstract>
41
+ <subject>
42
+ <topic>TOPIC 1</topic>
43
+ <topic>TOPIC 2</topic>
44
+ </subject>
45
+ <subject authority="AUTHORITY SOURCE (RFCD, LCSH)">
46
+ <topic>CONTROLLED TERM</topic>
47
+ </subject>
48
+
49
+ <language>
50
+ <languageTerm authority="iso639-2b" type="code">en-aus </languageTerm>
51
+ </language>
52
+
53
+ <physicalDescription>
54
+ <internetMediaType>application/pdf</internetMediaType>
55
+ <extent>36 p.</extent>
56
+ </physicalDescription>
57
+
58
+ <relatedItem type="host">
59
+ <titleInfo>
60
+ <title>TITLE OF HOST JOURNAL</title>
61
+ </titleInfo>
62
+ <originInfo>
63
+ <publisher>PUBLISHER</publisher>
64
+ <dateIssued>DATE</dateIssued>
65
+ </originInfo>
66
+ <identifier type="issn">0013-8908</identifier>
67
+ <part>
68
+ <detail type="volume">
69
+ <number>2</number>
70
+ </detail>
71
+ <detail type="level">
72
+ <number>2</number>
73
+ </detail>
74
+ <extent unit="pages">
75
+ <start>195</start>
76
+ <end>230</end>
77
+ </extent>
78
+ <date>FEB. 2007</date>
79
+ </part>
80
+ </relatedItem>
81
+
82
+ <identifier type="uri">http://URL.edu.au/</identifier>
83
+ <identifier type="doi">doi:10.1006/jmbi.1995.0238</identifier>
84
+ <location>
85
+ <url>http://URL.edu.au/</url>
86
+ </location>
87
+ <accessCondition type="restrictionOnAccess">EMBARGO NOTE</accessCondition>
88
+ <accessCondition type="use and reproduction">OPEN ACCESS</accessCondition>
89
+
90
+ </mods>
@@ -0,0 +1,16 @@
1
+ id: id
2
+ date: _date
3
+ string: _field
4
+ text: _field
5
+ symbol: _field
6
+ integer: _field
7
+ long: _field
8
+ boolean: _field
9
+ float: _field
10
+ double: _field
11
+ facet: _facet
12
+ display: _display
13
+ sort: _sort
14
+ unstemmed_search: _unstem_search
15
+
16
+
data/spec/spec_helper.rb CHANGED
@@ -4,6 +4,8 @@ require 'solrizer'
4
4
  require 'spec'
5
5
  require 'spec/autorun'
6
6
 
7
+ require 'solrizer'
8
+
7
9
  Spec::Runner.configure do |config|
8
10
 
9
11
  config.mock_with :mocha
@@ -0,0 +1,41 @@
1
+ require File.join( File.dirname(__FILE__), "..", "spec_helper" )
2
+
3
+ # require 'solrizer'
4
+ # require 'solrizer/field_name_mapper'
5
+
6
+ class FieldNameMapperTest
7
+ include Solrizer::FieldNameMapper
8
+ end
9
+
10
+ def helper
11
+ @test_instance
12
+ end
13
+
14
+ describe Solrizer::FieldNameMapper do
15
+
16
+ before(:each) do
17
+ @test_instance = FieldNameMapperTest.new
18
+ end
19
+
20
+ after(:all) do
21
+ # Revert to default mappings after running tests
22
+ Solrizer::FieldNameMapper.load_mappings
23
+ end
24
+
25
+ describe ".solr_name" do
26
+ it "should generate solr field names from settings in solr_mappings" do
27
+ helper.solr_name(:system_create, :date).should == :system_create_dt
28
+ end
29
+ it "should format the response based on the class of the input" do
30
+ helper.solr_name(:system_create, :date).should == :system_create_dt
31
+ helper.solr_name("system_create", :date).should == "system_create_dt"
32
+ end
33
+ it "should rely on whichever mappings have been loaded into the SolrService" do
34
+ helper.solr_name(:system_create, :date).should == :system_create_dt
35
+ helper.solr_name(:foo, :text).should == :foo_t
36
+ Solrizer::FieldNameMapper.load_mappings(File.join(File.dirname(__FILE__), "..", "fixtures", "solr_mappings_af_0.1.yml"))
37
+ helper.solr_name(:system_create, :date).should == :system_create_date
38
+ helper.solr_name(:foo, :text).should == :foo_field
39
+ end
40
+ end
41
+ end
@@ -1,5 +1,4 @@
1
1
  require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
- require 'solrizer'
3
2
 
4
3
  describe Solrizer::Solrizer do
5
4
 
@@ -9,17 +8,17 @@ describe Solrizer::Solrizer do
9
8
 
10
9
  describe "solrize" do
11
10
  it "should trigger the indexer for the provided object" do
12
- # sample_obj = ActiveFedora::Base.new
13
- mock_object = mock("my object")
14
- mock_object.expects(:kind_of?).with(ActiveFedora::Base).returns(true)
15
- mock_object.stubs(:pid)
16
- mock_object.stubs(:label)
17
- mock_object.stubs(:datastreams).returns({'descMetadata'=>"foo","location"=>"bar"})
18
- ActiveFedora::Base.expects(:load_instance).never
11
+ sample_obj = ActiveFedora::Base.new
12
+ @solrizer.indexer.expects(:index).with( sample_obj )
13
+ @solrizer.solrize( sample_obj )
14
+ end
15
+ it "should work with Fedora::FedoraObject objects" do
16
+ mock_object = Fedora::FedoraObject.new(:pid=>"my:pid", :label=>"my label")
17
+ ActiveFedora::Base.expects(:load_instance).with( mock_object.pid ).returns(mock_object)
19
18
  @solrizer.indexer.expects(:index).with( mock_object )
20
19
  @solrizer.solrize( mock_object )
21
20
  end
22
- it "should still load the object if only a pid is provided" do
21
+ it "should load the object if only a pid is provided" do
23
22
  mock_object = mock("my object")
24
23
  mock_object.stubs(:pid)
25
24
  mock_object.stubs(:label)
@@ -29,13 +28,14 @@ describe Solrizer::Solrizer do
29
28
  @solrizer.indexer.expects(:index).with(mock_object)
30
29
  @solrizer.solrize("_PID_")
31
30
  end
31
+
32
32
  end
33
33
 
34
34
  describe "solrize_objects" do
35
- it "should call solrize for each pid returned by solr" do
36
- pids = [["pid1"], ["pid2"], ["pid3"]]
37
- Solrizer::Repository.expects(:get_pids).returns(pids)
38
- pids.each {|pid| @solrizer.expects(:solrize).with( pid ) }
35
+ it "should call solrize for each object returned by Fedora::Repository.find_objects" do
36
+ objects = [["pid1"], ["pid2"], ["pid3"]]
37
+ Fedora::Repository.any_instance.expects(:find_objects).returns(objects)
38
+ objects.each {|object| @solrizer.expects(:solrize).with( object ) }
39
39
  @solrizer.solrize_objects
40
40
  end
41
41
  end
@@ -0,0 +1,88 @@
1
+ require File.expand_path(File.dirname(__FILE__) + '/../spec_helper')
2
+ require 'solrizer'
3
+ require 'solrizer/xml'
4
+
5
+ describe Solrizer::XML::TerminologyBasedSolrizer do
6
+
7
+ before(:all) do
8
+ OM::Samples::ModsArticle.send(:include, Solrizer::XML::TerminologyBasedSolrizer)
9
+ end
10
+
11
+ before(:each) do
12
+ article_xml = fixture( File.join("mods_articles", "hydrangea_article1.xml") )
13
+ @mods_article = OM::Samples::ModsArticle.from_xml(article_xml)
14
+ end
15
+
16
+ describe ".to_solr" do
17
+
18
+ # after(:all) do
19
+ # # Revert to default mappings after running tests
20
+ # ActiveFedora::SolrService.load_mappings
21
+ # end
22
+
23
+ it "should provide .to_solr and return a SolrDocument" do
24
+ @mods_article.should respond_to(:to_solr)
25
+ @mods_article.to_solr.should be_kind_of(Solr::Document)
26
+ end
27
+
28
+ it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
29
+ doc = Solr::Document.new
30
+ @mods_article.to_solr(doc).should equal(doc)
31
+ end
32
+
33
+ it "should iterate through the terminology terms, calling .solrize_term on each and passing in the solr doc" do
34
+ # mock_terms = {:name1=>:term1, :name2=>:term2}
35
+ # ActiveFedora::NokogiriDatastream.stubs(:accessors).returns(mock_accessors)
36
+ solr_doc = Solr::Document.new
37
+ OM::Samples::ModsArticle.terminology.terms.each_pair do |k,v|
38
+ @mods_article.expects(:solrize_term).with(v, solr_doc)
39
+ end
40
+ @mods_article.to_solr(solr_doc)
41
+ end
42
+
43
+ it "should use Solr mappings to generate field names" do
44
+
45
+ solr_doc = @mods_article.to_solr
46
+ #should have these
47
+
48
+ solr_doc[:abstract].should be_nil
49
+ solr_doc[:abstract_t].should == "ABSTRACT"
50
+ solr_doc[:title_info_1_language_t].should == "finnish"
51
+ solr_doc[:person_1_role_0_text_t].should == "teacher"
52
+
53
+ # These are a holdover from an old version of OM
54
+ # solr_doc[:finnish_title_info_language_t].should == "finnish"
55
+ # solr_doc[:finnish_title_info_main_title_t].should == "Artikkelin otsikko Hydrangea artiklan 1"
56
+
57
+ # solr_doc[:mydate_date].should == "fake-date"
58
+ #
59
+ # solr_doc[:publisher_t].should be_nil
60
+ # solr_doc[:coverage_t].should be_nil
61
+ # solr_doc[:creation_date_dt].should be_nil
62
+ # solr_doc.should == ""
63
+
64
+ end
65
+
66
+ end
67
+
68
+ describe ".solrize_term" do
69
+
70
+ it "should add fields to a solr document for all nodes corresponding to the given term and its children" do
71
+ solr_doc = Solr::Document.new
72
+ result = @mods_article.solrize_term(OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), solr_doc)
73
+ result.should == solr_doc
74
+ # @mods_article.solrize_term(:title_info, OM::Samples::ModsArticle.terminology.retrieve_term(:title_info), :solr_doc=>solr_doc).should == ""
75
+ end
76
+
77
+ end
78
+
79
+ describe ".solrize_node" do
80
+ it "should optionally allow you to provide the Solr::Document to add fields to and return that document when done" do
81
+ doc = Solr::Document.new
82
+ # @mods_article.solrize_node(node, term_pointer, term, solr_doc).should equal(doc)
83
+ end
84
+ it "should create a solr field containing node.text"
85
+ it "should create hierarchical field entries if parents is not empty"
86
+ it "should only create one node if parents is empty"
87
+ end
88
+ end
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: solrizer
3
3
  version: !ruby/object:Gem::Version
4
- hash: 29
4
+ hash: 23
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 1
9
- - 3
10
- version: 0.1.3
8
+ - 2
9
+ - 0
10
+ version: 0.2.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - Matt Zumwalt
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2010-09-10 00:00:00 -05:00
18
+ date: 2010-09-15 00:00:00 -05:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency
@@ -35,9 +35,25 @@ dependencies:
35
35
  type: :runtime
36
36
  version_requirements: *id001
37
37
  - !ruby/object:Gem::Dependency
38
- name: rspec
38
+ name: om
39
39
  prerelease: false
40
40
  requirement: &id002 !ruby/object:Gem::Requirement
41
+ none: false
42
+ requirements:
43
+ - - ">="
44
+ - !ruby/object:Gem::Version
45
+ hash: 23
46
+ segments:
47
+ - 1
48
+ - 0
49
+ - 0
50
+ version: 1.0.0
51
+ type: :runtime
52
+ version_requirements: *id002
53
+ - !ruby/object:Gem::Dependency
54
+ name: rspec
55
+ prerelease: false
56
+ requirement: &id003 !ruby/object:Gem::Requirement
41
57
  none: false
42
58
  requirements:
43
59
  - - ">="
@@ -49,7 +65,7 @@ dependencies:
49
65
  - 9
50
66
  version: 1.2.9
51
67
  type: :development
52
- version_requirements: *id002
68
+ version_requirements: *id003
53
69
  description: Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener
54
70
  email: matt.zumwalt@yourmediashelf.com
55
71
  executables: []
@@ -69,13 +85,17 @@ files:
69
85
  - config/fedora.yml
70
86
  - config/hydra_types.yml
71
87
  - config/solr.yml
88
+ - config/solr_mappings.yml
72
89
  - lib/solrizer.rb
73
90
  - lib/solrizer/configuration.rb
74
91
  - lib/solrizer/extractor.rb
92
+ - lib/solrizer/field_name_mapper.rb
75
93
  - lib/solrizer/indexer.rb
76
94
  - lib/solrizer/main.rb
77
95
  - lib/solrizer/replicator.rb
78
96
  - lib/solrizer/repository.rb
97
+ - lib/solrizer/xml.rb
98
+ - lib/solrizer/xml/terminology_based_solrizer.rb
79
99
  - lib/tasks/solrizer.rake
80
100
  - solrizer.gemspec
81
101
  - spec/fixtures/druid-bv448hq0314-descMetadata.xml
@@ -83,14 +103,18 @@ files:
83
103
  - spec/fixtures/druid-cm234kq4672-extProperties.xml
84
104
  - spec/fixtures/druid-cm234kq4672-stories.xml
85
105
  - spec/fixtures/druid-hc513kw4806-descMetadata.xml
106
+ - spec/fixtures/mods_articles/hydrangea_article1.xml
86
107
  - spec/fixtures/rels_ext_cmodel.xml
108
+ - spec/fixtures/solr_mappings_af_0.1.yml
87
109
  - spec/integration/indexer_spec.rb
88
110
  - spec/rcov.opts
89
111
  - spec/spec.opts
90
112
  - spec/spec_helper.rb
91
113
  - spec/units/extractor_spec.rb
114
+ - spec/units/field_name_mapper_spec.rb
92
115
  - spec/units/indexer_spec.rb
93
116
  - spec/units/shelver_spec.rb
117
+ - spec/units/xml_terminology_based_solrizer_spec.rb
94
118
  has_rdoc: true
95
119
  homepage: http://github.com/projecthydra/solrizer
96
120
  licenses: []
@@ -129,5 +153,7 @@ test_files:
129
153
  - spec/integration/indexer_spec.rb
130
154
  - spec/spec_helper.rb
131
155
  - spec/units/extractor_spec.rb
156
+ - spec/units/field_name_mapper_spec.rb
132
157
  - spec/units/indexer_spec.rb
133
158
  - spec/units/shelver_spec.rb
159
+ - spec/units/xml_terminology_based_solrizer_spec.rb