solrizer 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/Gemfile +14 -0
- data/Gemfile.lock +44 -0
- data/History.txt +8 -0
- data/Rakefile +10 -3
- data/VERSION +1 -1
- data/config/solr_mappings.yml +16 -13
- data/config/solr_mappings_af_0.1.yml +18 -0
- data/lib/solrizer/extractor.rb +31 -72
- data/lib/solrizer/field_mapper.rb +351 -0
- data/lib/solrizer/field_name_mapper.rb +37 -51
- data/lib/solrizer/html/extractor.rb +36 -0
- data/lib/solrizer/html.rb +7 -0
- data/lib/solrizer/xml/extractor.rb +31 -0
- data/lib/solrizer/xml/terminology_based_solrizer.rb +25 -29
- data/lib/solrizer/xml.rb +4 -1
- data/lib/solrizer.rb +2 -113
- data/lib/tasks/solrizer.rake +7 -27
- data/solrizer.gemspec +46 -26
- data/spec/{spec.opts → .rspec} +0 -0
- data/spec/fixtures/test_solr_mappings.yml +16 -0
- data/spec/spec_helper.rb +1 -0
- data/spec/units/extractor_spec.rb +43 -34
- data/spec/units/field_mapper_spec.rb +227 -0
- data/spec/units/field_name_mapper_spec.rb +16 -29
- data/spec/units/xml_extractor_spec.rb +28 -0
- data/spec/units/xml_terminology_based_solrizer_spec.rb +18 -5
- metadata +128 -35
- data/lib/solrizer/configuration.rb +0 -8
- data/lib/solrizer/indexer.rb +0 -261
- data/lib/solrizer/main.rb +0 -17
- data/lib/solrizer/replicator.rb +0 -143
- data/lib/solrizer/repository.rb +0 -54
- data/spec/fixtures/rels_ext_cmodel.xml +0 -8
- data/spec/fixtures/solr_mappings_af_0.1.yml +0 -16
- data/spec/integration/indexer_spec.rb +0 -18
- data/spec/units/indexer_spec.rb +0 -127
- data/spec/units/shelver_spec.rb +0 -42
@@ -1,62 +1,48 @@
|
|
1
|
-
|
1
|
+
# Re-Introduced for backwards compatibility
|
2
|
+
module Solrizer::FieldNameMapper
|
3
|
+
|
4
|
+
# Class Methods -- These methods will be available on classes that include this Module
|
5
|
+
|
6
|
+
module ClassMethods
|
7
|
+
def mappings
|
8
|
+
return self.default_field_mapper.mappings
|
9
|
+
end
|
2
10
|
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
11
|
+
def id_field
|
12
|
+
return self.default_field_mapper.id_field
|
13
|
+
end
|
14
|
+
|
15
|
+
# Re-loads solr mappings for the default field mapper's class
|
16
|
+
# and re-sets the default field mapper to a FieldMapper instance with those mappings.
|
17
|
+
def load_mappings( config_path=nil)
|
18
|
+
self.default_field_mapper.class.load_mappings(config_path)
|
19
|
+
self.default_field_mapper = self.default_field_mapper.class.new
|
20
|
+
end
|
21
|
+
|
22
|
+
def solr_name(field_name, field_type, index_type = :searchable)
|
23
|
+
self.default_field_mapper.solr_name(field_name, field_type, index_type)
|
24
|
+
end
|
25
|
+
|
26
|
+
def default_field_mapper
|
27
|
+
@@default_field_mapper ||= Solrizer::FieldMapper::Default.new
|
28
|
+
end
|
29
|
+
|
30
|
+
def default_field_mapper=(field_mapper)
|
31
|
+
@@default_field_mapper = field_mapper
|
16
32
|
end
|
17
33
|
end
|
18
34
|
|
19
|
-
|
20
|
-
@@mappings
|
21
|
-
end
|
22
|
-
|
23
|
-
def self.mappings=(mappings)
|
24
|
-
@@mappings = mappings
|
25
|
-
end
|
35
|
+
# Instance Methods -- These methods will be available on instances of classes that include this module
|
26
36
|
|
27
|
-
|
37
|
+
attr_accessor :ox_namespaces
|
28
38
|
|
29
|
-
def
|
30
|
-
|
39
|
+
def self.included(klass)
|
40
|
+
klass.extend(ClassMethods)
|
31
41
|
end
|
32
42
|
|
33
|
-
def self.logger
|
34
|
-
@logger ||= defined?(RAILS_DEFAULT_LOGGER) ? RAILS_DEFAULT_LOGGER : Logger.new(STDOUT)
|
35
|
-
end
|
36
43
|
|
37
|
-
|
38
|
-
|
39
|
-
# @mappings_file This is the filename for your solr mappings YAML file. @default solr_mappings.yml
|
40
|
-
def self.load_mappings( config_path=nil )
|
41
|
-
|
42
|
-
if config_path.nil?
|
43
|
-
if defined?(RAILS_ROOT)
|
44
|
-
config_path = File.join(RAILS_ROOT, "config", "solr_mappings.yml")
|
45
|
-
end
|
46
|
-
# Default to using the config file within the gem
|
47
|
-
if !File.exist?(config_path.to_s)
|
48
|
-
config_path = File.join(File.dirname(__FILE__), "..", "..", "config", "solr_mappings.yml")
|
49
|
-
end
|
50
|
-
end
|
51
|
-
|
52
|
-
logger.info("SOLRIZER: loading field name mappings from #{File.expand_path(config_path)}")
|
53
|
-
|
54
|
-
@@mappings = YAML::load(File.open(config_path))
|
55
|
-
|
56
|
-
mappings["id"] = "id" unless mappings["id"]
|
44
|
+
def solr_name(field_name, field_type, index_type = :searchable)
|
45
|
+
self.class.solr_name(field_name, field_type, index_type)
|
57
46
|
end
|
58
47
|
|
59
|
-
|
60
|
-
self.load_mappings
|
61
|
-
end #FieldNameMapper
|
62
|
-
end #Solrizer
|
48
|
+
end
|
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'solr'
|
2
|
+
require 'rexml/document'
|
3
|
+
require "nokogiri"
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
module Solrizer::HTML::Extractor
|
7
|
+
|
8
|
+
#
|
9
|
+
# This method strips html tags out and returns content to be indexed in solr
|
10
|
+
#
|
11
|
+
def html_to_solr( ds, solr_doc=Solr::Document.new )
|
12
|
+
|
13
|
+
text = CGI.unescapeHTML(ds.content)
|
14
|
+
doc = Nokogiri::HTML(text)
|
15
|
+
|
16
|
+
# html to story_display
|
17
|
+
stories = doc.xpath('//story')
|
18
|
+
|
19
|
+
stories.each do |story|
|
20
|
+
solr_doc << Solr::Field.new(:story_display => story.children.to_xml)
|
21
|
+
end
|
22
|
+
|
23
|
+
#strip out text and put in story_t
|
24
|
+
text_nodes = doc.xpath("//text()")
|
25
|
+
text = String.new
|
26
|
+
|
27
|
+
text_nodes.each do |text_node|
|
28
|
+
text << text_node.content
|
29
|
+
end
|
30
|
+
|
31
|
+
solr_doc << Solr::Field.new(:story_t => text)
|
32
|
+
|
33
|
+
return solr_doc
|
34
|
+
end
|
35
|
+
|
36
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
require 'solr'
|
2
|
+
require 'rexml/document'
|
3
|
+
require "nokogiri"
|
4
|
+
require 'yaml'
|
5
|
+
|
6
|
+
module Solrizer::XML::Extractor
|
7
|
+
|
8
|
+
def extract_tags(text)
|
9
|
+
doc = REXML::Document.new( text )
|
10
|
+
extract_tag(doc, 'archivist_tags').merge(extract_tag(doc, 'donor_tags'))
|
11
|
+
end
|
12
|
+
|
13
|
+
def extract_tag(doc, type)
|
14
|
+
tags = doc.elements["/fields/#{type}"]
|
15
|
+
return {} unless tags
|
16
|
+
{type => tags.text.split(/,/).map {|t| t.strip}}
|
17
|
+
end
|
18
|
+
|
19
|
+
#
|
20
|
+
# This method extracts solr fields from simple xml
|
21
|
+
#
|
22
|
+
def xml_to_solr( text, solr_doc=Solr::Document.new )
|
23
|
+
doc = REXML::Document.new( text )
|
24
|
+
doc.root.elements.each do |element|
|
25
|
+
solr_doc << Solr::Field.new( :"#{element.name}_t" => "#{element.text}" )
|
26
|
+
end
|
27
|
+
|
28
|
+
return solr_doc
|
29
|
+
end
|
30
|
+
|
31
|
+
end
|
@@ -1,15 +1,19 @@
|
|
1
1
|
# This module is only suitable to mix into Classes that use the OM::XML::Document Module
|
2
2
|
module Solrizer::XML::TerminologyBasedSolrizer
|
3
3
|
|
4
|
+
def self.default_field_mapper
|
5
|
+
@@default_field_mapper ||= Solrizer::FieldMapper::Default.new
|
6
|
+
end
|
7
|
+
|
4
8
|
# Module Methods
|
5
9
|
|
6
10
|
# Build a solr document from +doc+ based on its terminology
|
7
11
|
# @doc OM::XML::Document
|
8
12
|
# @solr_doc (optional) Solr::Document to populate
|
9
|
-
def self.solrize(doc, solr_doc=Solr::Document.new)
|
13
|
+
def self.solrize(doc, solr_doc=Solr::Document.new, field_mapper = nil)
|
10
14
|
unless doc.class.terminology.nil?
|
11
15
|
doc.class.terminology.terms.each_pair do |term_name,term|
|
12
|
-
doc.solrize_term(term, solr_doc)
|
16
|
+
doc.solrize_term(term, solr_doc, field_mapper)
|
13
17
|
# self.solrize_by_term(accessor_name, accessor_info, :solr_doc=>solr_doc)
|
14
18
|
end
|
15
19
|
end
|
@@ -21,7 +25,7 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
21
25
|
# term identified by +term_pointer+ within +terminology+
|
22
26
|
# @doc OM::XML::Document or Nokogiri::XML::Node
|
23
27
|
# @term_pointer Array pointing to the desired term in +terminology+
|
24
|
-
def self.solrize_term(doc, term, solr_doc = Solr::Document.new, opts={})
|
28
|
+
def self.solrize_term(doc, term, solr_doc = Solr::Document.new, field_mapper = nil, opts={})
|
25
29
|
terminology = doc.class.terminology
|
26
30
|
parents = opts.fetch(:parents, [])
|
27
31
|
|
@@ -37,9 +41,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
37
41
|
nodeset.each do |node|
|
38
42
|
# create solr fields
|
39
43
|
|
40
|
-
self.solrize_node(node, doc, term_pointer, term, solr_doc)
|
44
|
+
self.solrize_node(node, doc, term_pointer, term, solr_doc, field_mapper)
|
41
45
|
term.children.each_pair do |child_term_name, child_term|
|
42
|
-
doc.solrize_term(child_term, solr_doc, opts={:parents=>parents+[{term.name=>nodeset.index(node)}]})
|
46
|
+
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node)}]})
|
43
47
|
# self.solrize_term(doc, child_term_name, child_term, opts={:solr_doc=>solr_doc, :parents=>parents+[{accessor_name=>nodeset.index(node)}] })
|
44
48
|
end
|
45
49
|
end
|
@@ -51,7 +55,8 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
51
55
|
# @doc OM::XML::Document or Nokogiri::XML::Node
|
52
56
|
# @term_pointer Array pointing to the desired term in +terminology+
|
53
57
|
# @solr_doc (optional) Solr::Document to populate
|
54
|
-
def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new)
|
58
|
+
def self.solrize_node(node, doc, term_pointer, term, solr_doc = Solr::Document.new, field_mapper = nil, opts = {})
|
59
|
+
field_mapper ||= self.default_field_mapper
|
55
60
|
terminology = doc.class.terminology
|
56
61
|
# term = terminology.retrieve_term(*term_pointer)
|
57
62
|
|
@@ -62,43 +67,34 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
62
67
|
end
|
63
68
|
|
64
69
|
generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
|
65
|
-
generic_field_name = generate_solr_symbol(generic_field_name_base, term.data_type)
|
66
70
|
|
67
|
-
|
71
|
+
field_mapper.solr_names_and_values(generic_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
72
|
+
solr_doc << Solr::Field.new(field_name => field_value)
|
73
|
+
end
|
68
74
|
|
69
75
|
if term_pointer.length > 1
|
70
76
|
hierarchical_field_name_base = OM::XML::Terminology.term_hierarchical_name(*term_pointer)
|
71
|
-
|
72
|
-
|
77
|
+
field_mapper.solr_names_and_values(hierarchical_field_name_base, node_value, term.data_type, term.index_as).each do |field_name, field_value|
|
78
|
+
solr_doc << Solr::Field.new(field_name => field_value)
|
79
|
+
end
|
73
80
|
end
|
74
81
|
solr_doc
|
75
82
|
end
|
76
83
|
|
77
|
-
# Use Solrizer::FieldNameMapper to generate an appropriate solr field name +field_name+ and +field_type+
|
78
|
-
def self.generate_solr_symbol(field_name, field_type) # :nodoc:
|
79
|
-
Solrizer::FieldNameMapper.solr_name(field_name, field_type)
|
80
|
-
end
|
81
|
-
|
82
84
|
# Instance Methods
|
83
85
|
|
86
|
+
attr_accessor :field_mapper
|
84
87
|
|
85
|
-
def to_solr(solr_doc = Solr::Document.new) # :nodoc:
|
86
|
-
Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc)
|
88
|
+
def to_solr(solr_doc = Solr::Document.new, field_mapper = self.field_mapper) # :nodoc:
|
89
|
+
Solrizer::XML::TerminologyBasedSolrizer.solrize(self, solr_doc, field_mapper)
|
87
90
|
end
|
88
91
|
|
89
|
-
|
90
|
-
|
91
|
-
Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, opts)
|
92
|
+
def solrize_term(term, solr_doc = Solr::Document.new, field_mapper = self.field_mapper, opts={})
|
93
|
+
Solrizer::XML::TerminologyBasedSolrizer.solrize_term(self, term, solr_doc, field_mapper, opts)
|
92
94
|
end
|
93
95
|
|
94
|
-
def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new)
|
95
|
-
Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc)
|
96
|
-
end
|
97
|
-
|
98
|
-
protected
|
99
|
-
|
100
|
-
def generate_solr_symbol(field_name, field_type) # :nodoc:
|
101
|
-
Solrizer::XML::TerminologyBasedSolrizer.generate_solr_symbol(field_name, field_type)
|
96
|
+
def solrize_node(node, term_pointer, term, solr_doc = Solr::Document.new, field_mapper = self.field_mapper, opts={})
|
97
|
+
Solrizer::XML::TerminologyBasedSolrizer.solrize_node(node, self, term_pointer, solr_doc, field_mapper, opts)
|
102
98
|
end
|
103
99
|
|
104
|
-
end
|
100
|
+
end
|
data/lib/solrizer/xml.rb
CHANGED
data/lib/solrizer.rb
CHANGED
@@ -1,115 +1,4 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
|
3
|
-
require 'solrizer/field_name_mapper.rb'
|
2
|
+
module Solrizer;end
|
4
3
|
|
5
|
-
|
6
|
-
# require 'solrizer/xml.rb'
|
7
|
-
|
8
|
-
# require 'fastercsv'
|
9
|
-
require "ruby-debug"
|
10
|
-
|
11
|
-
|
12
|
-
module Solrizer
|
13
|
-
class Solrizer
|
14
|
-
|
15
|
-
attr_accessor :indexer, :index_full_text
|
16
|
-
|
17
|
-
#
|
18
|
-
# This method initializes the indexer
|
19
|
-
# If passed an argument of :index_full_text=>true, it will perform full-text indexing instead of indexing fields only.
|
20
|
-
#
|
21
|
-
def initialize( opts={} )
|
22
|
-
@@index_list = false unless defined?(@@index_list)
|
23
|
-
if opts[:index_full_text] == true || opts[:index_full_text] == "true"
|
24
|
-
@index_full_text = true
|
25
|
-
else
|
26
|
-
@index_full_text = false
|
27
|
-
end
|
28
|
-
@indexer = Indexer.new( :index_full_text=>@index_full_text )
|
29
|
-
end
|
30
|
-
|
31
|
-
#
|
32
|
-
# This method solrizes the given Fedora object's full-text and facets into the search index
|
33
|
-
#
|
34
|
-
def solrize( obj )
|
35
|
-
# retrieve the Fedora object based on the given unique id
|
36
|
-
|
37
|
-
begin
|
38
|
-
|
39
|
-
start = Time.now
|
40
|
-
print "Retrieving object #{obj} ..."
|
41
|
-
|
42
|
-
case obj
|
43
|
-
when ActiveFedora::Base
|
44
|
-
# do nothing
|
45
|
-
when Fedora::FedoraObject
|
46
|
-
obj = Repository.get_object( obj.pid )
|
47
|
-
when String
|
48
|
-
obj = Repository.get_object( obj )
|
49
|
-
else
|
50
|
-
raise "you must pass either a ActiveFedora::Base, Fedora::RepositoryObject, or a String. You submitted a #{obj.class}"
|
51
|
-
end
|
52
|
-
|
53
|
-
# obj = obj.kind_of?(ActiveFedora::Base) ? obj : Repository.get_object( obj )
|
54
|
-
|
55
|
-
obj_done = Time.now
|
56
|
-
obj_done_elapse = obj_done - start
|
57
|
-
puts " completed. Duration: #{obj_done_elapse}"
|
58
|
-
|
59
|
-
print "\t Indexing object #{obj.pid} ... "
|
60
|
-
# add the keywords and facets to the search index
|
61
|
-
index_start = Time.now
|
62
|
-
indexer.index( obj )
|
63
|
-
|
64
|
-
index_done = Time.now
|
65
|
-
index_elapsed = index_done - index_start
|
66
|
-
|
67
|
-
puts "completed. Duration: #{index_elapsed} ."
|
68
|
-
|
69
|
-
|
70
|
-
rescue Exception => e
|
71
|
-
p "unable to index #{obj}. Failed with #{e.inspect}"
|
72
|
-
|
73
|
-
|
74
|
-
end #begin
|
75
|
-
|
76
|
-
end
|
77
|
-
|
78
|
-
#
|
79
|
-
# This method retrieves a comprehensive list of all the unique identifiers in Fedora and
|
80
|
-
# solrizes each object's full-text and facets into the search index
|
81
|
-
def solrize_objects
|
82
|
-
# retrieve a list of all the pids in the fedora repository
|
83
|
-
num_docs = 1000000 # modify this number to guarantee that all the objects are retrieved from the repository
|
84
|
-
puts "WARNING: You have turned off indexing of Full Text content. Be sure to re-run indexer with @@index_full_text set to true in main.rb" if index_full_text == false
|
85
|
-
|
86
|
-
if @@index_list == false
|
87
|
-
|
88
|
-
objects = Fedora::Repository.instance.find_objects(:limit=>num_docs)
|
89
|
-
|
90
|
-
puts "Shelving #{objects.length} Fedora objects"
|
91
|
-
objects.each do |object|
|
92
|
-
solrize( object )
|
93
|
-
end
|
94
|
-
|
95
|
-
else
|
96
|
-
|
97
|
-
if File.exists?(@@index_list)
|
98
|
-
arr_of_pids = FasterCSV.read(@@index_list, :headers=>false)
|
99
|
-
|
100
|
-
puts "Indexing from list at #{@@index_list}"
|
101
|
-
puts "Shelving #{arr_of_pids.length} Fedora objects"
|
102
|
-
|
103
|
-
arr_of_pids.each do |row|
|
104
|
-
pid = row[0]
|
105
|
-
solrize( pid )
|
106
|
-
end #FASTERCSV
|
107
|
-
else
|
108
|
-
puts "#{@@index_list} does not exists!"
|
109
|
-
end #if File.exists
|
110
|
-
|
111
|
-
end #if Index_LISTS
|
112
|
-
end #solrize_objects
|
113
|
-
|
114
|
-
end #class
|
115
|
-
end #module
|
4
|
+
Dir[File.join(File.dirname(__FILE__),"solrizer","*.rb")].each {|file| require file }
|
data/lib/tasks/solrizer.rake
CHANGED
@@ -1,33 +1,13 @@
|
|
1
1
|
namespace :solrizer do
|
2
2
|
|
3
|
-
desc '
|
4
|
-
task :solrize
|
5
|
-
|
6
|
-
if ENV['PID']
|
7
|
-
puts "indexing #{ENV['PID'].inspect}"
|
8
|
-
solrizer = Solrizer::Solrizer.new :index_full_text=> index_full_text
|
9
|
-
solrizer.solrize(ENV['PID'])
|
10
|
-
puts "Finished shelving #{ENV['PID']}"
|
11
|
-
else
|
12
|
-
puts "You must provide a pid using the format 'solrizer::solrize_object PID=sample:pid'."
|
13
|
-
end
|
3
|
+
desc 'Placeholder for generic solrization task.'
|
4
|
+
task :solrize do
|
5
|
+
puts "Nobody here. Possibly you meant to run rake solrizer:fedora:solrize PID=..."
|
14
6
|
end
|
15
7
|
|
16
|
-
desc '
|
17
|
-
task :solrize_objects
|
18
|
-
|
19
|
-
|
20
|
-
@@index_list = ENV['INDEX_LIST']
|
21
|
-
end
|
22
|
-
|
23
|
-
puts "Re-indexing Fedora Repository."
|
24
|
-
puts "Fedora URL: #{ActiveFedora.fedora_config[:url]}"
|
25
|
-
puts "Fedora Solr URL: #{ActiveFedora.solr_config[:url]}"
|
26
|
-
puts "Blacklight Solr Config: #{Blacklight.solr_config.inspect}"
|
27
|
-
puts "Doing full text index." if index_full_text
|
28
|
-
solrizer = Solrizer::Solrizer.new :index_full_text=> index_full_text
|
29
|
-
solrizer.solrize_objects
|
30
|
-
puts "Solrizer task complete."
|
31
|
-
end
|
8
|
+
desc 'Placeholder for generic solrization task.'
|
9
|
+
task :solrize_objects do
|
10
|
+
puts "Nobody here. Possibly you meant to run rake solrizer:fedora:solrize_objects"
|
11
|
+
end
|
32
12
|
|
33
13
|
end
|
data/solrizer.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{solrizer}
|
8
|
-
s.version = "0.2.0"
|
8
|
+
s.version = "0.3.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["Matt Zumwalt"]
|
12
|
-
s.date = %q{2010-
|
12
|
+
s.date = %q{2010-10-26}
|
13
13
|
s.description = %q{Use solrizer to populate solr indexes from Fedora repository content or from other sources. You can run solrizer from within your apps, using the provided rake tasks, or as a JMS listener}
|
14
14
|
s.email = %q{matt.zumwalt@yourmediashelf.com}
|
15
15
|
s.extra_rdoc_files = [
|
@@ -18,6 +18,8 @@ Gem::Specification.new do |s|
|
|
18
18
|
]
|
19
19
|
s.files = [
|
20
20
|
".gitignore",
|
21
|
+
"Gemfile",
|
22
|
+
"Gemfile.lock",
|
21
23
|
"History.txt",
|
22
24
|
"LICENSE",
|
23
25
|
"README.textile",
|
@@ -27,34 +29,32 @@ Gem::Specification.new do |s|
|
|
27
29
|
"config/hydra_types.yml",
|
28
30
|
"config/solr.yml",
|
29
31
|
"config/solr_mappings.yml",
|
32
|
+
"config/solr_mappings_af_0.1.yml",
|
30
33
|
"lib/solrizer.rb",
|
31
|
-
"lib/solrizer/configuration.rb",
|
32
34
|
"lib/solrizer/extractor.rb",
|
35
|
+
"lib/solrizer/field_mapper.rb",
|
33
36
|
"lib/solrizer/field_name_mapper.rb",
|
34
|
-
"lib/solrizer/indexer.rb",
|
35
|
-
"lib/solrizer/main.rb",
|
36
|
-
"lib/solrizer/replicator.rb",
|
37
|
-
"lib/solrizer/repository.rb",
|
37
|
+
"lib/solrizer/html.rb",
|
38
|
+
"lib/solrizer/html/extractor.rb",
|
38
39
|
"lib/solrizer/xml.rb",
|
40
|
+
"lib/solrizer/xml/extractor.rb",
|
39
41
|
"lib/solrizer/xml/terminology_based_solrizer.rb",
|
40
42
|
"lib/tasks/solrizer.rake",
|
41
43
|
"solrizer.gemspec",
|
44
|
+
"spec/.rspec",
|
42
45
|
"spec/fixtures/druid-bv448hq0314-descMetadata.xml",
|
43
46
|
"spec/fixtures/druid-bv448hq0314-extProperties.xml",
|
44
47
|
"spec/fixtures/druid-cm234kq4672-extProperties.xml",
|
45
48
|
"spec/fixtures/druid-cm234kq4672-stories.xml",
|
46
49
|
"spec/fixtures/druid-hc513kw4806-descMetadata.xml",
|
47
50
|
"spec/fixtures/mods_articles/hydrangea_article1.xml",
|
48
|
-
"spec/fixtures/rels_ext_cmodel.xml",
|
49
|
-
"spec/fixtures/solr_mappings_af_0.1.yml",
|
50
|
-
"spec/integration/indexer_spec.rb",
|
51
|
+
"spec/fixtures/test_solr_mappings.yml",
|
51
52
|
"spec/rcov.opts",
|
52
|
-
"spec/spec.opts",
|
53
53
|
"spec/spec_helper.rb",
|
54
54
|
"spec/units/extractor_spec.rb",
|
55
|
+
"spec/units/field_mapper_spec.rb",
|
55
56
|
"spec/units/field_name_mapper_spec.rb",
|
56
|
-
"spec/units/indexer_spec.rb",
|
57
|
-
"spec/units/shelver_spec.rb",
|
57
|
+
"spec/units/xml_extractor_spec.rb",
|
58
58
|
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
59
59
|
]
|
60
60
|
s.homepage = %q{http://github.com/projecthydra/solrizer}
|
@@ -63,12 +63,11 @@ Gem::Specification.new do |s|
|
|
63
63
|
s.rubygems_version = %q{1.3.7}
|
64
64
|
s.summary = %q{A utility for building solr indexes, usually from Fedora repository content.}
|
65
65
|
s.test_files = [
|
66
|
-
"spec/integration/indexer_spec.rb",
|
67
|
-
"spec/spec_helper.rb",
|
66
|
+
"spec/spec_helper.rb",
|
68
67
|
"spec/units/extractor_spec.rb",
|
68
|
+
"spec/units/field_mapper_spec.rb",
|
69
69
|
"spec/units/field_name_mapper_spec.rb",
|
70
|
-
"spec/units/indexer_spec.rb",
|
71
|
-
"spec/units/shelver_spec.rb",
|
70
|
+
"spec/units/xml_extractor_spec.rb",
|
72
71
|
"spec/units/xml_terminology_based_solrizer_spec.rb"
|
73
72
|
]
|
74
73
|
|
@@ -77,18 +76,39 @@ Gem::Specification.new do |s|
|
|
77
76
|
s.specification_version = 3
|
78
77
|
|
79
78
|
if Gem::Version.new(Gem::VERSION) >= Gem::Version.new('1.2.0') then
|
80
|
-
s.add_runtime_dependency(%q<
|
81
|
-
s.add_runtime_dependency(%q<
|
82
|
-
s.
|
79
|
+
s.add_runtime_dependency(%q<solr-ruby>, [">= 0"])
|
80
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
81
|
+
s.add_runtime_dependency(%q<om>, [">= 0"])
|
82
|
+
s.add_runtime_dependency(%q<nokogiri>, [">= 0"])
|
83
|
+
s.add_runtime_dependency(%q<mediashelf-loggable>, [">= 0"])
|
84
|
+
s.add_development_dependency(%q<jeweler>, [">= 0"])
|
85
|
+
s.add_development_dependency(%q<ruby-debug>, [">= 0"])
|
86
|
+
s.add_development_dependency(%q<ruby-debug-base>, [">= 0"])
|
87
|
+
s.add_development_dependency(%q<rspec>, ["< 2.0.0"])
|
88
|
+
s.add_development_dependency(%q<mocha>, [">= 0"])
|
83
89
|
else
|
84
|
-
s.add_dependency(%q<
|
85
|
-
s.add_dependency(%q<
|
86
|
-
s.add_dependency(%q<
|
90
|
+
s.add_dependency(%q<solr-ruby>, [">= 0"])
|
91
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
92
|
+
s.add_dependency(%q<om>, [">= 0"])
|
93
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
94
|
+
s.add_dependency(%q<mediashelf-loggable>, [">= 0"])
|
95
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
96
|
+
s.add_dependency(%q<ruby-debug>, [">= 0"])
|
97
|
+
s.add_dependency(%q<ruby-debug-base>, [">= 0"])
|
98
|
+
s.add_dependency(%q<rspec>, ["< 2.0.0"])
|
99
|
+
s.add_dependency(%q<mocha>, [">= 0"])
|
87
100
|
end
|
88
101
|
else
|
89
|
-
s.add_dependency(%q<
|
90
|
-
s.add_dependency(%q<
|
91
|
-
s.add_dependency(%q<
|
102
|
+
s.add_dependency(%q<solr-ruby>, [">= 0"])
|
103
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
104
|
+
s.add_dependency(%q<om>, [">= 0"])
|
105
|
+
s.add_dependency(%q<nokogiri>, [">= 0"])
|
106
|
+
s.add_dependency(%q<mediashelf-loggable>, [">= 0"])
|
107
|
+
s.add_dependency(%q<jeweler>, [">= 0"])
|
108
|
+
s.add_dependency(%q<ruby-debug>, [">= 0"])
|
109
|
+
s.add_dependency(%q<ruby-debug-base>, [">= 0"])
|
110
|
+
s.add_dependency(%q<rspec>, ["< 2.0.0"])
|
111
|
+
s.add_dependency(%q<mocha>, [">= 0"])
|
92
112
|
end
|
93
113
|
end
|
94
114
|
|
data/spec/{spec.opts → .rspec}
RENAMED
File without changes
|
@@ -0,0 +1,16 @@
|
|
1
|
+
id: pid
|
2
|
+
default: edible
|
3
|
+
edible:
|
4
|
+
date: _edible_date
|
5
|
+
string: _edible_string
|
6
|
+
text: _edible_text
|
7
|
+
symbol: _edible_sym
|
8
|
+
integer: _edible_int
|
9
|
+
long: _edible_long
|
10
|
+
boolean: _edible_bool
|
11
|
+
float: _edible_float
|
12
|
+
double: _edible_double
|
13
|
+
displayable: _display
|
14
|
+
facetable: _facet
|
15
|
+
sortable: _sort
|
16
|
+
unstemmed_searchable: _unstem_search
|