solrizer 2.0.0.rc4 → 2.0.0.rc5
Sign up to get free protection for your applications and to get access to all the features.
data/History.txt
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
h2. 2.0.0
|
2
2
|
HYDRA-827 DO NOT index terms by default
|
3
3
|
HYDRA-863 Null pointer exception fixed.
|
4
|
-
|
5
|
-
BUG:
|
6
|
-
|
7
|
-
|
8
|
-
Test fixes such as properly testing solr field suffixes when using the :type option in OM as well as using correct
|
9
|
-
ISO.8601 dates in our fixtures
|
4
|
+
BUG: Replaced data_type method with type which was causing the :type option in OM terms not to work. This makes certain tests not pass under Ruby 1.8.7, specifically when using :type option with a proxied term in OM.
|
5
|
+
BUG: Fixed TerminologyBasedSolrizer#solrize_node instance method to call class method correctly, and changed #solrize_term to call instance method #solrize_node so that it can be overridden by subclasses
|
6
|
+
Test fixes, such as properly testing solr field suffixes when using the :type option in OM and using correct ISO 8601 dates in our fixtures
|
7
|
+
HYDRA-876 When solrizing OM documents, term_values is called instead of find_by_terms, which ensures data types are deserialized correctly. Dates are also converted to ISO 8601 format, both when integrating with OM and when using solrizer independently.
|
10
8
|
|
11
9
|
h2. 1.2.2
|
12
10
|
BUG: RSolr::Client doesn't have a delete method. Changed to delete_by_id. (only affects solrizer shell script) Thanks to mkorcy.
|
@@ -330,7 +330,9 @@ module Solrizer
|
|
330
330
|
id_field 'id'
|
331
331
|
index_as :searchable, :default => true do |t|
|
332
332
|
t.default :suffix => '_t'
|
333
|
-
t.date
|
333
|
+
t.date :suffix => '_dt' do |value|
|
334
|
+
value.is_a?(Date) ? Time.parse(value.to_s).utc.iso8601 : Time.parse(value).utc.iso8601
|
335
|
+
end
|
334
336
|
t.string :suffix => '_t'
|
335
337
|
t.text :suffix => '_t'
|
336
338
|
t.symbol :suffix => '_s'
|
data/lib/solrizer/version.rb
CHANGED
@@ -14,32 +14,36 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
14
14
|
unless doc.class.terminology.nil?
|
15
15
|
doc.class.terminology.terms.each_pair do |term_name,term|
|
16
16
|
doc.solrize_term(term, solr_doc, field_mapper)
|
17
|
-
# self.solrize_by_term(accessor_name, accessor_info, :solr_doc=>solr_doc)
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
21
20
|
return solr_doc
|
22
21
|
end
|
23
22
|
|
24
|
-
# Populate a solr document with fields based on nodes in +xml+
|
25
|
-
# term
|
23
|
+
# Populate a solr document with fields based on nodes in +xml+
|
24
|
+
# Values for a term are gathered by passing +term_pointer+ to OM::XML::TermValueOperators.term_values
|
25
|
+
# and are deserialized by OM according to :type, as determined in its terminology.
|
26
|
+
# The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
|
27
|
+
# rendered to a string.
|
26
28
|
# @param [OM::XML::Document] doc xml document to extract values from
|
27
29
|
# @param [OM::XML::Term] term corresponding to desired xml values
|
28
30
|
# @param [Hash] (optional) solr_doc (values hash) to populate
|
29
31
|
def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
30
|
-
terminology = doc.class.terminology
|
31
32
|
parents = opts.fetch(:parents, [])
|
32
|
-
|
33
33
|
term_pointer = parents+[term.name]
|
34
|
-
nodeset = doc.
|
34
|
+
nodeset = doc.term_values(*term_pointer)
|
35
35
|
|
36
|
-
nodeset.each do |
|
37
|
-
|
36
|
+
nodeset.each do |n|
|
37
|
+
|
38
|
+
# TODO: Solrizer::FieldMapper::Default is supposed to translate dates into full ISO 8601 formatted strings.
|
39
|
+
# However, there is an integration issue with ActiveFedora using OM: it ignores the default field mapper given
|
40
|
+
# in this gem that does this. So, the following is a workaround until it is fixed.
|
41
|
+
node = n.is_a?(Date) ? Time.parse(n.to_s).utc.iso8601 : n.to_s
|
38
42
|
|
39
|
-
doc.solrize_node(node, term_pointer, term, solr_doc, field_mapper)
|
43
|
+
doc.solrize_node(node.to_s, term_pointer, term, solr_doc, field_mapper)
|
40
44
|
unless term.kind_of? OM::XML::NamedTermProxy
|
41
45
|
term.children.each_pair do |child_term_name, child_term|
|
42
|
-
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node)}]})
|
46
|
+
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node.to_s)}]})
|
43
47
|
end
|
44
48
|
end
|
45
49
|
end
|
@@ -55,16 +59,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
55
59
|
# @param [Term] term the term to be solrized
|
56
60
|
# @param [Hash] (optional) solr_doc (values hash) to populate
|
57
61
|
# @return [Hash] the solr doc
|
58
|
-
def self.solrize_node(
|
62
|
+
def self.solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
63
|
return solr_doc unless term.index_as && !term.index_as.empty?
|
60
64
|
field_mapper ||= self.default_field_mapper
|
61
|
-
terminology = doc.class.terminology
|
62
|
-
|
63
|
-
if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
|
64
|
-
node_value = node.value
|
65
|
-
else
|
66
|
-
node_value = node.text
|
67
|
-
end
|
68
65
|
|
69
66
|
generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
|
70
67
|
|
@@ -201,7 +201,13 @@ describe Solrizer::FieldMapper do
|
|
201
201
|
it "should apply mappings for searchable by default" do
|
202
202
|
# Just sanity check a couple; copy & pasting all data types is silly
|
203
203
|
@mapper.solr_names_and_values('foo', 'bar', :string, []).should == { 'foo_t' => ['bar'] }
|
204
|
-
@mapper.solr_names_and_values('foo',
|
204
|
+
@mapper.solr_names_and_values('foo', "1", :integer, []).should == { 'foo_i' =>["1"] }
|
205
|
+
end
|
206
|
+
|
207
|
+
it "should support full ISO 8601 dates" do
|
208
|
+
@mapper.solr_names_and_values('foo', "2012-11-06", :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
209
|
+
@mapper.solr_names_and_values('foo', "November 6th, 2012", :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
210
|
+
@mapper.solr_names_and_values('foo', Date.parse("6 Nov. 2012"), :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
205
211
|
end
|
206
212
|
|
207
213
|
it "should support displayable, facetable, sortable, unstemmed" do
|
@@ -55,7 +55,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
55
55
|
solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
|
56
56
|
|
57
57
|
# These are a holdover from an old version of OM
|
58
|
-
solr_doc['journal_0_issue_0_publication_date_dt'].should == ["2007-02-
|
58
|
+
solr_doc['journal_0_issue_0_publication_date_dt'].should == ["2007-02-01T05:00:00Z"]
|
59
59
|
|
60
60
|
|
61
61
|
end
|
@@ -89,14 +89,14 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
89
89
|
unless RUBY_VERSION.match("1.8.7")
|
90
90
|
solr_doc = Hash.new
|
91
91
|
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
|
92
|
-
solr_doc["pub_date_dt"].should == ["2007-02-
|
92
|
+
solr_doc["pub_date_dt"].should == ["2007-02-01T05:00:00Z"]
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
96
|
it "should add fields based on type using ref" do
|
97
97
|
solr_doc = Hash.new
|
98
98
|
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
|
99
|
-
solr_doc["issue_date_dt"].should == ["2007-02-
|
99
|
+
solr_doc["issue_date_dt"].should == ["2007-02-15T05:00:00Z"]
|
100
100
|
end
|
101
101
|
|
102
102
|
it "shouldn't index terms where index_as is an empty array" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0.
|
4
|
+
version: 2.0.0.rc5
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -264,4 +264,3 @@ test_files:
|
|
264
264
|
- spec/units/field_name_mapper_spec.rb
|
265
265
|
- spec/units/xml_extractor_spec.rb
|
266
266
|
- spec/units/xml_terminology_based_solrizer_spec.rb
|
267
|
-
has_rdoc:
|