solrizer 2.0.0.rc4 → 2.0.0.rc5
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/History.txt
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
h2. 2.0.0
|
2
2
|
HYDRA-827 DO NOT index terms by default
|
3
3
|
HYDRA-863 Null pointer exception fixed.
|
4
|
-
|
5
|
-
BUG:
|
6
|
-
|
7
|
-
|
8
|
-
Test fixes such as properly testing solr field suffixes when using the :type option in OM as well as using correct
|
9
|
-
ISO.8601 dates in our fixtures
|
4
|
+
BUG: Replaced data_type method with type which was causing the :type option in OM terms not to work. This makes certain tests not pass under Ruby 1.8.7, specifically when using :type option with a proxied term in OM.
|
5
|
+
BUG: Fixed TerminologyBasedSolrizer#solrize_node instance method to call class method correctly, and changed #solrize_term to call instance method #solrize_node so that it can be overridden by subclasses
|
6
|
+
Test fixes such as properly testing solr field suffixes when using the :type option in OM as well as using correct ISO.8601 dates in our fixtures
|
7
|
+
HYDRA-876 When solrizing OM documents, term_values is called instead of find_by_terms which ensures data types are deserialized correctly. Dates are also converted to ISO 8601 format, both when integrating with OM or when using solrizer independently.
|
10
8
|
|
11
9
|
h2. 1.2.2
|
12
10
|
BUG: RSolr::Client doesn't have a delete method. Changed to delete_by_id. (only affects solrizer shell script) Thanks to mkorcy.
|
@@ -330,7 +330,9 @@ module Solrizer
|
|
330
330
|
id_field 'id'
|
331
331
|
index_as :searchable, :default => true do |t|
|
332
332
|
t.default :suffix => '_t'
|
333
|
-
t.date
|
333
|
+
t.date :suffix => '_dt' do |value|
|
334
|
+
value.is_a?(Date) ? Time.parse(value.to_s).utc.iso8601 : Time.parse(value).utc.iso8601
|
335
|
+
end
|
334
336
|
t.string :suffix => '_t'
|
335
337
|
t.text :suffix => '_t'
|
336
338
|
t.symbol :suffix => '_s'
|
data/lib/solrizer/version.rb
CHANGED
@@ -14,32 +14,36 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
14
14
|
unless doc.class.terminology.nil?
|
15
15
|
doc.class.terminology.terms.each_pair do |term_name,term|
|
16
16
|
doc.solrize_term(term, solr_doc, field_mapper)
|
17
|
-
# self.solrize_by_term(accessor_name, accessor_info, :solr_doc=>solr_doc)
|
18
17
|
end
|
19
18
|
end
|
20
19
|
|
21
20
|
return solr_doc
|
22
21
|
end
|
23
22
|
|
24
|
-
# Populate a solr document with fields based on nodes in +xml+
|
25
|
-
# term
|
23
|
+
# Populate a solr document with fields based on nodes in +xml+
|
24
|
+
# Values for a term are gathered by passing +term_pointer+ to OM::XML::TermValueOperators.term_values
|
25
|
+
# and are deserialized by OM according to :type, as determined in its terminology.
|
26
|
+
# The content of the actual field in solr is each +node+ of the +nodeset+ returned by OM,
|
27
|
+
# rendered to a string.
|
26
28
|
# @param [OM::XML::Document] doc xml document to extract values from
|
27
29
|
# @param [OM::XML::Term] term corresponding to desired xml values
|
28
30
|
# @param [Hash] (optional) solr_doc (values hash) to populate
|
29
31
|
def self.solrize_term(doc, term, solr_doc = Hash.new, field_mapper = nil, opts={})
|
30
|
-
terminology = doc.class.terminology
|
31
32
|
parents = opts.fetch(:parents, [])
|
32
|
-
|
33
33
|
term_pointer = parents+[term.name]
|
34
|
-
nodeset = doc.
|
34
|
+
nodeset = doc.term_values(*term_pointer)
|
35
35
|
|
36
|
-
nodeset.each do |
|
37
|
-
|
36
|
+
nodeset.each do |n|
|
37
|
+
|
38
|
+
# TODO: Solrizer::FieldMapper::Default is supposed to translate dates into full ISO 8601 formatted strings.
|
39
|
+
# However, there is an integration issue with ActiveFedora using OM: it ignores the default field mapper given
|
40
|
+
# in this gem that does this. So, the following is a workaround until it is fixed.
|
41
|
+
node = n.is_a?(Date) ? Time.parse(n.to_s).utc.iso8601 : n.to_s
|
38
42
|
|
39
|
-
doc.solrize_node(node, term_pointer, term, solr_doc, field_mapper)
|
43
|
+
doc.solrize_node(node.to_s, term_pointer, term, solr_doc, field_mapper)
|
40
44
|
unless term.kind_of? OM::XML::NamedTermProxy
|
41
45
|
term.children.each_pair do |child_term_name, child_term|
|
42
|
-
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node)}]})
|
46
|
+
doc.solrize_term(child_term, solr_doc, field_mapper, opts={:parents=>parents+[{term.name=>nodeset.index(node.to_s)}]})
|
43
47
|
end
|
44
48
|
end
|
45
49
|
end
|
@@ -55,16 +59,9 @@ module Solrizer::XML::TerminologyBasedSolrizer
|
|
55
59
|
# @param [Term] term the term to be solrized
|
56
60
|
# @param [Hash] (optional) solr_doc (values hash) to populate
|
57
61
|
# @return [Hash] the solr doc
|
58
|
-
def self.solrize_node(
|
62
|
+
def self.solrize_node(node_value, doc, term_pointer, term, solr_doc = Hash.new, field_mapper = nil, opts = {})
|
59
63
|
return solr_doc unless term.index_as && !term.index_as.empty?
|
60
64
|
field_mapper ||= self.default_field_mapper
|
61
|
-
terminology = doc.class.terminology
|
62
|
-
|
63
|
-
if term.path.kind_of?(Hash) && term.path.has_key?(:attribute)
|
64
|
-
node_value = node.value
|
65
|
-
else
|
66
|
-
node_value = node.text
|
67
|
-
end
|
68
65
|
|
69
66
|
generic_field_name_base = OM::XML::Terminology.term_generic_name(*term_pointer)
|
70
67
|
|
@@ -201,7 +201,13 @@ describe Solrizer::FieldMapper do
|
|
201
201
|
it "should apply mappings for searchable by default" do
|
202
202
|
# Just sanity check a couple; copy & pasting all data types is silly
|
203
203
|
@mapper.solr_names_and_values('foo', 'bar', :string, []).should == { 'foo_t' => ['bar'] }
|
204
|
-
@mapper.solr_names_and_values('foo',
|
204
|
+
@mapper.solr_names_and_values('foo', "1", :integer, []).should == { 'foo_i' =>["1"] }
|
205
|
+
end
|
206
|
+
|
207
|
+
it "should support full ISO 8601 dates" do
|
208
|
+
@mapper.solr_names_and_values('foo', "2012-11-06", :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
209
|
+
@mapper.solr_names_and_values('foo', "November 6th, 2012", :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
210
|
+
@mapper.solr_names_and_values('foo', Date.parse("6 Nov. 2012"), :date, []).should == { 'foo_dt' =>["2012-11-06T05:00:00Z"] }
|
205
211
|
end
|
206
212
|
|
207
213
|
it "should support displayable, facetable, sortable, unstemmed" do
|
@@ -55,7 +55,7 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
55
55
|
solr_doc["topic_tag_t"].sort.should == ["CONTROLLED TERM", "TOPIC 1", "TOPIC 2"]
|
56
56
|
|
57
57
|
# These are a holdover from an old version of OM
|
58
|
-
solr_doc['journal_0_issue_0_publication_date_dt'].should == ["2007-02-
|
58
|
+
solr_doc['journal_0_issue_0_publication_date_dt'].should == ["2007-02-01T05:00:00Z"]
|
59
59
|
|
60
60
|
|
61
61
|
end
|
@@ -89,14 +89,14 @@ describe Solrizer::XML::TerminologyBasedSolrizer do
|
|
89
89
|
unless RUBY_VERSION.match("1.8.7")
|
90
90
|
solr_doc = Hash.new
|
91
91
|
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:pub_date), solr_doc)
|
92
|
-
solr_doc["pub_date_dt"].should == ["2007-02-
|
92
|
+
solr_doc["pub_date_dt"].should == ["2007-02-01T05:00:00Z"]
|
93
93
|
end
|
94
94
|
end
|
95
95
|
|
96
96
|
it "should add fields based on type using ref" do
|
97
97
|
solr_doc = Hash.new
|
98
98
|
result = @mods_article.solrize_term(Samples::ModsArticle.terminology.retrieve_term(:issue_date), solr_doc)
|
99
|
-
solr_doc["issue_date_dt"].should == ["2007-02-
|
99
|
+
solr_doc["issue_date_dt"].should == ["2007-02-15T05:00:00Z"]
|
100
100
|
end
|
101
101
|
|
102
102
|
it "shouldn't index terms where index_as is an empty array" do
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: solrizer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.0.0.
|
4
|
+
version: 2.0.0.rc5
|
5
5
|
prerelease: 6
|
6
6
|
platform: ruby
|
7
7
|
authors:
|
@@ -9,7 +9,7 @@ authors:
|
|
9
9
|
autorequire:
|
10
10
|
bindir: bin
|
11
11
|
cert_chain: []
|
12
|
-
date: 2012-11-
|
12
|
+
date: 2012-11-07 00:00:00.000000000 Z
|
13
13
|
dependencies:
|
14
14
|
- !ruby/object:Gem::Dependency
|
15
15
|
name: nokogiri
|
@@ -264,4 +264,3 @@ test_files:
|
|
264
264
|
- spec/units/field_name_mapper_spec.rb
|
265
265
|
- spec/units/xml_extractor_spec.rb
|
266
266
|
- spec/units/xml_terminology_based_solrizer_spec.rb
|
267
|
-
has_rdoc:
|