discovery-indexer 0.8 → 0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/mapper/general_mapper.rb +1 -162
- data/lib/version.rb +1 -1
- metadata +3 -4
- data/lib/utilities/extract_sub_targets.rb +0 -13
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9af79cfce3c13ff2fd400f3d7e0ea465c64d82ab
|
4
|
+
data.tar.gz: 1920897136a5d98247abb6f41951d2ede01753f8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 39318208ad1e626434f3a41f78422686b33a136366512fb8016fbb06fc483f71c0a3a645993fbb93a888c46a20704f96667d0b4f5b4a43401961de31bd1ebb34
|
7
|
+
data.tar.gz: dd526e1834b74eb97974eac6cea1f4dbe66f5ebcf9f637c2c3d59fed1f3ad6c01f027eedcff23b8b84d8eb9fabe182c1f66ed217713b8fc5be6fe0711cb3eed9
|
@@ -20,170 +20,9 @@ module DiscoveryIndexer
|
|
20
20
|
def convert_to_solr_doc()
|
21
21
|
solr_doc = {}
|
22
22
|
solr_doc[:id] = @druid
|
23
|
-
solr_doc[:
|
24
|
-
solr_doc.update mods_to_title_fields
|
25
|
-
solr_doc.update mods_to_author_fields
|
26
|
-
solr_doc.update mods_to_subject_search_fields
|
27
|
-
solr_doc.update mods_to_publication_fields
|
28
|
-
solr_doc.update mods_to_pub_date
|
29
|
-
solr_doc.update mods_to_others
|
30
|
-
solr_doc.update hard_coded_fields
|
31
|
-
|
32
|
-
solr_doc[:collection] = @collection_names.nil? ? [] : @collection_names.keys
|
33
|
-
solr_doc[:modsxml] = @modsxml.to_xml
|
34
|
-
solr_doc[:all_search] = @modsxml.text.gsub(/\s+/, ' ')
|
23
|
+
solr_doc[:title] = @modsxml.sw_full_title
|
35
24
|
return solr_doc
|
36
25
|
end
|
37
|
-
|
38
|
-
# @return [Hash] Hash representing the title fields
|
39
|
-
def mods_to_title_fields
|
40
|
-
# title fields
|
41
|
-
doc_hash = {
|
42
|
-
:title_245a_search => @modsxml.sw_short_title,
|
43
|
-
:title_245_search => @modsxml.sw_full_title,
|
44
|
-
:title_variant_search => @modsxml.sw_addl_titles,
|
45
|
-
:title_sort => @modsxml.sw_sort_title,
|
46
|
-
:title_245a_display => @modsxml.sw_short_title,
|
47
|
-
:title_display => @modsxml.sw_title_display,
|
48
|
-
:title_full_display => @modsxml.sw_full_title,
|
49
|
-
}
|
50
|
-
doc_hash
|
51
|
-
end
|
52
|
-
|
53
|
-
# @return [Hash] Hash representing the author fields
|
54
|
-
def mods_to_author_fields
|
55
|
-
doc_hash = {
|
56
|
-
# author fields
|
57
|
-
:author_1xx_search => @modsxml.sw_main_author,
|
58
|
-
:author_7xx_search => @modsxml.sw_addl_authors,
|
59
|
-
:author_person_facet => @modsxml.sw_person_authors,
|
60
|
-
:author_other_facet => @modsxml.sw_impersonal_authors,
|
61
|
-
:author_sort => @modsxml.sw_sort_author[1..-1],
|
62
|
-
:author_corp_display => @modsxml.sw_corporate_authors,
|
63
|
-
:author_meeting_display => @modsxml.sw_meeting_authors,
|
64
|
-
:author_person_display => @modsxml.sw_person_authors,
|
65
|
-
:author_person_full_display => @modsxml.sw_person_authors,
|
66
|
-
}
|
67
|
-
doc_hash
|
68
|
-
end
|
69
|
-
|
70
|
-
# @return [Hash] Hash representing the search fields
|
71
|
-
def mods_to_subject_search_fields
|
72
|
-
doc_hash = {
|
73
|
-
# subject search fields
|
74
|
-
:topic_search => @modsxml.topic_search,
|
75
|
-
:geographic_search => @modsxml.geographic_search,
|
76
|
-
:subject_other_search => @modsxml.subject_other_search,
|
77
|
-
:subject_other_subvy_search => @modsxml.subject_other_subvy_search,
|
78
|
-
:subject_all_search => @modsxml.subject_all_search,
|
79
|
-
:topic_facet => @modsxml.topic_facet,
|
80
|
-
:geographic_facet => @modsxml.geographic_facet,
|
81
|
-
:era_facet => @modsxml.era_facet,
|
82
|
-
}
|
83
|
-
end
|
84
|
-
|
85
|
-
# @return [Hash] Hash representing the publication fields
|
86
|
-
def mods_to_publication_fields
|
87
|
-
doc_hash = {
|
88
|
-
# publication fields
|
89
|
-
:pub_search => @modsxml.place,
|
90
|
-
:pub_date_sort => @modsxml.pub_date_sort,
|
91
|
-
:imprint_display => @modsxml.pub_date_display,
|
92
|
-
:pub_date => @modsxml.pub_date_facet,
|
93
|
-
:pub_date_display => @modsxml.pub_date_display, # pub_date_display may be deprecated
|
94
|
-
}
|
95
|
-
end
|
96
|
-
|
97
|
-
# @return [Hash] Hash representing the pub date
|
98
|
-
def mods_to_pub_date
|
99
|
-
doc_hash = {}
|
100
|
-
pub_date_sort = @modsxml.pub_date_sort
|
101
|
-
if is_positive_int? pub_date_sort
|
102
|
-
doc_hash[:pub_year_tisim] = pub_date_sort # for date slider
|
103
|
-
# put the displayable year in the correct field, :creation_year_isi for example
|
104
|
-
doc_hash[date_type_sym] = @modsxml.pub_date_sort if date_type_sym
|
105
|
-
end
|
106
|
-
return doc_hash
|
107
|
-
end
|
108
|
-
|
109
|
-
# @return [Hash] Hash representing some fields
|
110
|
-
def mods_to_others
|
111
|
-
doc_hash = {
|
112
|
-
:format_main_ssim => format_main_ssim,
|
113
|
-
:format => format, # for backwards compatibility
|
114
|
-
:language => @modsxml.sw_language_facet,
|
115
|
-
:physical => @modsxml.term_values([:physical_description, :extent]),
|
116
|
-
:summary_search => @modsxml.term_values(:abstract),
|
117
|
-
:toc_search => @modsxml.term_values(:tableOfContents),
|
118
|
-
:url_suppl => @modsxml.term_values([:related_item, :location, :url]),
|
119
|
-
}
|
120
|
-
return doc_hash
|
121
|
-
end
|
122
|
-
|
123
|
-
def hard_coded_fields
|
124
|
-
doc_hash = {
|
125
|
-
:url_fulltext => "http://purl.stanford.edu/#{@druid}",
|
126
|
-
:access_facet => 'Online',
|
127
|
-
:building_facet => 'Stanford Digital Repository',
|
128
|
-
}
|
129
|
-
end
|
130
|
-
# select one or more format values from the controlled vocabulary here:
|
131
|
-
# http://searchworks-solr-lb.stanford.edu:8983/solr/select?facet.field=format&rows=0&facet.sort=index
|
132
|
-
# via stanford-mods gem
|
133
|
-
# @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
|
134
|
-
def format
|
135
|
-
vals = @modsxml.format
|
136
|
-
if vals.empty?
|
137
|
-
puts "#{@druid} has no SearchWorks format from MODS - check <typeOfResource> and other implicated MODS elements"
|
138
|
-
end
|
139
|
-
vals
|
140
|
-
end
|
141
|
-
|
142
|
-
# call stanford-mods format_main to get results
|
143
|
-
# @return [Array<String>] value(s) in the SearchWorks controlled vocabulary, or []
|
144
|
-
def format_main_ssim
|
145
|
-
vals = @modsxml.format_main
|
146
|
-
if vals.empty?
|
147
|
-
puts "#{@druid} has no SearchWorks Resource Type from MODS - check <typeOfResource> and other implicated MODS elements"
|
148
|
-
end
|
149
|
-
vals
|
150
|
-
end
|
151
|
-
|
152
|
-
# call stanford-mods sw_genre to get results
|
153
|
-
# @return [Array<String>] value(s)
|
154
|
-
def genre_ssim
|
155
|
-
@modsxml.sw_genre
|
156
|
-
end
|
157
|
-
|
158
|
-
protected
|
159
|
-
|
160
|
-
# @return true if the string parses into an int, and if so, the int is >= 0
|
161
|
-
def is_positive_int? str
|
162
|
-
begin
|
163
|
-
if str.to_i >= 0
|
164
|
-
return true
|
165
|
-
else
|
166
|
-
return false
|
167
|
-
end
|
168
|
-
rescue
|
169
|
-
end
|
170
|
-
return false
|
171
|
-
end
|
172
|
-
|
173
|
-
# determines particular flavor of displayable publication year field
|
174
|
-
# @return Solr field name as a symbol
|
175
|
-
def date_type_sym
|
176
|
-
vals = @modsxml.term_values([:origin_info,:dateIssued])
|
177
|
-
if vals and vals.length > 0
|
178
|
-
return :publication_year_isi
|
179
|
-
end
|
180
|
-
vals = @modsxml.term_values([:origin_info,:dateCreated])
|
181
|
-
if vals and vals.length > 0
|
182
|
-
return :creation_year_isi
|
183
|
-
end
|
184
|
-
nil
|
185
|
-
end
|
186
|
-
|
187
26
|
end
|
188
27
|
end
|
189
28
|
end
|
data/lib/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: discovery-indexer
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: '0.8'
|
4
|
+
version: '0.9'
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ahmed AlSum
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-04-
|
11
|
+
date: 2015-04-09 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: nokogiri
|
@@ -154,7 +154,6 @@ files:
|
|
154
154
|
- lib/reader/purlxml_parser.rb
|
155
155
|
- lib/reader/purlxml_parser_strict.rb
|
156
156
|
- lib/reader/purlxml_reader.rb
|
157
|
-
- lib/utilities/extract_sub_targets.rb
|
158
157
|
- lib/version.rb
|
159
158
|
- lib/writer/solr_client.rb
|
160
159
|
- lib/writer/solr_writer.rb
|
@@ -178,7 +177,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
178
177
|
version: '0'
|
179
178
|
requirements: []
|
180
179
|
rubyforge_project:
|
181
|
-
rubygems_version: 2.
|
180
|
+
rubygems_version: 2.4.5
|
182
181
|
signing_key:
|
183
182
|
specification_version: 4
|
184
183
|
summary: Shared library for the basic discovery indexing operation for Stanford DLSS.
|