cul-fedora 0.7.0 → 0.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/VERSION +1 -1
- data/cul-fedora.gemspec +2 -2
- data/lib/cul-fedora/item.rb +65 -19
- data/lib/cul-fedora/solr.rb +15 -3
- metadata +4 -4
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
0.7.0
|
1
|
+
0.8.0
|
data/cul-fedora.gemspec
CHANGED
@@ -5,11 +5,11 @@
|
|
5
5
|
|
6
6
|
Gem::Specification.new do |s|
|
7
7
|
s.name = %q{cul-fedora}
|
8
|
-
s.version = "0.7.0"
|
8
|
+
s.version = "0.8.0"
|
9
9
|
|
10
10
|
s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
|
11
11
|
s.authors = ["James Stuart"]
|
12
|
-
s.date = %q{2011-
|
12
|
+
s.date = %q{2011-05-02}
|
13
13
|
s.description = %q{Columbia-specific Fedora libraries}
|
14
14
|
s.email = %q{tastyhat@jamesstuart.org}
|
15
15
|
s.extra_rdoc_files = [
|
data/lib/cul-fedora/item.rb
CHANGED
@@ -103,9 +103,12 @@ module Cul
|
|
103
103
|
|
104
104
|
get_fullname = lambda { |node| node.nil? ? nil : (node.css("namePart[@type='family']").collect(&:content) | node.css("namePart[@type='given']").collect(&:content)).join(", ") }
|
105
105
|
|
106
|
-
roles = ["Author","
|
107
|
-
|
106
|
+
roles = ["Author","Creator","Thesis Advisor","Collector","Owner","Speaker","Seminar Chairman","Secretary","Rapporteur","Committee Member","Degree Grantor","Moderator","Editor","Interviewee","Interviewer","Organizer of Meeting","Originator","Teacher"]
|
107
|
+
roles = roles.map { |role| role.downcase }
|
108
108
|
|
109
|
+
organizations = []
|
110
|
+
departments = []
|
111
|
+
|
109
112
|
begin
|
110
113
|
collections = self.belongsTo
|
111
114
|
meta = describedBy.first
|
@@ -124,29 +127,57 @@ module Cul
|
|
124
127
|
|
125
128
|
|
126
129
|
|
127
|
-
title =
|
130
|
+
title = mods.css("titleInfo>title").first.text
|
131
|
+
title_search = normalize_space.call(mods.css("titleInfo>nonSort,title").collect(&:content).join(" "))
|
132
|
+
record_creation_date = mods.at_css("recordInfo>recordCreationDate")
|
133
|
+
if(record_creation_date.nil?)
|
134
|
+
record_creation_date = mods.at_css("recordInfo>recordChangeDate")
|
135
|
+
end
|
136
|
+
if(!record_creation_date.nil? || !record_creation_date.empty?)
|
137
|
+
record_creation_date = DateTime.parse(record_creation_date.text.gsub("UTC", "").strip)
|
138
|
+
add_field.call("record_creation_date", record_creation_date.strftime("%Y-%m-%dT%H:%M:%SZ"))
|
139
|
+
end
|
128
140
|
add_field.call("title_display", title)
|
129
|
-
add_field.call("title_search",
|
141
|
+
add_field.call("title_search", title_search)
|
130
142
|
|
131
143
|
all_names = []
|
132
144
|
mods.css("name[@type='personal']").each do |name_node|
|
133
|
-
if name_node.css("role>roleTerm
|
145
|
+
if name_node.css("role>roleTerm").collect(&:content).any? { |role| roles.include?(role) }
|
134
146
|
|
135
147
|
fullname = get_fullname.call(name_node)
|
136
148
|
|
137
149
|
all_names << fullname
|
138
|
-
|
139
|
-
|
140
|
-
|
150
|
+
if(!name_node["ID"].nil?)
|
151
|
+
add_field.call("author_id_uni", name_node["ID"])
|
152
|
+
end
|
141
153
|
add_field.call("author_search", fullname.downcase)
|
142
154
|
add_field.call("author_facet", fullname)
|
155
|
+
|
156
|
+
name_node.css("affiliation").each do |affiliation_node|
|
157
|
+
affiliation_text = affiliation_node.text
|
158
|
+
if(affiliation_text.include?(". "))
|
159
|
+
affiliation_split = affiliation_text.split(". ")
|
160
|
+
organizations.push(affiliation_split[0].strip)
|
161
|
+
departments.push(affiliation_split[1].strip)
|
162
|
+
end
|
163
|
+
end
|
143
164
|
|
144
165
|
end
|
145
|
-
|
166
|
+
end
|
167
|
+
|
168
|
+
mods.css("name[@type='corporate']").each do |corp_name_node|
|
169
|
+
if(!corp_name_node["ID"].nil? && corp_name_node["ID"].include?("originator"))
|
170
|
+
name_part = corp_name_node.at_css("namePart").text
|
171
|
+
if(name_part.include?(". "))
|
172
|
+
name_part_split = name_part.split(". ")
|
173
|
+
organizations.push(name_part_split[0].strip)
|
174
|
+
departments.push(name_part_split[1].strip)
|
175
|
+
end
|
176
|
+
end
|
146
177
|
end
|
147
178
|
|
148
179
|
add_field.call("authors_display",all_names.join("; "))
|
149
|
-
add_field.call("
|
180
|
+
add_field.call("pub_date", mods.at_css("*[@keyDate='yes']"))
|
150
181
|
|
151
182
|
mods.css("genre").each do |genre_node|
|
152
183
|
add_field.call("genre_facet", genre_node)
|
@@ -158,14 +189,14 @@ module Cul
|
|
158
189
|
add_field.call("abstract", mods.at_css("abstract"))
|
159
190
|
add_field.call("handle", mods.at_css("identifier[@type='hdl']"))
|
160
191
|
|
161
|
-
mods.css("subject
|
162
|
-
|
163
|
-
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
192
|
+
mods.css("subject").each do |subject_node|
|
193
|
+
if(subject_node.attributes.count == 0)
|
194
|
+
subject_node.css("topic").each do |topic_node|
|
195
|
+
add_field.call("keyword_search", topic_node.content.downcase)
|
196
|
+
add_field.call("subject", topic_node)
|
197
|
+
add_field.call("subject_search", topic_node)
|
198
|
+
end
|
199
|
+
end
|
169
200
|
end
|
170
201
|
|
171
202
|
|
@@ -203,7 +234,22 @@ module Cul
|
|
203
234
|
add_field.call("geographic_area_search", geo)
|
204
235
|
end
|
205
236
|
|
206
|
-
add_field.call("export_as_mla_citation_txt","
|
237
|
+
add_field.call("export_as_mla_citation_txt","")
|
238
|
+
|
239
|
+
if(organizations.count > 0)
|
240
|
+
organizations = organizations.uniq
|
241
|
+
organizations.each do |organization|
|
242
|
+
add_field.call("affiliation_organization", organization)
|
243
|
+
end
|
244
|
+
end
|
245
|
+
|
246
|
+
if(departments.count > 0)
|
247
|
+
departments = departments.uniq
|
248
|
+
departments.each do |department|
|
249
|
+
add_field.call("affiliation_department", department.to_s.sub(", Department of", "").strip)
|
250
|
+
end
|
251
|
+
end
|
252
|
+
|
207
253
|
end
|
208
254
|
|
209
255
|
|
data/lib/cul-fedora/solr.rb
CHANGED
@@ -15,6 +15,11 @@ module Cul
|
|
15
15
|
@rsolr ||= RSolr.connect(:url => @url)
|
16
16
|
end
|
17
17
|
|
18
|
+
def delete_index
|
19
|
+
rsolr.delete_by_query("*:*")
|
20
|
+
rsolr.commit
|
21
|
+
end
|
22
|
+
|
18
23
|
def ingest(options = {})
|
19
24
|
format = options.delete(:format) || raise(ArgumentError, "needs format")
|
20
25
|
|
@@ -22,13 +27,14 @@ module Cul
|
|
22
27
|
items = [items] unless items.kind_of?(Array)
|
23
28
|
collections = options.delete(:collections) || []
|
24
29
|
collections = [collections] unless collections.kind_of?(Array)
|
30
|
+
ignore = options.delete(:ignore) || []
|
31
|
+
ignore = [ignore] unless ignore.kind_of?(Array)
|
25
32
|
|
26
33
|
overwrite = options.delete(:overwrite) || false
|
27
34
|
process = options.delete(:process) || nil
|
28
35
|
skip = options.delete(:skip) || nil
|
29
36
|
|
30
|
-
|
31
|
-
|
37
|
+
|
32
38
|
collections.each do |collection|
|
33
39
|
items |= collection.listMembers
|
34
40
|
end
|
@@ -40,6 +46,12 @@ module Cul
|
|
40
46
|
errors = {}
|
41
47
|
|
42
48
|
items.each do |i|
|
49
|
+
|
50
|
+
if(ignore.index(i.pid).nil? == false)
|
51
|
+
$stdout.puts "Ignoring " + i.pid + "..."
|
52
|
+
next
|
53
|
+
end
|
54
|
+
|
43
55
|
if process && skip && skip > 0
|
44
56
|
skip -= 1
|
45
57
|
next
|
@@ -55,7 +67,7 @@ module Cul
|
|
55
67
|
end
|
56
68
|
|
57
69
|
|
58
|
-
|
70
|
+
$stdout.puts "Indexing " + i.pid + "..."
|
59
71
|
|
60
72
|
result_hash = i.send("index_for_#{format}", options)
|
61
73
|
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cul-fedora
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
hash:
|
4
|
+
hash: 63
|
5
5
|
prerelease: false
|
6
6
|
segments:
|
7
7
|
- 0
|
8
|
-
- 7
|
8
|
+
- 8
|
9
9
|
- 0
|
10
|
-
version: 0.7.0
|
10
|
+
version: 0.8.0
|
11
11
|
platform: ruby
|
12
12
|
authors:
|
13
13
|
- James Stuart
|
@@ -15,7 +15,7 @@ autorequire:
|
|
15
15
|
bindir: bin
|
16
16
|
cert_chain: []
|
17
17
|
|
18
|
-
date: 2011-
|
18
|
+
date: 2011-05-02 00:00:00 -04:00
|
19
19
|
default_executable:
|
20
20
|
dependencies:
|
21
21
|
- !ruby/object:Gem::Dependency
|