cul-fedora 0.7.0 → 0.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.0
1
+ 0.8.0
data/cul-fedora.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{cul-fedora}
8
- s.version = "0.7.0"
8
+ s.version = "0.8.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["James Stuart"]
12
- s.date = %q{2011-04-22}
12
+ s.date = %q{2011-05-02}
13
13
  s.description = %q{Columbia-specific Fedora libraries}
14
14
  s.email = %q{tastyhat@jamesstuart.org}
15
15
  s.extra_rdoc_files = [
@@ -103,9 +103,12 @@ module Cul
103
103
 
104
104
  get_fullname = lambda { |node| node.nil? ? nil : (node.css("namePart[@type='family']").collect(&:content) | node.css("namePart[@type='given']").collect(&:content)).join(", ") }
105
105
 
106
- roles = ["Author","author","Creator","Thesis Advisor","Collector","Owner","Speaker","Seminar Chairman","Secretary","Rapporteur","Committee Member","Degree Grantor","Moderator","Editor","Interviewee","Interviewer","Organizer of Meeting","Originator","Teacher"]
107
-
106
+ roles = ["Author","Creator","Thesis Advisor","Collector","Owner","Speaker","Seminar Chairman","Secretary","Rapporteur","Committee Member","Degree Grantor","Moderator","Editor","Interviewee","Interviewer","Organizer of Meeting","Originator","Teacher"]
107
+ roles = roles.map { |role| role.downcase }
108
108
 
109
+ organizations = []
110
+ departments = []
111
+
109
112
  begin
110
113
  collections = self.belongsTo
111
114
  meta = describedBy.first
@@ -124,29 +127,57 @@ module Cul
124
127
 
125
128
 
126
129
 
127
- title = normalize_space.call(mods.css("titleInfo>nonSort,title").collect(&:content).join(" "))
130
+ title = mods.css("titleInfo>title").first.text
131
+ title_search = normalize_space.call(mods.css("titleInfo>nonSort,title").collect(&:content).join(" "))
132
+ record_creation_date = mods.at_css("recordInfo>recordCreationDate")
133
+ if(record_creation_date.nil?)
134
+ record_creation_date = mods.at_css("recordInfo>recordChangeDate")
135
+ end
136
+ if(!record_creation_date.nil? || !record_creation_date.empty?)
137
+ record_creation_date = DateTime.parse(record_creation_date.text.gsub("UTC", "").strip)
138
+ add_field.call("record_creation_date", record_creation_date.strftime("%Y-%m-%dT%H:%M:%SZ"))
139
+ end
128
140
  add_field.call("title_display", title)
129
- add_field.call("title_search", title)
141
+ add_field.call("title_search", title_search)
130
142
 
131
143
  all_names = []
132
144
  mods.css("name[@type='personal']").each do |name_node|
133
- if name_node.css("role>roleTerm[@type='text']").collect(&:content).any? { |role| roles.include?(role) }
145
+ if name_node.css("role>roleTerm").collect(&:content).any? { |role| roles.include?(role) }
134
146
 
135
147
  fullname = get_fullname.call(name_node)
136
148
 
137
149
  all_names << fullname
138
- add_field.call("author_id_uni", name_node.at_css("authorID[@type='institution']"))
139
- add_field.call("author_id_repository", name_node.at_css("authorID[@type='repository']"))
140
- add_field.call("author_id_naf", name_node.at_css("authorID[@type='naf']"))
150
+ if(!name_node["ID"].nil?)
151
+ add_field.call("author_id_uni", name_node["ID"])
152
+ end
141
153
  add_field.call("author_search", fullname.downcase)
142
154
  add_field.call("author_facet", fullname)
155
+
156
+ name_node.css("affiliation").each do |affiliation_node|
157
+ affiliation_text = affiliation_node.text
158
+ if(affiliation_text.include?(". "))
159
+ affiliation_split = affiliation_text.split(". ")
160
+ organizations.push(affiliation_split[0].strip)
161
+ departments.push(affiliation_split[1].strip)
162
+ end
163
+ end
143
164
 
144
165
  end
145
-
166
+ end
167
+
168
+ mods.css("name[@type='corporate']").each do |corp_name_node|
169
+ if(!corp_name_node["ID"].nil? && corp_name_node["ID"].include?("originator"))
170
+ name_part = corp_name_node.at_css("namePart").text
171
+ if(name_part.include?(". "))
172
+ name_part_split = name_part.split(". ")
173
+ organizations.push(name_part_split[0].strip)
174
+ departments.push(name_part_split[1].strip)
175
+ end
176
+ end
146
177
  end
147
178
 
148
179
  add_field.call("authors_display",all_names.join("; "))
149
- add_field.call("date", mods.at_css("*[@keyDate='yes']"))
180
+ add_field.call("pub_date", mods.at_css("*[@keyDate='yes']"))
150
181
 
151
182
  mods.css("genre").each do |genre_node|
152
183
  add_field.call("genre_facet", genre_node)
@@ -158,14 +189,14 @@ module Cul
158
189
  add_field.call("abstract", mods.at_css("abstract"))
159
190
  add_field.call("handle", mods.at_css("identifier[@type='hdl']"))
160
191
 
161
- mods.css("subject:not([@authority='local'])>topic").each do |topic_node|
162
- add_field.call("keyword_search", topic_node.content.downcase)
163
- add_field.call("keyword_facet", topic_node)
164
- end
165
-
166
- mods.css("subject[@authority='local']>topic").each do |topic_node|
167
- add_field.call("subject", topic_node)
168
- add_field.call("subject_search", topic_node)
192
+ mods.css("subject").each do |subject_node|
193
+ if(subject_node.attributes.count == 0)
194
+ subject_node.css("topic").each do |topic_node|
195
+ add_field.call("keyword_search", topic_node.content.downcase)
196
+ add_field.call("subject", topic_node)
197
+ add_field.call("subject_search", topic_node)
198
+ end
199
+ end
169
200
  end
170
201
 
171
202
 
@@ -203,7 +234,22 @@ module Cul
203
234
  add_field.call("geographic_area_search", geo)
204
235
  end
205
236
 
206
- add_field.call("export_as_mla_citation_txt","test")
237
+ add_field.call("export_as_mla_citation_txt","")
238
+
239
+ if(organizations.count > 0)
240
+ organizations = organizations.uniq
241
+ organizations.each do |organization|
242
+ add_field.call("affiliation_organization", organization)
243
+ end
244
+ end
245
+
246
+ if(departments.count > 0)
247
+ departments = departments.uniq
248
+ departments.each do |department|
249
+ add_field.call("affiliation_department", department.to_s.sub(", Department of", "").strip)
250
+ end
251
+ end
252
+
207
253
  end
208
254
 
209
255
 
@@ -15,6 +15,11 @@ module Cul
15
15
  @rsolr ||= RSolr.connect(:url => @url)
16
16
  end
17
17
 
18
# NOTE(review): delete_index is added in 0.8.0 (every line of it is a "+" line in this hunk).
# It calls rsolr.delete_by_query("*:*") and then rsolr.commit.
# The "*:*" query presumably matches every document, so this would empty the whole
# Solr index at @url -- TODO confirm against the RSolr/Solr docs before calling it.
+ def delete_index
19
+ rsolr.delete_by_query("*:*")
20
+ rsolr.commit
21
+ end
22
+
18
23
  def ingest(options = {})
19
24
  format = options.delete(:format) || raise(ArgumentError, "needs format")
20
25
 
@@ -22,13 +27,14 @@ module Cul
22
27
  items = [items] unless items.kind_of?(Array)
23
28
  collections = options.delete(:collections) || []
24
29
  collections = [collections] unless collections.kind_of?(Array)
30
+ ignore = options.delete(:ignore) || []
31
+ ignore = [ignore] unless ignore.kind_of?(Array)
25
32
 
26
33
  overwrite = options.delete(:overwrite) || false
27
34
  process = options.delete(:process) || nil
28
35
  skip = options.delete(:skip) || nil
29
36
 
30
-
31
-
37
+
32
38
  collections.each do |collection|
33
39
  items |= collection.listMembers
34
40
  end
@@ -40,6 +46,12 @@ module Cul
40
46
  errors = {}
41
47
 
42
48
  items.each do |i|
49
+
50
+ if(ignore.index(i.pid).nil? == false)
51
+ $stdout.puts "Ignoring " + i.pid + "..."
52
+ next
53
+ end
54
+
43
55
  if process && skip && skip > 0
44
56
  skip -= 1
45
57
  next
@@ -55,7 +67,7 @@ module Cul
55
67
  end
56
68
 
57
69
 
58
-
70
+ $stdout.puts "Indexing " + i.pid + "..."
59
71
 
60
72
  result_hash = i.send("index_for_#{format}", options)
61
73
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cul-fedora
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 63
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 7
8
+ - 8
9
9
  - 0
10
- version: 0.7.0
10
+ version: 0.8.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - James Stuart
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-22 00:00:00 -04:00
18
+ date: 2011-05-02 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency