cul-fedora 0.7.0 → 0.8.0

Sign up to get free protection for your applications and to get access to all the features.
data/VERSION CHANGED
@@ -1 +1 @@
1
- 0.7.0
1
+ 0.8.0
data/cul-fedora.gemspec CHANGED
@@ -5,11 +5,11 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{cul-fedora}
8
- s.version = "0.7.0"
8
+ s.version = "0.8.0"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["James Stuart"]
12
- s.date = %q{2011-04-22}
12
+ s.date = %q{2011-05-02}
13
13
  s.description = %q{Columbia-specific Fedora libraries}
14
14
  s.email = %q{tastyhat@jamesstuart.org}
15
15
  s.extra_rdoc_files = [
@@ -103,9 +103,12 @@ module Cul
103
103
 
104
104
  get_fullname = lambda { |node| node.nil? ? nil : (node.css("namePart[@type='family']").collect(&:content) | node.css("namePart[@type='given']").collect(&:content)).join(", ") }
105
105
 
106
- roles = ["Author","author","Creator","Thesis Advisor","Collector","Owner","Speaker","Seminar Chairman","Secretary","Rapporteur","Committee Member","Degree Grantor","Moderator","Editor","Interviewee","Interviewer","Organizer of Meeting","Originator","Teacher"]
107
-
106
+ roles = ["Author","Creator","Thesis Advisor","Collector","Owner","Speaker","Seminar Chairman","Secretary","Rapporteur","Committee Member","Degree Grantor","Moderator","Editor","Interviewee","Interviewer","Organizer of Meeting","Originator","Teacher"]
107
+ roles = roles.map { |role| role.downcase }
108
108
 
109
+ organizations = []
110
+ departments = []
111
+
109
112
  begin
110
113
  collections = self.belongsTo
111
114
  meta = describedBy.first
@@ -124,29 +127,57 @@ module Cul
124
127
 
125
128
 
126
129
 
127
- title = normalize_space.call(mods.css("titleInfo>nonSort,title").collect(&:content).join(" "))
130
+ title = mods.css("titleInfo>title").first.text
131
+ title_search = normalize_space.call(mods.css("titleInfo>nonSort,title").collect(&:content).join(" "))
132
+ record_creation_date = mods.at_css("recordInfo>recordCreationDate")
133
+ if(record_creation_date.nil?)
134
+ record_creation_date = mods.at_css("recordInfo>recordChangeDate")
135
+ end
136
+ if(!record_creation_date.nil? || !record_creation_date.empty?)
137
+ record_creation_date = DateTime.parse(record_creation_date.text.gsub("UTC", "").strip)
138
+ add_field.call("record_creation_date", record_creation_date.strftime("%Y-%m-%dT%H:%M:%SZ"))
139
+ end
128
140
  add_field.call("title_display", title)
129
- add_field.call("title_search", title)
141
+ add_field.call("title_search", title_search)
130
142
 
131
143
  all_names = []
132
144
  mods.css("name[@type='personal']").each do |name_node|
133
- if name_node.css("role>roleTerm[@type='text']").collect(&:content).any? { |role| roles.include?(role) }
145
+ if name_node.css("role>roleTerm").collect(&:content).any? { |role| roles.include?(role) }
134
146
 
135
147
  fullname = get_fullname.call(name_node)
136
148
 
137
149
  all_names << fullname
138
- add_field.call("author_id_uni", name_node.at_css("authorID[@type='institution']"))
139
- add_field.call("author_id_repository", name_node.at_css("authorID[@type='repository']"))
140
- add_field.call("author_id_naf", name_node.at_css("authorID[@type='naf']"))
150
+ if(!name_node["ID"].nil?)
151
+ add_field.call("author_id_uni", name_node["ID"])
152
+ end
141
153
  add_field.call("author_search", fullname.downcase)
142
154
  add_field.call("author_facet", fullname)
155
+
156
+ name_node.css("affiliation").each do |affiliation_node|
157
+ affiliation_text = affiliation_node.text
158
+ if(affiliation_text.include?(". "))
159
+ affiliation_split = affiliation_text.split(". ")
160
+ organizations.push(affiliation_split[0].strip)
161
+ departments.push(affiliation_split[1].strip)
162
+ end
163
+ end
143
164
 
144
165
  end
145
-
166
+ end
167
+
168
+ mods.css("name[@type='corporate']").each do |corp_name_node|
169
+ if(!corp_name_node["ID"].nil? && corp_name_node["ID"].include?("originator"))
170
+ name_part = corp_name_node.at_css("namePart").text
171
+ if(name_part.include?(". "))
172
+ name_part_split = name_part.split(". ")
173
+ organizations.push(name_part_split[0].strip)
174
+ departments.push(name_part_split[1].strip)
175
+ end
176
+ end
146
177
  end
147
178
 
148
179
  add_field.call("authors_display",all_names.join("; "))
149
- add_field.call("date", mods.at_css("*[@keyDate='yes']"))
180
+ add_field.call("pub_date", mods.at_css("*[@keyDate='yes']"))
150
181
 
151
182
  mods.css("genre").each do |genre_node|
152
183
  add_field.call("genre_facet", genre_node)
@@ -158,14 +189,14 @@ module Cul
158
189
  add_field.call("abstract", mods.at_css("abstract"))
159
190
  add_field.call("handle", mods.at_css("identifier[@type='hdl']"))
160
191
 
161
- mods.css("subject:not([@authority='local'])>topic").each do |topic_node|
162
- add_field.call("keyword_search", topic_node.content.downcase)
163
- add_field.call("keyword_facet", topic_node)
164
- end
165
-
166
- mods.css("subject[@authority='local']>topic").each do |topic_node|
167
- add_field.call("subject", topic_node)
168
- add_field.call("subject_search", topic_node)
192
+ mods.css("subject").each do |subject_node|
193
+ if(subject_node.attributes.count == 0)
194
+ subject_node.css("topic").each do |topic_node|
195
+ add_field.call("keyword_search", topic_node.content.downcase)
196
+ add_field.call("subject", topic_node)
197
+ add_field.call("subject_search", topic_node)
198
+ end
199
+ end
169
200
  end
170
201
 
171
202
 
@@ -203,7 +234,22 @@ module Cul
203
234
  add_field.call("geographic_area_search", geo)
204
235
  end
205
236
 
206
- add_field.call("export_as_mla_citation_txt","test")
237
+ add_field.call("export_as_mla_citation_txt","")
238
+
239
+ if(organizations.count > 0)
240
+ organizations = organizations.uniq
241
+ organizations.each do |organization|
242
+ add_field.call("affiliation_organization", organization)
243
+ end
244
+ end
245
+
246
+ if(departments.count > 0)
247
+ departments = departments.uniq
248
+ departments.each do |department|
249
+ add_field.call("affiliation_department", department.to_s.sub(", Department of", "").strip)
250
+ end
251
+ end
252
+
207
253
  end
208
254
 
209
255
 
@@ -15,6 +15,11 @@ module Cul
15
15
  @rsolr ||= RSolr.connect(:url => @url)
16
16
  end
17
17
 
18
+ def delete_index
19
+ rsolr.delete_by_query("*:*")
20
+ rsolr.commit
21
+ end
22
+
18
23
  def ingest(options = {})
19
24
  format = options.delete(:format) || raise(ArgumentError, "needs format")
20
25
 
@@ -22,13 +27,14 @@ module Cul
22
27
  items = [items] unless items.kind_of?(Array)
23
28
  collections = options.delete(:collections) || []
24
29
  collections = [collections] unless collections.kind_of?(Array)
30
+ ignore = options.delete(:ignore) || []
31
+ ignore = [ignore] unless ignore.kind_of?(Array)
25
32
 
26
33
  overwrite = options.delete(:overwrite) || false
27
34
  process = options.delete(:process) || nil
28
35
  skip = options.delete(:skip) || nil
29
36
 
30
-
31
-
37
+
32
38
  collections.each do |collection|
33
39
  items |= collection.listMembers
34
40
  end
@@ -40,6 +46,12 @@ module Cul
40
46
  errors = {}
41
47
 
42
48
  items.each do |i|
49
+
50
+ if(ignore.index(i.pid).nil? == false)
51
+ $stdout.puts "Ignoring " + i.pid + "..."
52
+ next
53
+ end
54
+
43
55
  if process && skip && skip > 0
44
56
  skip -= 1
45
57
  next
@@ -55,7 +67,7 @@ module Cul
55
67
  end
56
68
 
57
69
 
58
-
70
+ $stdout.puts "Indexing " + i.pid + "..."
59
71
 
60
72
  result_hash = i.send("index_for_#{format}", options)
61
73
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cul-fedora
3
3
  version: !ruby/object:Gem::Version
4
- hash: 3
4
+ hash: 63
5
5
  prerelease: false
6
6
  segments:
7
7
  - 0
8
- - 7
8
+ - 8
9
9
  - 0
10
- version: 0.7.0
10
+ version: 0.8.0
11
11
  platform: ruby
12
12
  authors:
13
13
  - James Stuart
@@ -15,7 +15,7 @@ autorequire:
15
15
  bindir: bin
16
16
  cert_chain: []
17
17
 
18
- date: 2011-04-22 00:00:00 -04:00
18
+ date: 2011-05-02 00:00:00 -04:00
19
19
  default_executable:
20
20
  dependencies:
21
21
  - !ruby/object:Gem::Dependency