sufia-models 0.0.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.md +177 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/app/models/batch.rb +46 -0
  8. data/app/models/checksum_audit_log.rb +35 -0
  9. data/app/models/contact_form.rb +42 -0
  10. data/app/models/datastreams/batch_rdf_datastream.rb +23 -0
  11. data/app/models/datastreams/file_content_datastream.rb +18 -0
  12. data/app/models/datastreams/fits_datastream.rb +188 -0
  13. data/app/models/datastreams/generic_file_rdf_datastream.rb +75 -0
  14. data/app/models/datastreams/paranoid_rights_datastream.rb +37 -0
  15. data/app/models/datastreams/properties_datastream.rb +33 -0
  16. data/app/models/domain_term.rb +18 -0
  17. data/app/models/follow.rb +28 -0
  18. data/app/models/generic_file.rb +16 -0
  19. data/app/models/geo_names_resource.rb +34 -0
  20. data/app/models/group.rb +8 -0
  21. data/app/models/local_authority.rb +93 -0
  22. data/app/models/local_authority_entry.rb +18 -0
  23. data/app/models/single_use_link.rb +26 -0
  24. data/app/models/subject_local_authority_entry.rb +16 -0
  25. data/app/models/trophy.rb +12 -0
  26. data/app/models/version_committer.rb +17 -0
  27. data/lib/sufia/models.rb +11 -0
  28. data/lib/sufia/models/active_fedora/redis.rb +49 -0
  29. data/lib/sufia/models/active_record/redis.rb +56 -0
  30. data/lib/sufia/models/engine.rb +34 -0
  31. data/lib/sufia/models/file_content.rb +9 -0
  32. data/lib/sufia/models/file_content/extract_metadata.rb +60 -0
  33. data/lib/sufia/models/file_content/versions.rb +23 -0
  34. data/lib/sufia/models/generic_file.rb +183 -0
  35. data/lib/sufia/models/generic_file/actions.rb +39 -0
  36. data/lib/sufia/models/generic_file/audit.rb +119 -0
  37. data/lib/sufia/models/generic_file/characterization.rb +81 -0
  38. data/lib/sufia/models/generic_file/export.rb +339 -0
  39. data/lib/sufia/models/generic_file/permissions.rb +64 -0
  40. data/lib/sufia/models/generic_file/thumbnail.rb +91 -0
  41. data/lib/sufia/models/id_service.rb +57 -0
  42. data/lib/sufia/models/jobs/audit_job.rb +65 -0
  43. data/lib/sufia/models/jobs/batch_update_job.rb +86 -0
  44. data/lib/sufia/models/jobs/characterize_job.rb +43 -0
  45. data/lib/sufia/models/jobs/content_delete_event_job.rb +31 -0
  46. data/lib/sufia/models/jobs/content_deposit_event_job.rb +32 -0
  47. data/lib/sufia/models/jobs/content_new_version_event_job.rb +32 -0
  48. data/lib/sufia/models/jobs/content_restored_version_event_job.rb +40 -0
  49. data/lib/sufia/models/jobs/content_update_event_job.rb +32 -0
  50. data/lib/sufia/models/jobs/event_job.rb +33 -0
  51. data/lib/sufia/models/jobs/ffmpeg_transcode_job.rb +61 -0
  52. data/lib/sufia/models/jobs/resolrize_job.rb +23 -0
  53. data/lib/sufia/models/jobs/transcode_audio_job.rb +40 -0
  54. data/lib/sufia/models/jobs/transcode_video_job.rb +39 -0
  55. data/lib/sufia/models/jobs/unzip_job.rb +54 -0
  56. data/lib/sufia/models/jobs/user_edit_profile_event_job.rb +35 -0
  57. data/lib/sufia/models/jobs/user_follow_event_job.rb +37 -0
  58. data/lib/sufia/models/jobs/user_unfollow_event_job.rb +38 -0
  59. data/lib/sufia/models/model_methods.rb +39 -0
  60. data/lib/sufia/models/noid.rb +42 -0
  61. data/lib/sufia/models/solr_document_behavior.rb +125 -0
  62. data/lib/sufia/models/user.rb +126 -0
  63. data/lib/sufia/models/utils.rb +36 -0
  64. data/lib/sufia/models/version.rb +5 -0
  65. data/lib/tasks/sufia-models_tasks.rake +4 -0
  66. data/sufia-models.gemspec +28 -0
  67. metadata +151 -0
@@ -0,0 +1,119 @@
1
+ module Sufia
2
+ module GenericFile
3
+ module Audit
4
+ extend ActiveSupport::Concern
5
+ included do
6
+ end
7
+
8
+ NO_RUNS = 999
9
+
10
# Audits every version of every datastream on this object.
#
# force - when true, an audit is requested for each version regardless of
#         how recently it was last audited (passed through to
#         ::GenericFile.audit).
#
# Returns an Array holding whatever ::GenericFile.audit returned for each
# version, in the order per_version yielded them.
def audit(force = false)
  [].tap do |results|
    per_version { |version| results << ::GenericFile.audit(version, force) }
  end
end
17
+
18
# Calls the given block once for each version of each datastream.
#
# block - receives a single datastream version per call.
#
# Returns whatever `datastreams.each` returns.
def per_version(&block)
  datastreams.each do |_dsid, datastream|
    datastream.versions.each { |version| block.call(version) }
  end
end
25
+
26
# Looks up the checksum audit log entries recorded for one datastream of
# this object, newest first (ordered by created_at, then id, descending).
#
# dsid - the datastream id to filter on.
def logs(dsid)
  conditions = { :dsid => dsid, :pid => pid }
  ChecksumAuditLog.where(conditions).order('created_at desc, id desc')
end
29
+
30
# Forced variant of #audit: always passes force = true.
def audit!
  force = true
  audit(force)
end
33
+
34
# Forced variant of #audit_stat: always passes force = true.
def audit_stat!
  force = true
  audit_stat(force)
end
37
+
38
# Summarises the audit state of this object across all of its versions.
#
# force - passed straight through to #audit.
#
# Returns
#   * when every audit has been run: the first failing "pass" value
#     (0, false or nil) if any audit failed, otherwise the last passing
#     value (or true when there are no audits at all);
#   * when only some audits have been run: a sentence saying whether the
#     ones that ran were passing or failing;
#   * when no audit has been run: a sentence saying so.
#
# The "pass" values are stored as integers elsewhere in this module
# (1 = pass, 0 = fail, NO_RUNS = not yet run). In Ruby 0 is truthy, so a
# plain `a && b` fold cannot detect failures; failures are therefore
# tested for explicitly.
def audit_stat(force = false)
  audit_results = audit(force).map { |log| log["pass"] }

  # Drop the placeholder entries for audits that have not produced a result.
  completed = audit_results.reject { |value| value == NO_RUNS }
  failed = completed.select { |value| !value || value == 0 }

  if completed.length == audit_results.length
    # Everything ran (or there was nothing to run).
    return failed.first unless failed.empty?
    completed.reduce(true) { |acc, value| acc && value }
  elsif completed.empty?
    'Audits have not yet been run on this file.'
  else
    'Some audits have not been run, but the ones run were ' + (failed.empty? ? 'passing' : 'failing') + '.'
  end
end
54
+
55
+
56
+ module ClassMethods
57
# Forced variant of .audit for a single datastream version.
def audit!(version)
  force = true
  ::GenericFile.audit(version, force)
end
60
+
61
# Returns the most recent audit log entry for a datastream version,
# queueing a fresh AuditJob first when one is needed.
#
# version - a datastream version (responds to pid, dsid and versionID).
# force   - when true, skip the needs_audit? check and always queue a job.
#
# When no job is queued, the existing latest log entry is returned (may be
# nil). Otherwise the log is re-read in case the job has already finished;
# if there is still no entry, an unsaved ChecksumAuditLog marked with
# NO_RUNS is returned as a placeholder.
def audit(version, force = false)
  latest_audit = find(version.pid).logs(version.dsid).first
  return latest_audit if !force && !::GenericFile.needs_audit?(version, latest_audit)

  Sufia.queue.push(AuditJob.new(version.pid, version.dsid, version.versionID))

  # Look again, just in case the job has finished already.
  find(version.pid).logs(version.dsid).first ||
    ChecksumAuditLog.new(:pass=>NO_RUNS, :pid=>version.pid, :dsid=>version.dsid, :version=>version.versionID)
end
74
+
75
# Decides whether a datastream version is due for a new audit.
#
# version      - the datastream version (not inspected here).
# latest_audit - the most recent audit log entry, or nil.
#
# Returns false only when the latest audit has an updated_at that is fewer
# than Sufia::Engine.config.max_days_between_audits days old; true
# otherwise. A log entry without updated_at is reported with a warning and
# treated as needing an audit.
def needs_audit?(version, latest_audit)
  last_run = latest_audit && latest_audit.updated_at
  unless last_run
    logger.warn "***AUDIT*** problem with audit log! Latest Audit is not nil, but updated_at is not set #{latest_audit}" unless latest_audit.nil?
    return true
  end

  days_since_last_audit = (DateTime.now - last_run.to_date).to_i
  !(days_since_last_audit < Sufia::Engine.config.max_days_between_audits)
end
90
+
91
# Runs .audit over every version of every GenericFile.
#
# force - passed through to .audit for each version.
def audit_everything(force = false)
  ::GenericFile.find_each do |generic_file|
    generic_file.per_version { |version| ::GenericFile.audit(version, force) }
  end
end
98
+
99
# Forced variant of .audit_everything.
def audit_everything!
  force = true
  ::GenericFile.audit_everything(force)
end
102
+
103
# Performs the checksum check for one datastream version and records it.
#
# version - a datastream version (responds to dsChecksumValid, pid, dsid
#           and versionID).
#
# On success the log history for the version is pruned and pass = 1 is
# recorded; on failure a warning is logged and pass = 0 is recorded.
#
# Returns the ChecksumAuditLog created for this run.
def run_audit(version)
  passing =
    if version.dsChecksumValid
      ChecksumAuditLog.prune_history(version)
      1
    else
      logger.warn "***AUDIT*** Audit failed for #{version.pid} #{version.versionID}"
      0
    end

  ChecksumAuditLog.create!(:pass=>passing, :pid=>version.pid,
                           :dsid=>version.dsid, :version=>version.versionID)
end
116
+ end
117
+ end
118
+ end
119
+ end
@@ -0,0 +1,81 @@
1
+ require 'datastreams/fits_datastream'
2
+ module Sufia
3
+ module GenericFile
4
+ module Characterization
5
+ extend ActiveSupport::Concern
6
+ included do
7
+ has_metadata :name => "characterization", :type => FitsDatastream
8
+ delegate :mime_type, :to => :characterization, :unique => true
9
+ delegate_to :characterization, [:format_label, :file_size, :last_modified,
10
+ :filename, :original_checksum, :rights_basis,
11
+ :copyright_basis, :copyright_note,
12
+ :well_formed, :valid, :status_message,
13
+ :file_title, :file_author, :page_count,
14
+ :file_language, :word_count, :character_count,
15
+ :paragraph_count, :line_count, :table_count,
16
+ :graphics_count, :byte_order, :compression,
17
+ :width, :height, :color_space, :profile_name,
18
+ :profile_version, :orientation, :color_map,
19
+ :image_producer, :capture_device,
20
+ :scanning_software, :exif_version,
21
+ :gps_timestamp, :latitude, :longitude,
22
+ :character_set, :markup_basis,
23
+ :markup_language, :duration, :bit_depth,
24
+ :sample_rate, :channels, :data_format, :offset]
25
+
26
+ end
27
+
28
# Yields to the caller's block and afterwards queues a CharacterizeJob for
# this object, but only if the content datastream reported changes
# *before* the block ran.
#
# Returns the result of the queue push, or nil when nothing was queued.
def characterize_if_changed
  needs_characterization = content.changed?
  yield
  return unless needs_characterization

  Sufia.queue.push(CharacterizeJob.new(pid))
end
34
+
35
+ ## Extract the metadata from the content datastream and record it in the characterization datastream
36
# Extracts metadata from the content datastream, stores it in the
# characterization datastream, copies mapped values into the descriptive
# fields (append_metadata), sets filename from label, and saves the object
# unless it is still new.
def characterize
  characterization.ng_xml = content.extract_metadata
  append_metadata
  self.filename = label
  return if new_object?

  save
end
42
+
43
+ # Populate descMetadata with fields from FITS (e.g. Author from pdfs)
44
# Copies characterization values onto this object according to
# Sufia::Engine.config.fits_to_desc_mapping (characterization term =>
# attribute name).
#
# For each mapped term that has a value, every value is appended to the
# target attribute when that attribute returns an Array (skipping values
# already present); otherwise the attribute is assigned through its setter.
def append_metadata
  extracted = characterization_terms
  Sufia::Engine.config.fits_to_desc_mapping.each_pair do |fits_term, target|
    next unless extracted.has_key?(fits_term)

    # Normalise to an Array so single and multiple values share one path.
    extracted[fits_term] = [extracted[fits_term]] unless extracted[fits_term].is_a? Array

    extracted[fits_term].each do |term_value|
      current = send(target)
      if current.kind_of?(Array)
        current << term_value unless current.include?(term_value)
      else
        # Single-valued attribute: it cannot be appended to, so assign.
        send("#{target}=", term_value)
      end
    end
  end
end
62
+
63
# Collects the non-empty values of all proxied terms declared in the
# characterization datastream's terminology.
#
# Returns a Hash of term name => value. Terms whose lookup raises
# NoMethodError (including values that do not respond to empty?) are
# silently skipped.
def characterization_terms
  collected = {}
  characterization.class.terminology.terms.each_pair do |_key, term_definition|
    if term_definition.respond_to? :proxied_term
      proxied = term_definition.proxied_term
      begin
        current = send(proxied.name)
        collected[proxied.name] = current unless current.empty?
      rescue NoMethodError
        # Deliberately ignored: this term is simply left out of the result.
      end
    end
  end
  collected
end
77
+
78
+
79
+ end
80
+ end
81
+ end
@@ -0,0 +1,339 @@
1
+ module Sufia
2
+ module GenericFile
3
+ module Export
4
+ # MIME: 'application/x-endnote-refer'
5
# Renders this object as an EndNote ("refer") record.
#
# Each EndNote tag maps either to a literal String or to
# [attribute, optional transform lambda]. Attribute values are read from
# this object when it responds to the attribute; tags whose value list is
# empty or starts with nil are left out.
#
# Returns the record as a newline-separated String.
def export_as_endnote
  end_note_format = {
    '%T' => [:title, lambda { |x| x.first }],
    '%Q' => [:title, lambda { |x| x.drop(1) }],
    '%A' => [:creator],
    '%C' => [:publication_place],
    '%D' => [:date_created],
    '%8' => [:date_uploaded],
    '%E' => [:contributor],
    '%I' => [:publisher],
    '%J' => [:series_title],
    '%@' => [:isbn],
    '%U' => [:related_url],
    '%7' => [:edition_statement],
    '%R' => [:persistent_url],
    '%X' => [:description],
    '%G' => [:language],
    '%[' => [:date_modified],
    '%9' => [:resource_type],
    '%~' => Application.config.application_name,
    '%W' => 'Penn State University'
  }

  lines = ["%0 GenericFile"]
  end_note_format.each do |endnote_key, mapping|
    values =
      if mapping.is_a? String
        [mapping]
      else
        attribute, transform = mapping[0], mapping[1]
        fetched = respond_to?(attribute) ? send(attribute) : nil
        fetched = transform.call(fetched) if mapping.length == 2
        fetched.is_a?(Array) ? fetched : [fetched]
      end
    next if values.empty? or values.first.nil?

    lines << "#{endnote_key} #{values.join("; ")}"
  end
  lines.join("\n")
end
43
+
44
+ # MIME type: 'application/x-openurl-ctx-kev'
45
# Renders this object as an OpenURL ContextObject in key/encoded-value
# form using the Dublin Core metadata format.
#
# Every value of each mapped attribute becomes one CGI-escaped `rft.*`
# pair; attributes that are empty or start with nil are skipped.
#
# Returns the '&'-joined String.
def export_as_openurl_ctx_kev
  field_map = {
    :title => 'title',
    :creator => 'creator',
    :subject => 'subject',
    :description => 'description',
    :publisher => 'publisher',
    :contributor => 'contributor',
    :date_created => 'date',
    :resource_type => 'format',
    :identifier => 'identifier',
    :language => 'language',
    :tag => 'relation',
    :based_near => 'coverage',
    :rights => 'rights'
  }

  pairs = ["url_ver=Z39.88-2004&ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc&rfr_id=info%3Asid%2Fblacklight.rubyforge.org%3Agenerator"]
  field_map.each do |element, kev|
    values = send(element)
    next if values.empty? or values.first.nil?

    values.each { |value| pairs << "rft.#{kev}=#{CGI::escape(value)}" }
  end
  pairs.blank? ? nil : pairs.join('&')
end
72
+
73
# Builds an APA-style citation as an HTML-safe String.
#
# Layout: "Authors. (Date). <i>Title.</i> Publisher info."
# Any part whose helper returns nil is omitted.
#
# Authors are abbreviated with abbreviate_name and joined with ", ", with
# ", &amp; " before the last one. Position is decided by index rather than
# by comparing values: different people can abbreviate to the same string
# (e.g. "Smith, John" and "Smith, Jane" both become "Smith, J."), and a
# value comparison would then treat a later author as "the first" and drop
# its separator.
def export_as_apa_citation
  text = ''

  authors_list = get_author_list.reject { |author| author.blank? }.map { |author| abbreviate_name(author) }
  last_index = authors_list.length - 1
  authors_list.each_with_index do |author, index|
    separator =
      if index == 0
        ''
      elsif index == last_index
        ', &amp; '
      else
        ', '
      end
    text << separator << author.strip
  end

  # Close the author list with a full stop unless it already ends in one.
  unless text.blank?
    text << (text[-1, 1] != "." ? ". " : " ")
  end

  pub_date = setup_pub_date
  text << "(" + pub_date + "). " unless pub_date.nil?

  title_info = setup_title_info
  text << "<i>" + title_info + "</i> " unless title_info.nil?

  pub_info = setup_pub_info
  text << pub_info unless pub_info.nil?

  text << "." unless text.blank? || text[-1, 1] == "."
  text.html_safe
end
117
+
118
# Builds an MLA-style citation as an HTML-safe String.
#
# Layout: "Authors. <i>Title.</i> Publisher info, Date."
#
# With fewer than four authors the first is shown as stored and the rest
# are reversed with name_reverse, the last one introduced by ", and ".
# With four or more, only the first author is shown followed by "et al.".
#
# The title is only emitted when setup_title_info actually produced one;
# checking the raw title list instead would pass a nil title on to
# mla_citation_title whenever the first title is blank. Publisher info and
# date are joined with ", " only when both are present, so a missing date
# no longer leaves a dangling ", ." at the end.
def export_as_mla_citation
  text = ''
  authors = get_author_list

  if authors.length < 4
    last_index = authors.length - 1
    authors.each_with_index do |author, index|
      text <<
        if index == 0
          author
        elsif index == last_index
          ", and " + name_reverse(author) + "."
        else
          ", " + name_reverse(author)
        end
    end
    # Close the author list with a full stop unless it already ends in one.
    unless text.blank?
      text << (text[-1, 1] != "." ? ". " : " ")
    end
  else
    text << authors.first + ", et al. "
  end

  title_info = setup_title_info
  text << "<i>" + mla_citation_title(title_info) + "</i> " unless title_info.nil?

  text << [setup_pub_info, setup_pub_date].compact.join(", ")

  text << "." unless text.blank? || text[-1, 1] == "."
  text.html_safe
end
160
+
161
# Builds a Chicago-style citation as an HTML-safe String.
#
# Layout: "Authors <i>Title.</i> Place: Publisher, Date."
#
# Author rules:
#   * more than ten authors - the first seven are listed, then " et al.";
#   * two to ten authors    - all are listed, the last introduced by "and";
#   * one author            - shown as stored.
# The first author is shown as stored; the others go through name_reverse.
def export_as_chicago_citation
  authors = get_all_authors
  author_text = ""

  unless authors.blank?
    # The leading author keeps its stored form, followed by ", " (or just a
    # space when the stored value already ends in a comma).
    lead_author = lambda { |author| author.ends_with?(",") ? "#{author} " : "#{author}, " }

    if authors.length > 10
      authors.first(7).each_with_index do |author, index|
        author_text << (index == 0 ? lead_author.call(author) : "#{name_reverse(author)}, ")
      end
      author_text << " et al."
    elsif authors.length > 1
      authors.each_with_index do |author, index|
        author_text <<
          if index == 0
            lead_author.call(author)
          elsif index + 1 == authors.length
            "and #{name_reverse(author)}."
          else
            "#{name_reverse(author)}, "
          end
      end
    else
      author_text << authors.first
    end
  end

  title_info = ""
  unless title.blank?
    title_info << citation_title(clean_end_punctuation(CGI::escapeHTML(title.first)).strip)
  end

  place = based_near.first
  publisher_name = publisher.first
  pub_date = setup_pub_date

  pub_info = ""
  unless place.blank?
    pub_info << CGI::escapeHTML(place)
    pub_info << ": " unless publisher_name.blank?
  end
  unless publisher_name.blank?
    pub_info << CGI::escapeHTML(publisher_name)
    pub_info << ", " unless pub_date.nil?
  end
  pub_info << pub_date unless pub_date.nil?

  citation = ""
  citation << "#{author_text} " unless author_text.blank?
  citation << "<i>#{title_info}.</i> " unless title_info.blank?
  citation << "#{pub_info}." unless pub_info.blank?
  citation.html_safe
end
226
+
227
+ private
228
+
229
# Derives a short publication date from the first date_created value.
#
# The value is HTML-escaped, stripped of every character except digits and
# the characters "|", "n", "d" and "." (the pattern is kept exactly as it
# was; it appears intended to let "n.d." through), truncated to four
# characters, and has one trailing punctuation mark removed.
#
# Returns the resulting String, or nil when there is no date or nothing
# usable is left after filtering. Returning nil rather than "" matters
# because callers test the result with nil? and would otherwise render
# empty parentheses.
def setup_pub_date
  first_date = date_created.first
  return nil if first_date.blank?

  date_value = CGI::escapeHTML(first_date).gsub(/[^0-9|n\.d\.]/, "")[0, 4]
  return nil if date_value.nil? || date_value.empty?

  cleaned = clean_end_punctuation(date_value)
  cleaned.empty? ? nil : cleaned
end
238
+
239
# Builds the "Place: Publisher" fragment used by the citation exporters
# from the first based_near and publisher values (each HTML-escaped).
#
# Returns the fragment with one trailing punctuation mark removed, or nil
# when neither value is present.
def setup_pub_info
  place = based_near.first
  publisher_name = publisher.first

  segments = []
  segments << CGI::escapeHTML(place) unless place.blank?
  segments << CGI::escapeHTML(publisher_name) unless publisher_name.blank?

  info = segments.join(": ").strip
  return nil if info.blank?

  clean_end_punctuation(info)
end
255
+
256
# Title-cases a title for an MLA citation.
#
# text - the title String; words are separated on spaces.
#
# Every word is capitalised except the short words listed in no_upcase.
# The first word is always capitalised, even when it is one of those short
# words, so "the art of war" becomes "The Art of War" (this matches how
# citation_title treats the first word).
#
# Returns the words re-joined with single spaces.
def mla_citation_title(text)
  no_upcase = ["a","an","and","but","by","for","it","of","the","to","with"]
  text.split(" ").each_with_index.map do |word, index|
    (index == 0 || !no_upcase.include?(word)) ? word.capitalize : word
  end.join(" ")
end
269
+
270
# Title-cases a title for a Chicago citation.
#
# title_text - the title String; words are separated on spaces.
#
# Words written entirely in upper case are left untouched. Otherwise a
# word is capitalised when it is the first word, or when it is longer than
# one character and is not in the prepositions list. Hyphenated words have
# each hyphen-separated part capitalised.
#
# Returns the words re-joined with single spaces.
def citation_title(title_text)
  prepositions = ["a","about","across","an","and","before","but","by","for","it","of","the","to","with","without"]
  title_text.split(" ").each_with_index.map do |word, index|
    all_caps = word == word.upcase
    eligible = index == 0 || (word.length > 1 && !prepositions.include?(word))
    if !all_caps && eligible
      # Splitting on "-" capitalises each part of a hyphenated word.
      word.split("-").map { |part| part.capitalize }.join("-")
    else
      word
    end
  end.join(" ")
end
283
+
284
# Prepares the first title for use in a citation: HTML-escaped, stripped,
# with trailing punctuation removed and a single full stop appended.
#
# Returns the prepared String, or nil when there is no usable title.
def setup_title_info
  first_title = title.first
  return nil if first_title.blank?

  cleaned = clean_end_punctuation(CGI::escapeHTML(first_title).strip)
  return nil if cleaned.strip.blank?

  clean_end_punctuation(cleaned.strip) + "."
end
296
+
297
# Removes a single trailing ".", ",", ":", ";" or "/" from text.
#
# Returns a copy without the last character when it is one of those marks;
# otherwise returns text itself unchanged.
def clean_end_punctuation(text)
  final_char = text[-1, 1]
  return text unless [".", ",", ":", ";", "/"].include?(final_char)

  text[0, text.length - 1]
end
303
+
304
# Returns the creators, each HTML-escaped and with one trailing
# punctuation mark removed, with duplicates dropped.
def get_author_list
  cleaned = creator.map do |author|
    escaped = CGI::escapeHTML(author)
    clean_end_punctuation(escaped)
  end
  cleaned.uniq
end
307
+
308
# Returns every creator HTML-escaped (duplicates and trailing punctuation
# are kept), or nil when there are no creators.
def get_all_authors
  authors = creator
  return nil if authors.empty?

  authors.map { |author| CGI::escapeHTML(author) }
end
312
+
313
# Abbreviates a personal name to "Surname, I." form for APA citations.
#
# name - a String (or an Array of Strings, which is concatenated) in
#        either "Surname, Given Names" or "Given Surname" order.
#
# Behaviour:
#   * a single word with no comma ("Cher") is returned unchanged;
#   * with a comma, everything before the first comma is the surname and
#     every word after it contributes one initial. The comma may or may not
#     be followed by whitespace ("Smith,John" is handled like
#     "Smith, John");
#   * without a comma, the first word is the given name and the last word
#     the surname (words in between are not used);
#   * when no given name is available, just the surname is returned, with
#     no dangling ", ".
#
# Returns the abbreviated name as a String.
def abbreviate_name(name)
  name = name.join('') if name.is_a? Array
  # make sure we handle "Cher" correctly
  return name if !name.include?(' ') && !name.include?(',')

  if name.include?(',')
    surname_part, given_part = name.split(/,\s*/, 2)
    surnames = surname_part.to_s.split(' ')
    given_names = given_part.to_s.split(' ')
  else
    words = name.split(' ')
    # Only padding around a single word (or nothing at all): no name parts
    # to rearrange.
    return name.strip if words.length < 2
    surnames = words.last(1)
    given_names = words.first(1)
  end

  initials = given_names.map { |n| "#{n[0]}." }.join
  abbreviated_name = surnames.join(' ')
  abbreviated_name << ', ' << initials unless initials.empty?
  abbreviated_name
end
329
+
330
# Turns a "Surname, Given" name into "Given Surname".
#
# name - the name String; one trailing punctuation mark is removed first.
#
# Names without a comma are returned as they are (after the punctuation
# clean-up). The comma may or may not be followed by whitespace, so
# "Smith,John" is reversed just like "Smith, John". When there are more
# than two comma-separated parts, the last part is placed before the first,
# as before.
#
# Returns the reordered name as a String.
def name_reverse(name)
  name = clean_end_punctuation(name)
  return name unless name.include?(',')

  parts = name.split(/,\s*/)
  # Nothing meaningful on one side of the comma: leave the name alone.
  return name if parts.length < 2

  parts.last + " " + parts.first
end
336
+
337
+ end
338
+ end
339
+ end