sufia-models 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67) hide show
  1. checksums.yaml +7 -0
  2. data/.gitignore +17 -0
  3. data/Gemfile +4 -0
  4. data/LICENSE.md +177 -0
  5. data/README.md +29 -0
  6. data/Rakefile +1 -0
  7. data/app/models/batch.rb +46 -0
  8. data/app/models/checksum_audit_log.rb +35 -0
  9. data/app/models/contact_form.rb +42 -0
  10. data/app/models/datastreams/batch_rdf_datastream.rb +23 -0
  11. data/app/models/datastreams/file_content_datastream.rb +18 -0
  12. data/app/models/datastreams/fits_datastream.rb +188 -0
  13. data/app/models/datastreams/generic_file_rdf_datastream.rb +75 -0
  14. data/app/models/datastreams/paranoid_rights_datastream.rb +37 -0
  15. data/app/models/datastreams/properties_datastream.rb +33 -0
  16. data/app/models/domain_term.rb +18 -0
  17. data/app/models/follow.rb +28 -0
  18. data/app/models/generic_file.rb +16 -0
  19. data/app/models/geo_names_resource.rb +34 -0
  20. data/app/models/group.rb +8 -0
  21. data/app/models/local_authority.rb +93 -0
  22. data/app/models/local_authority_entry.rb +18 -0
  23. data/app/models/single_use_link.rb +26 -0
  24. data/app/models/subject_local_authority_entry.rb +16 -0
  25. data/app/models/trophy.rb +12 -0
  26. data/app/models/version_committer.rb +17 -0
  27. data/lib/sufia/models.rb +11 -0
  28. data/lib/sufia/models/active_fedora/redis.rb +49 -0
  29. data/lib/sufia/models/active_record/redis.rb +56 -0
  30. data/lib/sufia/models/engine.rb +34 -0
  31. data/lib/sufia/models/file_content.rb +9 -0
  32. data/lib/sufia/models/file_content/extract_metadata.rb +60 -0
  33. data/lib/sufia/models/file_content/versions.rb +23 -0
  34. data/lib/sufia/models/generic_file.rb +183 -0
  35. data/lib/sufia/models/generic_file/actions.rb +39 -0
  36. data/lib/sufia/models/generic_file/audit.rb +119 -0
  37. data/lib/sufia/models/generic_file/characterization.rb +81 -0
  38. data/lib/sufia/models/generic_file/export.rb +339 -0
  39. data/lib/sufia/models/generic_file/permissions.rb +64 -0
  40. data/lib/sufia/models/generic_file/thumbnail.rb +91 -0
  41. data/lib/sufia/models/id_service.rb +57 -0
  42. data/lib/sufia/models/jobs/audit_job.rb +65 -0
  43. data/lib/sufia/models/jobs/batch_update_job.rb +86 -0
  44. data/lib/sufia/models/jobs/characterize_job.rb +43 -0
  45. data/lib/sufia/models/jobs/content_delete_event_job.rb +31 -0
  46. data/lib/sufia/models/jobs/content_deposit_event_job.rb +32 -0
  47. data/lib/sufia/models/jobs/content_new_version_event_job.rb +32 -0
  48. data/lib/sufia/models/jobs/content_restored_version_event_job.rb +40 -0
  49. data/lib/sufia/models/jobs/content_update_event_job.rb +32 -0
  50. data/lib/sufia/models/jobs/event_job.rb +33 -0
  51. data/lib/sufia/models/jobs/ffmpeg_transcode_job.rb +61 -0
  52. data/lib/sufia/models/jobs/resolrize_job.rb +23 -0
  53. data/lib/sufia/models/jobs/transcode_audio_job.rb +40 -0
  54. data/lib/sufia/models/jobs/transcode_video_job.rb +39 -0
  55. data/lib/sufia/models/jobs/unzip_job.rb +54 -0
  56. data/lib/sufia/models/jobs/user_edit_profile_event_job.rb +35 -0
  57. data/lib/sufia/models/jobs/user_follow_event_job.rb +37 -0
  58. data/lib/sufia/models/jobs/user_unfollow_event_job.rb +38 -0
  59. data/lib/sufia/models/model_methods.rb +39 -0
  60. data/lib/sufia/models/noid.rb +42 -0
  61. data/lib/sufia/models/solr_document_behavior.rb +125 -0
  62. data/lib/sufia/models/user.rb +126 -0
  63. data/lib/sufia/models/utils.rb +36 -0
  64. data/lib/sufia/models/version.rb +5 -0
  65. data/lib/tasks/sufia-models_tasks.rake +4 -0
  66. data/sufia-models.gemspec +28 -0
  67. metadata +151 -0
@@ -0,0 +1,119 @@
1
module Sufia
  module GenericFile
    # Checksum (fixity) auditing for the object that mixes this module in.
    #
    # Instance methods walk every version of every datastream of the object and
    # hand each one to the class-level +audit+, which either returns the latest
    # ChecksumAuditLog for that datastream or queues an AuditJob.
    module Audit
      extend ActiveSupport::Concern

      # Value written to the "pass" field of the unsaved placeholder log that
      # +audit+ returns when no audit result exists yet for a datastream.
      NO_RUNS = 999

      # Audits every version of every datastream.
      #
      # force - when true, an audit job is queued even if a recent log exists.
      #
      # Returns an Array with one audit log per datastream version.
      def audit(force = false)
        logs = []
        per_version { |ver| logs << ::GenericFile.audit(ver, force) }
        logs
      end

      # Calls the given block once for each version of each datastream.
      def per_version(&block)
        datastreams.each do |_dsid, ds|
          ds.versions.each { |ver| block.call(ver) }
        end
      end

      # Returns the ChecksumAuditLog records for datastream +dsid+ of this
      # object, newest first.
      def logs(dsid)
        ChecksumAuditLog.where(:dsid => dsid, :pid => pid).order('created_at desc, id desc')
      end

      # Same as +audit+ with force = true.
      def audit!
        audit(true)
      end

      # Same as +audit_stat+ with force = true.
      def audit_stat!
        audit_stat(true)
      end

      # Summarises the audit state of the whole object.
      #
      # force - passed through to +audit+.
      #
      # Returns:
      # * when every version has a real result: the stored passing value of the
      #   last log if all passed, otherwise the stored value of the first
      #   failing log (+true+ when there are no logs at all);
      # * a String message when some or all audits have not been run yet.
      def audit_stat(force = false)
        audit_results = audit(force).map { |log| log["pass"] }
        completed = audit_results.reject { |value| value == NO_RUNS }

        # run_audit stores "pass" as 1 or 0, and 0 is truthy in Ruby, so the
        # values must be compared explicitly instead of being folded with &&.
        passed = lambda { |value| value == 1 || value == true }
        failed = completed.reject { |value| passed.call(value) }

        if completed.length == audit_results.length
          return true if audit_results.empty?
          failed.empty? ? audit_results.last : failed.first
        elsif !completed.empty?
          'Some audits have not been run, but the ones run were ' + (failed.empty? ? 'passing' : 'failing') + '.'
        else
          'Audits have not yet been run on this file.'
        end
      end

      module ClassMethods
        # Same as +audit+ with force = true.
        def audit!(version)
          ::GenericFile.audit(version, true)
        end

        # Returns the latest audit log for the datastream of +version+,
        # queueing a new AuditJob first when +force+ is true or
        # +needs_audit?+ says the latest log is missing or too old.
        #
        # When a job was queued and no log exists afterwards, an unsaved
        # ChecksumAuditLog whose pass value is NO_RUNS is returned instead.
        def audit(version, force = false)
          latest_audit = find(version.pid).logs(version.dsid).first
          return latest_audit unless force || ::GenericFile.needs_audit?(version, latest_audit)

          Sufia.queue.push(AuditJob.new(version.pid, version.dsid, version.versionID))

          # Look again in case the job has already finished.
          latest_audit = find(version.pid).logs(version.dsid).first
          latest_audit || ChecksumAuditLog.new(:pass => NO_RUNS, :pid => version.pid,
                                               :dsid => version.dsid, :version => version.versionID)
        end

        # Returns false only when +latest_audit+ has an +updated_at+ that is
        # fewer than Sufia::Engine.config.max_days_between_audits days old;
        # returns true otherwise. A log without +updated_at+ is reported with
        # a warning.
        def needs_audit?(version, latest_audit)
          if latest_audit && latest_audit.updated_at
            days_since_last_audit = (DateTime.now - latest_audit.updated_at.to_date).to_i
            return false if days_since_last_audit < Sufia::Engine.config.max_days_between_audits
          else
            logger.warn "***AUDIT*** problem with audit log! Latest Audit is not nil, but updated_at is not set #{latest_audit}" unless latest_audit.nil?
          end
          true
        end

        # Runs +audit+ for every version of every datastream of every
        # GenericFile.
        def audit_everything(force = false)
          ::GenericFile.find_each do |gf|
            gf.per_version { |ver| ::GenericFile.audit(ver, force) }
          end
        end

        # Same as +audit_everything+ with force = true.
        def audit_everything!
          ::GenericFile.audit_everything(true)
        end

        # Records the checksum validity of +version+ as a new
        # ChecksumAuditLog (pass = 1 when +dsChecksumValid+ is truthy, else 0).
        # On success the log history for the version is pruned first; on
        # failure a warning is logged.
        #
        # Returns the created ChecksumAuditLog.
        def run_audit(version)
          if version.dsChecksumValid
            passing = 1
            ChecksumAuditLog.prune_history(version)
          else
            logger.warn "***AUDIT*** Audit failed for #{version.pid} #{version.versionID}"
            passing = 0
          end
          ChecksumAuditLog.create!(:pass => passing, :pid => version.pid,
                                   :dsid => version.dsid, :version => version.versionID)
        end
      end
    end
  end
end
@@ -0,0 +1,81 @@
1
+ require 'datastreams/fits_datastream'
2
module Sufia
  module GenericFile
    # Adds a FITS-backed "characterization" datastream to the including class,
    # delegates its technical-metadata terms to that datastream, and provides
    # the methods that populate it from the content datastream.
    module Characterization
      extend ActiveSupport::Concern

      included do
        has_metadata :name => "characterization", :type => FitsDatastream
        delegate :mime_type, :to => :characterization, :unique => true
        delegate_to :characterization, [:format_label, :file_size, :last_modified,
                                        :filename, :original_checksum, :rights_basis,
                                        :copyright_basis, :copyright_note,
                                        :well_formed, :valid, :status_message,
                                        :file_title, :file_author, :page_count,
                                        :file_language, :word_count, :character_count,
                                        :paragraph_count, :line_count, :table_count,
                                        :graphics_count, :byte_order, :compression,
                                        :width, :height, :color_space, :profile_name,
                                        :profile_version, :orientation, :color_map,
                                        :image_producer, :capture_device,
                                        :scanning_software, :exif_version,
                                        :gps_timestamp, :latitude, :longitude,
                                        :character_set, :markup_basis,
                                        :markup_language, :duration, :bit_depth,
                                        :sample_rate, :channels, :data_format, :offset]
      end

      # Yields to the caller's block, then queues a CharacterizeJob for this
      # object if the content datastream reported a change *before* the block
      # ran (the flag is sampled first because the block may persist it).
      def characterize_if_changed
        needs_characterization = content.changed?
        yield
        return unless needs_characterization
        Sufia.queue.push(CharacterizeJob.new(pid))
      end

      ## Extract the metadata from the content datastream and record it in the
      ## characterization datastream, then copy mapped values into the
      ## descriptive fields and save (unless the object is new).
      def characterize
        characterization.ng_xml = content.extract_metadata
        append_metadata
        self.filename = label
        save unless new_object?
      end

      # Populate descMetadata with fields from FITS (e.g. Author from pdfs),
      # following Sufia::Engine.config.fits_to_desc_mapping.
      def append_metadata
        fits_terms = characterization_terms
        Sufia::Engine.config.fits_to_desc_mapping.each_pair do |fits_key, desc_field|
          next unless fits_terms.has_key?(fits_key)

          # Normalise to an Array so single and multiple values share one path.
          fits_terms[fits_key] = [fits_terms[fits_key]] unless fits_terms[fits_key].is_a?(Array)
          fits_terms[fits_key].each do |fits_value|
            current = send(desc_field)
            if current.kind_of?(Array)
              current << fits_value unless current.include?(fits_value)
            else
              # Single-valued terms cannot be appended to, so they are overwritten.
              send("#{desc_field}=", fits_value)
            end
          end
        end
      end

      # Returns a Hash of proxied characterization term name => value for every
      # term whose value is not empty. Terms whose lookup raises NoMethodError
      # are skipped.
      def characterization_terms
        collected = {}
        characterization.class.terminology.terms.each_pair do |_name, term_def|
          if term_def.respond_to?(:proxied_term)
            proxied = term_def.proxied_term
            begin
              found = send(proxied.name)
              collected[proxied.name] = found unless found.empty?
            rescue NoMethodError
              # Deliberately ignored: this term is not available on the object.
            end
          end
        end
        collected
      end
    end
  end
end
@@ -0,0 +1,339 @@
1
module Sufia
  module GenericFile
    # Bibliographic export helpers (EndNote, OpenURL KEV, and APA / MLA /
    # Chicago citations) for the object that mixes this module in.
    #
    # The host object is expected to expose descriptive accessors such as
    # +title+, +creator+, +publisher+, +date_created+ and +based_near+; the
    # methods below call +first+, +map+ and +each+ on their return values.
    module Export
      # MIME: 'application/x-endnote-refer'
      #
      # Returns a String with one "%X value" line per populated field, multiple
      # values of a field being joined with "; ".
      def export_as_endnote
        end_note_format = {
          '%T' => [:title, lambda { |x| x.first }],
          '%Q' => [:title, lambda { |x| x.drop(1) }],
          '%A' => [:creator],
          '%C' => [:publication_place],
          '%D' => [:date_created],
          '%8' => [:date_uploaded],
          '%E' => [:contributor],
          '%I' => [:publisher],
          '%J' => [:series_title],
          '%@' => [:isbn],
          '%U' => [:related_url],
          '%7' => [:edition_statement],
          '%R' => [:persistent_url],
          '%X' => [:description],
          '%G' => [:language],
          '%[' => [:date_modified],
          '%9' => [:resource_type],
          '%~' => Application.config.application_name,
          '%W' => 'Penn State University'
        }
        text = ["%0 GenericFile"]
        end_note_format.each do |endnote_key, mapping|
          if mapping.is_a? String
            values = [mapping]
          else
            field, transform = mapping
            # Skip fields the host does not expose rather than handing nil to
            # the transform lambda.
            next unless respond_to?(field)
            values = send(field)
            values = transform.call(values) if transform
            values = [values] unless values.is_a? Array
          end
          next if values.empty? || values.first.nil?
          text << "#{endnote_key} #{values.join('; ')}"
        end
        text.join("\n")
      end

      # MIME type: 'application/x-openurl-ctx-kev'
      #
      # Returns a String of "&"-joined key/value pairs: a fixed context header
      # followed by one "rft.<element>=<escaped value>" pair per field value.
      def export_as_openurl_ctx_kev
        export_text = ["url_ver=Z39.88-2004&ctx_ver=Z39.88-2004&rft_val_fmt=info%3Aofi%2Ffmt%3Akev%3Amtx%3Adc&rfr_id=info%3Asid%2Fblacklight.rubyforge.org%3Agenerator"]
        field_map = {
          :title => 'title',
          :creator => 'creator',
          :subject => 'subject',
          :description => 'description',
          :publisher => 'publisher',
          :contributor => 'contributor',
          :date_created => 'date',
          :resource_type => 'format',
          :identifier => 'identifier',
          :language => 'language',
          :tag => 'relation',
          :based_near => 'coverage',
          :rights => 'rights'
        }
        field_map.each do |element, kev|
          values = send(element)
          next if values.empty? || values.first.nil?
          values.each { |value| export_text << "rft.#{kev}=#{CGI.escape(value)}" }
        end
        export_text.join('&')
      end

      # Returns an HTML-safe APA-style citation:
      # authors, "(date). ", italicised title, publication info.
      def export_as_apa_citation
        authors_list = get_author_list.reject(&:blank?).map { |author| abbreviate_name(author) }

        # Position is decided by index: different authors can abbreviate to the
        # same text ("Smith, J."), so comparing values would misplace separators.
        last_index = authors_list.length - 1
        authors_list_final = authors_list.each_with_index.map do |author, index|
          if index == 0
            author.strip
          elsif index == last_index
            ", &amp; " + author.strip
          else
            ", " + author.strip
          end
        end

        text = authors_list_final.join
        text << (text[-1, 1] != "." ? ". " : " ") unless text.blank?

        # Get Pub Date
        pub_date = setup_pub_date
        text << "(" + pub_date + "). " unless pub_date.nil?

        # setup title info
        title_info = setup_title_info
        text << "<i>" + title_info + "</i> " unless title_info.nil?

        # Publisher info
        pub_info = setup_pub_info
        text << pub_info unless pub_info.nil?

        text += "." unless text.blank? || text[-1, 1] == "."
        text.html_safe
      end

      # Returns an HTML-safe MLA-style citation:
      # authors (or "first, et al." for four or more), italicised title,
      # publication info, date.
      def export_as_mla_citation
        authors = get_author_list

        if authors.length < 4
          last_index = authors.length - 1
          authors_final = authors.each_with_index.map do |author, index|
            if index == 0
              author
            elsif index == last_index
              ", and " + name_reverse(author) + "."
            else
              ", " + name_reverse(author)
            end
          end
          text = authors_final.join
          text << (text[-1, 1] != "." ? ". " : " ") unless text.blank?
        else
          text = authors.first + ", et al. "
        end

        # setup title -- guarded on the prepared title text, which is nil when
        # the first title is blank even though the title list itself is not.
        title_info = setup_title_info
        text << "<i>" + mla_citation_title(title_info) + "</i> " unless title_info.nil?

        # Publication
        pub_info = setup_pub_info
        text << pub_info + ", " unless pub_info.nil?

        # Get Pub Date
        pub_date = setup_pub_date
        text << pub_date unless pub_date.nil?

        text << "." unless text.blank? || text[-1, 1] == "."
        text.html_safe
      end

      # Returns an HTML-safe Chicago-style citation:
      # authors, italicised title, "place: publisher, date.".
      def export_as_chicago_citation
        author_text = chicago_author_text(get_all_authors)

        first_title = title.first
        title_info = ""
        unless first_title.blank?
          title_info = citation_title(clean_end_punctuation(CGI.escapeHTML(first_title)).strip)
        end

        imprint = [based_near.first, publisher.first].reject(&:blank?).map { |part| CGI.escapeHTML(part) }.join(": ")
        # Joining the parts puts ", " before the date whenever anything
        # precedes it, including a place with no publisher.
        pub_info = [imprint, setup_pub_date].reject(&:blank?).join(", ")

        citation = []
        citation << "#{author_text} " unless author_text.blank?
        citation << "<i>#{title_info}.</i> " unless title_info.blank?
        citation << "#{pub_info}." unless pub_info.blank?
        citation.join.html_safe
      end

      private

      # Builds the author portion of a Chicago citation from a list of
      # already-escaped names (nil or empty gives ""). The first author is kept
      # as given; later authors go through name_reverse. More than ten authors
      # are cut to the first seven followed by " et al.".
      def chicago_author_text(authors)
        return "" if authors.blank?
        return authors.first if authors.length == 1

        lead = authors.first + (authors.first.end_with?(",") ? " " : ", ")
        if authors.length > 10
          lead + authors[1, 6].map { |author| "#{name_reverse(author)}, " }.join + " et al."
        else
          middle = authors[1..-2].map { |author| "#{name_reverse(author)}, " }.join
          lead + middle + "and #{name_reverse(authors.last)}."
        end
      end

      # Returns the publication year taken from the first date_created value:
      # the first run of four digits, else "n.d" when the value contains
      # "n.d.", else up to four leading digits. Returns nil when there is no
      # date or nothing usable in it.
      def setup_pub_date
        first_date = date_created.first
        return nil if first_date.blank?

        date_value = first_date[/\d{4}/] || first_date[/n\.d\./] || first_date.gsub(/[^0-9]/, "")[0, 4]
        return nil if date_value.empty?
        clean_end_punctuation(date_value)
      end

      # Returns "place: publisher" (either part optional, both HTML-escaped)
      # with one trailing punctuation mark removed, or nil when both are blank.
      def setup_pub_info
        parts = [based_near.first, publisher.first].reject(&:blank?).map { |part| CGI.escapeHTML(part) }
        text = parts.join(": ").strip
        return nil if text.blank?
        clean_end_punctuation(text)
      end

      # Capitalizes every word of +text+ except a fixed list of short words.
      def mla_citation_title(text)
        no_upcase = ["a","an","and","but","by","for","it","of","the","to","with"]
        text.split(" ").map { |word| no_upcase.include?(word) ? word : word.capitalize }.join(" ")
      end

      # Title-cases +title_text+: the first word and every later word longer
      # than one character that is not in the preposition list are capitalized
      # (each part of a hyphenated word separately); words that are already
      # all upper case are left untouched.
      def citation_title(title_text)
        prepositions = ["a","about","across","an","and","before","but","by","for","it","of","the","to","with","without"]
        words = title_text.split(" ").each_with_index.map do |word, index|
          not_all_caps = word != word.upcase
          if (index == 0 && not_all_caps) || (word.length > 1 && not_all_caps && !prepositions.include?(word))
            # the split("-") will handle the capitalization of hyphenated words
            word.split("-").map { |part| part.capitalize }.join("-")
          else
            word
          end
        end
        words.join(" ")
      end

      # Returns the first title, HTML-escaped, stripped of trailing punctuation
      # and terminated with ".", or nil when there is no usable title.
      def setup_title_info
        first_title = title.first
        return nil if first_title.blank?

        text = clean_end_punctuation(CGI.escapeHTML(first_title).strip)
        return nil if text.strip.blank?
        clean_end_punctuation(text.strip) + "."
      end

      # Removes one trailing ".", ",", ":", ";" or "/" from +text+, if present.
      def clean_end_punctuation(text)
        return text[0, text.length - 1] if [".",",",":",";","/"].include?(text[-1, 1])
        text
      end

      # Returns the unique creators, HTML-escaped, each with one trailing
      # punctuation mark removed.
      def get_author_list
        creator.map { |author| clean_end_punctuation(CGI.escapeHTML(author)) }.uniq
      end

      # Returns all creators HTML-escaped, or nil when there are none.
      def get_all_authors
        authors = creator
        authors.empty? ? nil : authors.map { |author| CGI.escapeHTML(author) }
      end

      # Turns "Given Surname" or "Surname, Given Names" into
      # "Surname, G." (one initial per given name). A single-word name is
      # returned unchanged.
      def abbreviate_name(name)
        name = name.join('') if name.is_a? Array
        # make sure we handle "Cher" correctly
        return name if !name.include?(' ') && !name.include?(',')

        surnames_first = name.include?(',')
        name_segments = name.split(surnames_first ? ', ' : ' ')
        given_part = surnames_first ? name_segments.last : name_segments.first
        surname_part = surnames_first ? name_segments.first : name_segments.last

        initials = given_part.split(' ').map { |n| "#{n[0]}." }.join
        "#{surname_part.split(' ').join(' ')}, #{initials}"
      end

      # Turns "Surname, Given" into "Given Surname"; a name without a comma is
      # returned with only its trailing punctuation removed.
      def name_reverse(name)
        name = clean_end_punctuation(name)
        return name unless name =~ /,/
        temp_name = name.split(", ")
        temp_name.last + " " + temp_name.first
      end
    end
  end
end