bplmodels 0.0.91 → 0.0.93
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/app/models/bplmodels/characterization.rb +3 -1
- data/app/models/bplmodels/collection.rb +29 -5
- data/app/models/bplmodels/ereader_file.rb +14 -0
- data/app/models/bplmodels/file.rb +152 -12
- data/app/models/bplmodels/institution.rb +73 -1
- data/app/models/bplmodels/marc_file.rb +14 -0
- data/app/models/bplmodels/mods_desc_metadata.rb +124 -8
- data/app/models/bplmodels/object_base.rb +692 -252
- data/app/models/bplmodels/page_metadata.rb +56 -0
- data/app/models/bplmodels/transcription_file.rb +14 -0
- data/app/models/bplmodels/volume.rb +37 -0
- data/app/models/bplmodels/workflow_metadata.rb +6 -0
- data/lib/active_fedora/rdf/indexing_service.rb +88 -0
- data/lib/active_fedora/rdf/term.rb +189 -0
- data/lib/bplmodels/constants.rb +1 -0
- data/lib/bplmodels/datastream_input_funcs.rb +4 -1
- data/lib/bplmodels/finder.rb +104 -0
- data/lib/bplmodels/version.rb +1 -1
- metadata +25 -18
@@ -77,6 +77,18 @@ module Bplmodels
|
|
77
77
|
}
|
78
78
|
}
|
79
79
|
|
80
|
+
#CLASSIFICATION--------------------------------------------------------
|
81
|
+
t.classification(:path => 'classification') {
|
82
|
+
t.displayLabel :path=>{:attribute=>'displayLabel'}
|
83
|
+
t.edition :path =>{:attribute=>"edition"}
|
84
|
+
::Mods::AUTHORITY_ATTRIBS.each { |attr_name|
|
85
|
+
t.send attr_name, :path =>{:attribute=>"#{attr_name}"}
|
86
|
+
}
|
87
|
+
::Mods::LANG_ATTRIBS.each { |attr_name|
|
88
|
+
t.send attr_name, :path =>{:attribute=>"#{attr_name}"}
|
89
|
+
}
|
90
|
+
}
|
91
|
+
|
80
92
|
=begin
|
81
93
|
# CLASSIFICATION -------------------------------------------------------------------------
|
82
94
|
t.classification(:path => 'mods/oxns:classification') {
|
@@ -289,6 +301,8 @@ module Bplmodels
|
|
289
301
|
t.authorityURI(:path=>{:attribute=>"authorityURI"})
|
290
302
|
t.valueURI(:path=>{:attribute=>"valueURI"})
|
291
303
|
t.subtitle(:path=>"subTitle", :label=>"subtitle")
|
304
|
+
t.part_number(:path=>"partNumber", :label=>"partNumber")
|
305
|
+
t.part_name(:path=>"partName", :label=>"partName")
|
292
306
|
}
|
293
307
|
t.title(:proxy=>[:title_info, :main_title])
|
294
308
|
|
@@ -322,6 +336,14 @@ module Bplmodels
|
|
322
336
|
t.issuance(:path=>"issuance")
|
323
337
|
t.edition(:path=>"edition")
|
324
338
|
t.event_type(:path=>{:attribute=>"eventType"})
|
339
|
+
t.frequency(:path=>"frequency") {
|
340
|
+
t.authority(:path=>{:attribute=>"authority"})
|
341
|
+
}
|
342
|
+
}
|
343
|
+
|
344
|
+
t.target_audience(:path=>"targetAudience") {
|
345
|
+
t.authority(:path=>{:attribute=>"authority"})
|
346
|
+
t.display_label(:path=>{:attribute=>"displayLabel"})
|
325
347
|
}
|
326
348
|
|
327
349
|
t.item_location(:path=>"location") {
|
@@ -353,6 +375,7 @@ module Bplmodels
|
|
353
375
|
t.local_accession :path => 'identifier', :attributes => { :type => "local-accession" }
|
354
376
|
t.local_call :path => 'identifier', :attributes => { :type => "local-call" }
|
355
377
|
t.local_barcode :path => 'identifier', :attributes => { :type => "local-barcode" }
|
378
|
+
t.ia_id :path => 'identifier', :attributes => { :type => "internet-archive" }
|
356
379
|
t.identifier_uri :path => 'identifier', :attributes => { :type => "uri" }
|
357
380
|
|
358
381
|
t.physical_description(:path=>"physicalDescription") {
|
@@ -556,7 +579,9 @@ module Bplmodels
|
|
556
579
|
}
|
557
580
|
|
558
581
|
|
559
|
-
t.table_of_contents(:path=>'tableOfContents')
|
582
|
+
t.table_of_contents(:path=>'tableOfContents') {
|
583
|
+
t.href(:path=>{:attribute=>'xlink:href'})
|
584
|
+
}
|
560
585
|
|
561
586
|
|
562
587
|
end
|
@@ -688,6 +713,13 @@ module Bplmodels
|
|
688
713
|
self.mods(0).accessCondition(access_index).displayLabel = displayLabel unless displayLabel.blank?
|
689
714
|
end
|
690
715
|
|
716
|
+
def insert_target_audience(value=nil, authority=nil, display_label=nil)
|
717
|
+
audience_index = self.mods(0).target_audience.count
|
718
|
+
self.mods(0).target_audience(audience_index, value) unless value.blank?
|
719
|
+
self.mods(0).target_audience(audience_index).authority = authority unless authority.blank?
|
720
|
+
self.mods(0).target_audience(audience_index).display_label = display_label unless display_label.blank?
|
721
|
+
end
|
722
|
+
|
691
723
|
|
692
724
|
def insert_type_of_resource(value=nil, manuscript=nil)
|
693
725
|
resource_index = self.mods(0).type_of_resource.count
|
@@ -824,6 +856,13 @@ module Bplmodels
|
|
824
856
|
self.mods(0).origin_info(origin_index).event_type = event_type unless event_type.blank?
|
825
857
|
end
|
826
858
|
|
859
|
+
def insert_origin_frequency(frequency, authority)
|
860
|
+
#Currently only supporting one element...
|
861
|
+
origin_index = 0
|
862
|
+
self.mods(0).origin_info(origin_index).frequency = frequency unless frequency.blank?
|
863
|
+
self.mods(0).origin_info(origin_index).frequency.authority = authority unless authority.blank?
|
864
|
+
end
|
865
|
+
|
827
866
|
def insert_tgn(tgn_id)
|
828
867
|
puts 'TGN ID is: ' + tgn_id
|
829
868
|
|
@@ -947,7 +986,7 @@ module Bplmodels
|
|
947
986
|
end
|
948
987
|
|
949
988
|
#usage=nil, supplied=nil, subtitle=nil, language=nil, type=nil, authority=nil, authorityURI=nil, valueURI=nil
|
950
|
-
def insert_title(nonSort=nil, main_title=nil, usage=nil, supplied=nil, type=nil, subtitle=nil, language=nil, display_label=nil, args={})
|
989
|
+
def insert_title(nonSort=nil, main_title=nil, usage=nil, supplied=nil, type=nil, subtitle=nil, language=nil, display_label=nil, part_number=nil, part_name=nil, args={})
|
951
990
|
title_index = self.mods(0).title_info.count
|
952
991
|
|
953
992
|
self.mods(0).title_info(title_index).nonSort = nonSort unless nonSort.blank?
|
@@ -976,6 +1015,10 @@ module Bplmodels
|
|
976
1015
|
|
977
1016
|
self.mods(0).title_info(title_index).display_label = display_label unless display_label.blank?
|
978
1017
|
|
1018
|
+
self.mods(0).title_info(title_index).part_number = part_number unless part_number.blank?
|
1019
|
+
|
1020
|
+
self.mods(0).title_info(title_index).part_name = part_name unless part_name.blank?
|
1021
|
+
|
979
1022
|
if args.present?
|
980
1023
|
raise 'broken args in Active Fedora 7'
|
981
1024
|
end
|
@@ -1104,6 +1147,66 @@ module Bplmodels
|
|
1104
1147
|
|
1105
1148
|
|
1106
1149
|
|
1150
|
+
self.insert_note(converted[:date_note],"date") unless !converted.has_key?(:date_note)
|
1151
|
+
|
1152
|
+
end
|
1153
|
+
|
1154
|
+
def insert_oai_date_copyright(date)
|
1155
|
+
#converted = Bplmodels::DatastreamInputFuncs.convert_to_mods_date(date)
|
1156
|
+
converted = BplEnrich::Dates.standardize(date)
|
1157
|
+
|
1158
|
+
#date_index = self.date.length
|
1159
|
+
date_index = 0
|
1160
|
+
dup_found = false
|
1161
|
+
|
1162
|
+
#Prevent duplicate entries... Using a flag (instead of returning early) so that the potential date note can still be inserted below.
|
1163
|
+
(self.mods(0).date(date_index).dates_copyright.length-1).times do |index|
|
1164
|
+
if converted.has_key?(:single_date)
|
1165
|
+
if self.mods(0).date(date_index).dates_copyright(index).point.blank? && self.mods(0).date(date_index).dates_copyright(index).first == converted[:single_date]
|
1166
|
+
dup_found = true
|
1167
|
+
end
|
1168
|
+
elsif converted.has_key?(:date_range)
|
1169
|
+
if self.mods(0).date(date_index).dates_copyright(index).point == 'start' && self.mods(0).date(date_index).dates_copyright(index).first == converted[:date_range][:start]
|
1170
|
+
if self.mods(0).date(date_index).dates_copyright(index+1).point == 'end' && self.mods(0).date(date_index).dates_copyright(index+1).first == converted[:date_range][:end]
|
1171
|
+
dup_found = true
|
1172
|
+
end
|
1173
|
+
|
1174
|
+
end
|
1175
|
+
end
|
1176
|
+
end
|
1177
|
+
|
1178
|
+
if !dup_found
|
1179
|
+
if converted.has_key?(:single_date) && !self.date.dates_copyright.include?(converted[:single_date])
|
1180
|
+
date_created_index = self.date(date_index).dates_copyright.length
|
1181
|
+
self.date(date_index).dates_copyright(date_created_index, converted[:single_date])
|
1182
|
+
self.date(date_index).dates_copyright(date_created_index).encoding = 'w3cdtf'
|
1183
|
+
if date_created_index == 0
|
1184
|
+
self.date(date_index).dates_copyright(date_created_index).key_date = 'yes'
|
1185
|
+
end
|
1186
|
+
|
1187
|
+
if converted.has_key?(:date_qualifier)
|
1188
|
+
self.date(date_index).dates_copyright(date_created_index).qualifier = converted[:date_qualifier]
|
1189
|
+
end
|
1190
|
+
elsif converted.has_key?(:date_range)
|
1191
|
+
date_created_index = self.date(date_index).dates_copyright.length
|
1192
|
+
self.date(date_index).dates_copyright(date_created_index, converted[:date_range][:start])
|
1193
|
+
self.date(date_index).dates_copyright(date_created_index).encoding = 'w3cdtf'
|
1194
|
+
if date_created_index == 0
|
1195
|
+
self.date(date_index).dates_copyright(date_created_index).key_date = 'yes'
|
1196
|
+
end
|
1197
|
+
self.date(date_index).dates_copyright(date_created_index).point = 'start'
|
1198
|
+
self.date(date_index).dates_copyright(date_created_index).qualifier = converted[:date_qualifier]
|
1199
|
+
|
1200
|
+
date_created_index = self.date(date_index).dates_copyright.length
|
1201
|
+
self.date(date_index).dates_copyright(date_created_index, converted[:date_range][:end])
|
1202
|
+
self.date(date_index).dates_copyright(date_created_index).encoding = 'w3cdtf'
|
1203
|
+
self.date(date_index).dates_copyright(date_created_index).point = 'end'
|
1204
|
+
self.date(date_index).dates_copyright(date_created_index).qualifier = converted[:date_qualifier]
|
1205
|
+
end
|
1206
|
+
end
|
1207
|
+
|
1208
|
+
|
1209
|
+
|
1107
1210
|
self.insert_note(converted[:date_note],"date") unless !converted.has_key?(:date_note)
|
1108
1211
|
|
1109
1212
|
end
|
@@ -1639,9 +1742,10 @@ module Bplmodels
|
|
1639
1742
|
self.find_by_terms(:subject_cartographic).slice(index.to_i).remove
|
1640
1743
|
end
|
1641
1744
|
|
1642
|
-
def insert_table_of_contents(
|
1745
|
+
def insert_table_of_contents(text_value, url=nil)
|
1643
1746
|
contents_index = self.mods(0).table_of_contents.count
|
1644
|
-
self.mods(0).table_of_contents(contents_index,
|
1747
|
+
self.mods(0).table_of_contents(contents_index, text_value) unless text_value.blank?
|
1748
|
+
self.mods(0).table_of_contents(contents_index).href = url unless url.blank?
|
1645
1749
|
end
|
1646
1750
|
|
1647
1751
|
def remove_table_of_contents(index)
|
@@ -1765,16 +1869,28 @@ module Bplmodels
|
|
1765
1869
|
def insert_identifier(identifier=nil, type=nil, display_label=nil, invalid=nil)
|
1766
1870
|
identifier_index = self.mods(0).identifier.count
|
1767
1871
|
|
1768
|
-
|
1769
|
-
|
1770
|
-
|
1771
|
-
|
1872
|
+
if identifier.present?
|
1873
|
+
self.mods(0).identifier(identifier_index, identifier) unless identifier.blank?
|
1874
|
+
self.mods(0).identifier(identifier_index).type_at = type unless type.blank?
|
1875
|
+
self.mods(0).identifier(identifier_index).displayLabel = display_label unless display_label.blank?
|
1876
|
+
self.mods(0).identifier(identifier_index).invalid = 'yes' if (invalid.present? && invalid == 'yes')
|
1877
|
+
end
|
1772
1878
|
end
|
1773
1879
|
|
1774
1880
|
def remove_identifier(index)
|
1775
1881
|
self.find_by_terms(:identifier).slice(index.to_i).remove
|
1776
1882
|
end
|
1777
1883
|
|
1884
|
+
def insert_classification(value=nil, edition=nil, authority=nil, display_label=nil)
|
1885
|
+
classification_index = self.mods(0).classification.count
|
1886
|
+
if value.present?
|
1887
|
+
self.mods(0).classification(classification_index, value)
|
1888
|
+
self.mods(0).classification(classification_index).edition = edition unless edition.blank?
|
1889
|
+
self.mods(0).classification(classification_index).authority = authority unless authority.blank?
|
1890
|
+
self.mods(0).classification(classification_index).displayLabel = display_label unless display_label.blank?
|
1891
|
+
end
|
1892
|
+
end
|
1893
|
+
|
1778
1894
|
|
1779
1895
|
define_template :mcgreevy do |xml|
|
1780
1896
|
xml.recordInfo {
|
@@ -13,20 +13,33 @@ module Bplmodels
|
|
13
13
|
|
14
14
|
has_many :document_files, :class_name => "Bplmodels::DocumentFile", :property=> :is_document_of
|
15
15
|
|
16
|
+
has_many :ereader_files, :class_name => "Bplmodels::EreaderFile", :property=> :is_ereader_of
|
17
|
+
|
16
18
|
has_many :files, :class_name => "Bplmodels::File", :property=> :is_file_of
|
17
19
|
|
18
20
|
|
19
21
|
|
20
22
|
belongs_to :institution, :class_name => 'Bplmodels::Institution', :property => :is_member_of
|
21
23
|
|
22
|
-
|
24
|
+
has_and_belongs_to_many :collection, :class_name => 'Bplmodels::Collection', :property => :is_member_of_collection
|
25
|
+
|
26
|
+
#has_and_belongs_to_many :organization, :class_name => 'Bplmodels::Collection', :property => :is_member_of_collection
|
27
|
+
|
28
|
+
belongs_to :admin_set, :class_name => 'Bplmodels::Collection', :property => :administrative_set
|
23
29
|
|
24
|
-
belongs_to :organization, :class_name => 'Bplmodels::Collection', :property => :is_member_of_collection
|
25
30
|
has_and_belongs_to_many :members, :class_name=> "Bplmodels::Collection", :property=> :hasSubset
|
26
31
|
|
27
32
|
has_metadata :name => "descMetadata", :type => ModsDescMetadata
|
28
33
|
has_metadata :name => "workflowMetadata", :type => WorkflowMetadata
|
29
34
|
|
35
|
+
has_file_datastream 'marc', versionable: false, label: 'MARC metadata'
|
36
|
+
has_file_datastream 'marcXML', versionable: false, label: 'MARC XML metadata'
|
37
|
+
has_file_datastream 'iaMeta', versionable: false, label: 'Internet Archive metadata'
|
38
|
+
has_file_datastream 'scanData', versionable: false, label: 'Internet Archive scanData metadata'
|
39
|
+
has_file_datastream 'plainText', versionable: false, label: 'Plain Text representation of this object'
|
40
|
+
has_file_datastream 'djvuXML', versionable: false, label: 'XML version of DJVU output'
|
41
|
+
has_file_datastream 'abbyy', versionable: false, label: 'Abbyy OCR of this object'
|
42
|
+
|
30
43
|
# Uses the Hydra Rights Metadata Schema for tracking access permissions & copyright
|
31
44
|
has_metadata :name => "rightsMetadata", :type => Hydra::Datastream::RightsMetadata
|
32
45
|
|
@@ -69,6 +82,10 @@ module Bplmodels
|
|
69
82
|
self.add_relationship(:oai_item_id, "oai:digitalcommonwealth.org:" + self.pid, true)
|
70
83
|
end
|
71
84
|
|
85
|
+
def remove_oai_relationships
|
86
|
+
self.remove_relationship(:oai_item_id, "oai:digitalcommonwealth.org:" + self.pid, true)
|
87
|
+
end
|
88
|
+
|
72
89
|
#alias :limited_delete :delete
|
73
90
|
|
74
91
|
=begin
|
@@ -86,6 +103,55 @@ module Bplmodels
|
|
86
103
|
}
|
87
104
|
end
|
88
105
|
end
|
106
|
+
|
107
|
+
#FIXME: What if this is interrupted? Need to do this better...
|
108
|
+
#Broken so no match for now
|
109
|
+
if self.class.name == "Bplmodels::Volume2"
|
110
|
+
next_object = nil
|
111
|
+
previous_object = nil
|
112
|
+
#volume_object = Bplmodels::Finder.getVolumeObjects(self.pid)
|
113
|
+
self.relationships.each_statement do |statement|
|
114
|
+
puts statement.predicate
|
115
|
+
if statement.predicate == "http://projecthydra.org/ns/relations#isPrecedingVolumeOf"
|
116
|
+
next_object = ActiveFedora::Base.find(statement.object.to_s.split('/').last).adapt_to_cmodel
|
117
|
+
elsif statement.predicate == "http://projecthydra.org/ns/relations#isFollowingVolumeOf"
|
118
|
+
previous_object = ActiveFedora::Base.find(statement.object.to_s.split('/').last).adapt_to_cmodel
|
119
|
+
end
|
120
|
+
end
|
121
|
+
|
122
|
+
if next_object.present? and previous_object.present?
|
123
|
+
next_object.relationships.each_statement do |statement|
|
124
|
+
if statement.predicate == "http://projecthydra.org/ns/relations#isFollowingVolumeOf"
|
125
|
+
next_object.remove_relationship(:is_following_volume_of, statement.object)
|
126
|
+
end
|
127
|
+
end
|
128
|
+
|
129
|
+
previous_object.relationships.each_statement do |statement|
|
130
|
+
if statement.predicate == "http://projecthydra.org/ns/relations#isPrecedingVolumeOf"
|
131
|
+
previous_object.remove_relationship(:is_preceding_volume_of, statement.object)
|
132
|
+
end
|
133
|
+
end
|
134
|
+
|
135
|
+
next_object.add_relationship(:is_following_volume_of, "info:fedora/#{previous_object.pid}", true)
|
136
|
+
previous_object.add_relationship(:is_preceding_volume_of, "info:fedora/#{next_object.pid}", true)
|
137
|
+
next_object.save
|
138
|
+
previous_object.save
|
139
|
+
elsif next_object.present? and previous_object.blank?
|
140
|
+
next_object.relationships.each_statement do |statement|
|
141
|
+
if statement.predicate == "http://projecthydra.org/ns/relations#isFollowingVolumeOf"
|
142
|
+
next_object.remove_relationship(:is_following_volume_of, statement.object)
|
143
|
+
end
|
144
|
+
end
|
145
|
+
|
146
|
+
elsif next_object.blank? and previous_object.present?
|
147
|
+
previous_object.relationships.each_statement do |statement|
|
148
|
+
if statement.predicate == "http://projecthydra.org/ns/relations#isPrecedingVolumeOf"
|
149
|
+
previous_object.remove_relationship(:is_preceding_volume_of, statement.object)
|
150
|
+
end
|
151
|
+
end
|
152
|
+
end
|
153
|
+
self.collection.first.update_index
|
154
|
+
end
|
89
155
|
super()
|
90
156
|
end
|
91
157
|
|
@@ -95,9 +161,10 @@ module Bplmodels
|
|
95
161
|
#if !self.instance_of?(klass)
|
96
162
|
adapted_object = self.adapt_to(klass)
|
97
163
|
|
98
|
-
|
164
|
+
adapted_object.relationships.each_statement do |statement|
|
99
165
|
if statement.predicate == "info:fedora/fedora-system:def/model#hasModel"
|
100
|
-
|
166
|
+
adapted_object.remove_relationship(:has_model, statement.object)
|
167
|
+
#puts statement.object
|
101
168
|
end
|
102
169
|
end
|
103
170
|
|
@@ -124,7 +191,8 @@ module Bplmodels
|
|
124
191
|
|
125
192
|
|
126
193
|
doc['label_ssim'] = self.label.to_s
|
127
|
-
|
194
|
+
|
195
|
+
# dates
|
128
196
|
doc['date_start_dtsi'] = []
|
129
197
|
doc['date_start_tsim'] = []
|
130
198
|
doc['date_end_dtsi'] = []
|
@@ -132,16 +200,23 @@ module Bplmodels
|
|
132
200
|
doc['date_facet_ssim'] = []
|
133
201
|
doc['date_type_ssm'] = []
|
134
202
|
doc['date_start_qualifier_ssm'] = []
|
203
|
+
doc['note_date_tsim'] = []
|
135
204
|
dates_static = []
|
136
205
|
dates_start = []
|
137
206
|
dates_end = []
|
138
207
|
|
208
|
+
# these values get appended to dates in _dtsi Solr format
|
209
|
+
start_date_suffix_for_yyyy = '-01-01T00:00:00.000Z'
|
210
|
+
start_date_suffix_for_yyyymm = '-01T01:00:00.000Z'
|
211
|
+
start_date_suffix_for_yyyymmdd = 'T00:00:00.000Z'
|
212
|
+
end_date_suffix_for_yyyy = '-12-31T23:59:59.999Z'
|
213
|
+
end_date_suffix_for_yyyymm = '-28T23:59:59.999Z' # TODO: end DD value should depend on MM value ('31' for Jan., '28' for Feb., etc.)
|
214
|
+
end_date_suffix_for_yyyymmdd = 'T23:59:59.999Z'
|
215
|
+
|
139
216
|
# dateOther
|
140
|
-
if self.descMetadata.date(0).date_other[0]
|
141
|
-
|
142
|
-
|
143
|
-
else
|
144
|
-
# TODO insert code for date_other values here
|
217
|
+
if self.descMetadata.date(0).date_other[0]
|
218
|
+
self.descMetadata.date(0).date_other.each do |date_other|
|
219
|
+
doc['note_date_tsim'] << date_other
|
145
220
|
end
|
146
221
|
end
|
147
222
|
|
@@ -151,8 +226,8 @@ module Bplmodels
|
|
151
226
|
#dateCreated
|
152
227
|
if self.descMetadata.date(0).dates_created[0]
|
153
228
|
self.descMetadata.date(0).dates_created.each_with_index do |date,index|
|
154
|
-
|
155
|
-
|
229
|
+
#FIXME: Had to add the "date.present?" check and the when '' case for oai-test:h415pc718
|
230
|
+
if date.present?
|
156
231
|
case self.descMetadata.date(0).dates_created(index).point[0]
|
157
232
|
when nil, ''
|
158
233
|
dates_static << date
|
@@ -216,48 +291,63 @@ module Bplmodels
|
|
216
291
|
end
|
217
292
|
|
218
293
|
# set the date ranges for date-time fields and decade faceting
|
219
|
-
|
294
|
+
sorted_start_dates = (dates_static + dates_start).sort
|
295
|
+
earliest_date = sorted_start_dates[0]
|
220
296
|
date_facet_start = earliest_date[0..3].to_i
|
221
|
-
|
222
|
-
|
223
|
-
|
224
|
-
|
225
|
-
|
226
|
-
|
227
|
-
|
228
|
-
|
229
|
-
|
297
|
+
latest_date = dates_end.sort.reverse[0]
|
298
|
+
|
299
|
+
# set earliest date
|
300
|
+
if earliest_date =~ /[0-9]{4}[0-9-]*\z/ # rough date format matching
|
301
|
+
if earliest_date.length == 4
|
302
|
+
doc['date_start_dtsi'].append(earliest_date + start_date_suffix_for_yyyy)
|
303
|
+
elsif earliest_date.length == 7
|
304
|
+
doc['date_start_dtsi'].append(earliest_date + start_date_suffix_for_yyyymm)
|
305
|
+
elsif earliest_date.length > 11
|
306
|
+
doc['date_start_dtsi'].append(earliest_date)
|
307
|
+
else
|
308
|
+
doc['date_start_dtsi'].append(earliest_date + start_date_suffix_for_yyyymmdd)
|
309
|
+
end
|
230
310
|
end
|
231
311
|
|
232
|
-
|
233
|
-
|
312
|
+
# set latest date
|
313
|
+
if latest_date && latest_date =~ /[0-9]{4}[0-9-]*\z/
|
234
314
|
date_facet_end = latest_date[0..3].to_i
|
235
315
|
if latest_date.length == 4
|
236
|
-
doc['date_end_dtsi'].append(latest_date +
|
316
|
+
doc['date_end_dtsi'].append(latest_date + end_date_suffix_for_yyyy)
|
237
317
|
elsif latest_date.length == 7
|
238
|
-
|
239
|
-
# e.g., '31' for January, but '28' for February, etc.
|
240
|
-
doc['date_end_dtsi'].append(latest_date + '-28T23:59:59.999Z')
|
318
|
+
doc['date_end_dtsi'].append(latest_date + end_date_suffix_for_yyyymm)
|
241
319
|
elsif latest_date.length > 11
|
242
320
|
doc['date_end_dtsi'].append(latest_date)
|
243
321
|
else
|
244
|
-
doc['date_end_dtsi'].append(latest_date +
|
322
|
+
doc['date_end_dtsi'].append(latest_date + end_date_suffix_for_yyyymmdd)
|
245
323
|
end
|
246
324
|
else
|
247
325
|
date_facet_end = 0
|
326
|
+
latest_start_date = sorted_start_dates[-1]
|
327
|
+
if latest_start_date =~ /[0-9]{4}[0-9-]*\z/
|
328
|
+
if latest_start_date.length == 4
|
329
|
+
doc['date_end_dtsi'].append(latest_start_date + end_date_suffix_for_yyyy)
|
330
|
+
elsif latest_start_date.length == 7
|
331
|
+
doc['date_end_dtsi'].append(latest_start_date + end_date_suffix_for_yyyymm)
|
332
|
+
elsif latest_start_date.length > 11
|
333
|
+
doc['date_end_dtsi'].append(latest_start_date)
|
334
|
+
else
|
335
|
+
doc['date_end_dtsi'].append(latest_start_date + end_date_suffix_for_yyyymmdd)
|
336
|
+
end
|
337
|
+
end
|
248
338
|
end
|
249
339
|
|
250
340
|
# decade faceting
|
251
|
-
(
|
252
|
-
if (
|
341
|
+
(1100..2020).step(10) do |index|
|
342
|
+
if (date_facet_start >= index && date_facet_start < index+10) || (date_facet_end != -1 && index > date_facet_start && date_facet_end >= index)
|
253
343
|
doc['date_facet_ssim'].append(index.to_s + 's')
|
254
344
|
end
|
255
345
|
end
|
256
346
|
|
257
347
|
doc['date_facet_yearly_ssim'] = []
|
258
348
|
# yearly faceting
|
259
|
-
(
|
260
|
-
if (
|
349
|
+
(1100..2020).step(1) do |index|
|
350
|
+
if (date_facet_start >= index && date_facet_start < index+1) || (date_facet_end != -1 && index > date_facet_start && date_facet_end >= index)
|
261
351
|
doc['date_facet_yearly_ssim'].append(index.to_s + 's')
|
262
352
|
end
|
263
353
|
end
|
@@ -293,26 +383,48 @@ module Bplmodels
|
|
293
383
|
doc['identifier_local_barcode_tsim'] = self.descMetadata.local_barcode
|
294
384
|
doc['identifier_isbn_tsim'] = self.descMetadata.isbn
|
295
385
|
doc['identifier_lccn_tsim'] = self.descMetadata.lccn
|
386
|
+
doc['identifier_ia_id_ssi'] = self.descMetadata.ia_id
|
296
387
|
|
297
388
|
doc['identifier_ark_ssi'] = ''
|
298
389
|
|
299
390
|
doc['local_accession_id_tsim'] = self.descMetadata.local_accession[0].to_s
|
300
|
-
|
301
|
-
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
306
|
-
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
391
|
+
|
392
|
+
#Assign collection, admin, and institution labels
|
393
|
+
doc['collection_name_ssim'] = []
|
394
|
+
doc['collection_name_tsim'] = []
|
395
|
+
doc['collection_pid_ssm'] = []
|
396
|
+
|
397
|
+
object_institution_pid = nil
|
398
|
+
object_collections = self.relationships(:is_member_of_collection)
|
399
|
+
object_collections.each do |collection_ident|
|
400
|
+
solr_response_collection = ActiveFedora::Base.find_with_conditions("id"=>collection_ident.gsub('info:fedora/','')).first
|
401
|
+
doc['collection_name_ssim'] << solr_response_collection["label_ssim"].first.to_s
|
402
|
+
doc['collection_name_tsim'] << solr_response_collection["label_ssim"].first.to_s
|
403
|
+
doc['collection_pid_ssm'] << solr_response_collection["id"].to_s
|
404
|
+
|
405
|
+
if object_institution_pid.blank?
|
406
|
+
object_institution_pid = solr_response_collection['institution_pid_ssi']
|
407
|
+
solr_response_institution = ActiveFedora::Base.find_with_conditions("id"=>object_institution_pid).first
|
408
|
+
doc['institution_name_ssim'] = solr_response_institution["label_ssim"].first.to_s
|
409
|
+
doc['institution_name_tsim'] = solr_response_institution["label_ssim"].first.to_s
|
410
|
+
doc['institution_pid_ssi'] = solr_response_institution["id"].to_s
|
411
|
+
doc['institution_pid_si'] = solr_response_institution["id"].to_s
|
311
412
|
end
|
413
|
+
end
|
312
414
|
|
415
|
+
object_admin_set = self.relationships(:administrative_set).first
|
416
|
+
if object_admin_set.present?
|
417
|
+
solr_response_admin = ActiveFedora::Base.find_with_conditions("id"=>object_admin_set.gsub('info:fedora/','')).first
|
418
|
+
doc['admin_set_name_ssim'] = solr_response_admin["label_ssim"].first.to_s
|
419
|
+
doc['admin_set_name_tsim'] = solr_response_admin["label_ssim"].first.to_s
|
420
|
+
doc['admin_set_pid_ssm'] = solr_response_admin["id"].to_s
|
421
|
+
else
|
422
|
+
raise "Potential problem setting administrative set?"
|
313
423
|
end
|
314
424
|
|
315
425
|
|
426
|
+
|
427
|
+
|
316
428
|
#self.descMetadata.identifier_uri.each do |identifier|
|
317
429
|
#if idenfifier.include?("ark")
|
318
430
|
#doc['identifier_uri_ss'] = self.descMetadata.identifier_uri
|
@@ -333,6 +445,8 @@ module Bplmodels
|
|
333
445
|
|
334
446
|
doc['issuance_tsim'] = self.descMetadata.origin_info.issuance
|
335
447
|
|
448
|
+
doc['classification_tsim'] = self.descMetadata.classification
|
449
|
+
|
336
450
|
doc['lang_term_ssim'] = self.descMetadata.language.language_term
|
337
451
|
#doc['lang_val_uri_ssim'] = self.descMetadata.language.language_term.lang_val_uri
|
338
452
|
|
@@ -396,7 +510,7 @@ module Bplmodels
|
|
396
510
|
doc['name_personal_tsim'].append(self.descMetadata.mods(0).name(index).namePart[0])
|
397
511
|
end
|
398
512
|
if self.descMetadata.mods(0).name(index).role.length > 1
|
399
|
-
doc['name_personal_role_tsim'].append(self.descMetadata.mods(0).name(
|
513
|
+
doc['name_personal_role_tsim'].append(self.descMetadata.mods(0).name(index).role.join('||').gsub(/[\n]\s*/,''))
|
400
514
|
else
|
401
515
|
doc['name_personal_role_tsim'].append(self.descMetadata.mods(0).name(index).role.text[0])
|
402
516
|
end
|
@@ -411,7 +525,7 @@ module Bplmodels
|
|
411
525
|
doc['name_corporate_tsim'].append(corporate_name)
|
412
526
|
end
|
413
527
|
if self.descMetadata.mods(0).name(index).role.length > 1
|
414
|
-
doc['name_corporate_role_tsim'].append(self.descMetadata.mods(0).name(
|
528
|
+
doc['name_corporate_role_tsim'].append(self.descMetadata.mods(0).name(index).role.join('||').gsub(/[\n]\s*/,''))
|
415
529
|
else
|
416
530
|
doc['name_corporate_role_tsim'].append(self.descMetadata.mods(0).name(index).role.text[0])
|
417
531
|
end
|
@@ -423,7 +537,7 @@ module Bplmodels
|
|
423
537
|
doc['name_generic_tsim'].append(self.descMetadata.mods(0).name(index).namePart[0])
|
424
538
|
end
|
425
539
|
if self.descMetadata.mods(0).name(index).role.length > 1
|
426
|
-
doc['name_generic_role_tsim'].append(self.descMetadata.mods(0).name(
|
540
|
+
doc['name_generic_role_tsim'].append(self.descMetadata.mods(0).name(index).role.join('||').gsub(/[\n]\s*/,''))
|
427
541
|
else
|
428
542
|
doc['name_generic_role_tsim'].append(self.descMetadata.mods(0).name(index).role.text[0])
|
429
543
|
end
|
@@ -440,12 +554,12 @@ module Bplmodels
|
|
440
554
|
doc['digital_origin_ssi'] = self.descMetadata.physical_description(0).digital_origin[0]
|
441
555
|
doc['internet_media_type_ssim'] = self.descMetadata.physical_description(0).internet_media_type
|
442
556
|
|
443
|
-
doc['physical_location_ssim'] = self.descMetadata.item_location
|
444
|
-
doc['physical_location_tsim'] = self.descMetadata.item_location
|
557
|
+
doc['physical_location_ssim'] = self.descMetadata.item_location.physical_location
|
558
|
+
doc['physical_location_tsim'] = self.descMetadata.item_location.physical_location
|
445
559
|
|
446
|
-
doc['sub_location_tsim'] = self.descMetadata.item_location
|
560
|
+
doc['sub_location_tsim'] = self.descMetadata.item_location.holding_simple.copy_information.sub_location
|
447
561
|
|
448
|
-
doc['shelf_locator_tsim'] = self.descMetadata.item_location
|
562
|
+
doc['shelf_locator_tsim'] = self.descMetadata.item_location.holding_simple.copy_information.shelf_locator
|
449
563
|
|
450
564
|
doc['subject_topic_tsim'] = self.descMetadata.subject.topic
|
451
565
|
|
@@ -608,52 +722,6 @@ module Bplmodels
|
|
608
722
|
|
609
723
|
end
|
610
724
|
|
611
|
-
=begin
|
612
|
-
new_logger = Logger.new('log/geo_log')
|
613
|
-
new_logger.level = Logger::ERROR
|
614
|
-
|
615
|
-
#Blacklight-maps esque placename_coords
|
616
|
-
0.upto self.descMetadata.subject.length-1 do |subject_index|
|
617
|
-
if self.descMetadata.mods(0).subject(subject_index).cartographics.present? && self.descMetadata.mods(0).subject(subject_index).cartographics.scale.blank?
|
618
|
-
place_name = "Results"
|
619
|
-
|
620
|
-
if self.descMetadata.mods(0).subject(subject_index).authority == ['tgn'] && self.descMetadata.mods(0).subject(subject_index).hierarchical_geographic[0].blank?
|
621
|
-
new_logger.error "Weird Geography for: " + self.pid
|
622
|
-
end
|
623
|
-
|
624
|
-
if self.descMetadata.mods(0).subject(subject_index).authority == ['tgn'] && self.descMetadata.mods(0).subject(subject_index).hierarchical_geographic[0].present?
|
625
|
-
place_locations = []
|
626
|
-
self.descMetadata.mods(0).subject(subject_index).hierarchical_geographic[0].split("\n").each do |split_geo|
|
627
|
-
split_geo = split_geo.strip
|
628
|
-
place_locations << split_geo if split_geo.present? && !split_geo.include?('North and Central America') && !split_geo.include?('United States')
|
629
|
-
end
|
630
|
-
place_name = place_locations.reverse.join(', ')
|
631
|
-
elsif self.descMetadata.mods(0).subject(subject_index).geographic.present?
|
632
|
-
place_name = self.descMetadata.mods(0).subject(subject_index).geographic.first
|
633
|
-
end
|
634
|
-
|
635
|
-
doc['subject_blacklight_maps_ssim'] = "#{place_name}-|-#{self.descMetadata.mods(0).subject(subject_index).cartographics.coordinates[0].split(',').first}-|-#{self.descMetadata.mods(0).subject(subject_index).cartographics.coordinates[0].split(',').last}"
|
636
|
-
end
|
637
|
-
end
|
638
|
-
=end
|
639
|
-
#Blacklight-maps coords only
|
640
|
-
=begin
|
641
|
-
best_coords_found = false
|
642
|
-
0.upto self.descMetadata.subject.length-1 do |subject_index|
|
643
|
-
if self.descMetadata.mods(0).subject(subject_index).cartographics.present?
|
644
|
-
if self.descMetadata.mods(0).subject(subject_index).authority.present? && self.descMetadata.mods(0).subject(subject_index).authority != ['tgn']
|
645
|
-
best_coords_found = true
|
646
|
-
doc['subject_blacklight_maps_coords_ssim'] = self.descMetadata.mods(0).subject(subject_index).cartographics.coordinates[0]
|
647
|
-
end
|
648
|
-
end
|
649
|
-
end
|
650
|
-
0.upto self.descMetadata.subject.length-1 do |subject_index|
|
651
|
-
if self.descMetadata.mods(0).subject(subject_index).cartographics.present? && !best_coords_found
|
652
|
-
doc['subject_blacklight_maps_coords_ssim'] = self.descMetadata.mods(0).subject(subject_index).cartographics.coordinates[0]
|
653
|
-
end
|
654
|
-
end
|
655
|
-
=end
|
656
|
-
|
657
725
|
# name subjects
|
658
726
|
doc['subject_name_personal_tsim'] = []
|
659
727
|
doc['subject_name_corporate_tsim'] = []
|
@@ -687,59 +755,79 @@ module Bplmodels
|
|
687
755
|
|
688
756
|
end
|
689
757
|
|
690
|
-
#doc['subject_facet_ssim'] = self.descMetadata.subject.topic + self.descMetadata.subject.corporate_name.name_part + self.descMetadata.subject.personal_name.name_part
|
691
|
-
|
692
758
|
doc['subject_facet_ssim'].concat(self.descMetadata.subject.topic)
|
693
759
|
|
694
760
|
# temporal subjects
|
695
761
|
if self.descMetadata.subject.temporal.length > 0
|
762
|
+
|
696
763
|
doc['subject_temporal_start_tsim'] = []
|
697
764
|
doc['subject_temporal_start_dtsim'] = []
|
698
765
|
doc['subject_temporal_facet_ssim'] = []
|
766
|
+
doc['subject_temporal_end_tsim'] = []
|
767
|
+
doc['subject_temporal_end_dtsim'] = []
|
699
768
|
subject_date_range_start = []
|
700
769
|
subject_date_range_end = []
|
770
|
+
|
701
771
|
self.descMetadata.subject.temporal.each_with_index do |value,index|
|
702
772
|
if self.descMetadata.subject.temporal.point[index] != 'end'
|
703
|
-
|
704
|
-
|
705
|
-
|
706
|
-
|
707
|
-
|
708
|
-
doc['
|
709
|
-
elsif subject_temporal_start.length == 7
|
710
|
-
doc['subject_temporal_start_dtsim'].append(subject_temporal_start + '-01T01:00:00.000Z')
|
711
|
-
else
|
712
|
-
doc['subject_temporal_start_dtsim'].append(subject_temporal_start + 'T00:00:00.000Z')
|
773
|
+
doc['subject_temporal_start_tsim'] << value
|
774
|
+
subject_date_range_start << value
|
775
|
+
# if there is no accompanying end date, create nil value placeholders
|
776
|
+
unless self.descMetadata.subject.temporal.point[index+1] == 'end'
|
777
|
+
subject_date_range_end << nil
|
778
|
+
doc['subject_temporal_end_tsim'] << 'nil'
|
713
779
|
end
|
714
780
|
else
|
715
|
-
doc['subject_temporal_end_tsim']
|
716
|
-
|
717
|
-
subject_temporal_end = value
|
718
|
-
doc['subject_temporal_end_tsim'].append(subject_temporal_end)
|
719
|
-
subject_date_range_end.append(subject_temporal_end)
|
720
|
-
# subject_temporal_end.length > 4 ? subject_date_range_end.append(subject_temporal_end[0..3]) : subject_date_range_end.append(subject_temporal_end)
|
721
|
-
if subject_temporal_end.length == 4
|
722
|
-
doc['subject_temporal_end_dtsim'].append(subject_temporal_end + '-01-01T00:00:00.000Z')
|
723
|
-
elsif subject_temporal_end.length == 7
|
724
|
-
doc['subject_temporal_end_dtsim'].append(subject_temporal_end + '-01T01:00:00.000Z')
|
725
|
-
else
|
726
|
-
doc['subject_temporal_end_dtsim'].append(subject_temporal_end + 'T00:00:00.000Z')
|
727
|
-
end
|
781
|
+
doc['subject_temporal_end_tsim'] << value
|
782
|
+
subject_date_range_end << value
|
728
783
|
end
|
729
784
|
end
|
730
785
|
|
731
786
|
if subject_date_range_start.length > 0
|
732
787
|
subject_date_range_start.each_with_index do |date_start,index|
|
733
|
-
|
734
|
-
|
788
|
+
formatted_date_subject_start = false
|
789
|
+
formatted_date_subject_end = false
|
790
|
+
if date_start =~ /[0-9]{4}[0-9-]*\z/ # rough check for date formatting
|
791
|
+
formatted_date_subject_start = true
|
792
|
+
if date_start.length == 7
|
793
|
+
doc['subject_temporal_start_dtsim'] << date_start + start_date_suffix_for_yyyymm
|
794
|
+
elsif date_start.length == 10
|
795
|
+
doc['subject_temporal_start_dtsim'] << date_start + start_date_suffix_for_yyyymmdd
|
796
|
+
else
|
797
|
+
doc['subject_temporal_start_dtsim'] << date_start[0..3] + start_date_suffix_for_yyyy
|
798
|
+
end
|
799
|
+
end
|
800
|
+
if subject_date_range_end[index]
|
801
|
+
doc['subject_temporal_facet_ssim'] << date_start[0..3] + '-' + subject_date_range_end[index][0..3]
|
802
|
+
if formatted_date_subject_start
|
803
|
+
if subject_date_range_end[index] =~ /[0-9]{4}[0-9-]*\z/ # rough check for date formatting
|
804
|
+
formatted_date_subject_end = true
|
805
|
+
if subject_date_range_end[index].length == 7
|
806
|
+
doc['subject_temporal_end_dtsim'] << subject_date_range_end[index] + end_date_suffix_for_yyyymm
|
807
|
+
elsif subject_date_range_end[index].length == 10
|
808
|
+
doc['subject_temporal_end_dtsim'] << subject_date_range_end[index] + end_date_suffix_for_yyyymmdd
|
809
|
+
else
|
810
|
+
doc['subject_temporal_end_dtsim'] << subject_date_range_end[index][0..3] + end_date_suffix_for_yyyy
|
811
|
+
end
|
812
|
+
end
|
813
|
+
end
|
735
814
|
else
|
736
|
-
doc['subject_temporal_facet_ssim']
|
815
|
+
doc['subject_temporal_facet_ssim'] << date_start
|
816
|
+
end
|
817
|
+
if formatted_date_subject_start && !formatted_date_subject_end
|
818
|
+
if date_start.length == 7
|
819
|
+
doc['subject_temporal_end_dtsim'] << date_start + end_date_suffix_for_yyyymm
|
820
|
+
elsif date_start.length == 10
|
821
|
+
doc['subject_temporal_end_dtsim'] << date_start + end_date_suffix_for_yyyymmdd
|
822
|
+
else
|
823
|
+
doc['subject_temporal_end_dtsim'] << date_start[0..3] + end_date_suffix_for_yyyy
|
824
|
+
end
|
737
825
|
end
|
738
826
|
end
|
739
827
|
end
|
740
828
|
|
829
|
+
# add temporal subject values to subject facet field
|
741
830
|
doc['subject_facet_ssim'].concat(doc['subject_temporal_facet_ssim'])
|
742
|
-
|
743
831
|
end
|
744
832
|
|
745
833
|
# title subjects
|
@@ -762,7 +850,8 @@ module Bplmodels
|
|
762
850
|
if doc['subject_geo_nonhier_ssim'] && doc['subject_hiergeo_geojson_ssm']
|
763
851
|
doc['subject_geo_nonhier_ssim'].each do |non_hier_geo_subj|
|
764
852
|
doc['subject_hiergeo_geojson_ssm'].each do |hiergeo_geojson_feature|
|
765
|
-
if hiergeo_geojson_feature.match(/#{non_hier_geo_subj}/)
|
853
|
+
#if hiergeo_geojson_feature.match(/#{non_hier_geo_subj}/)
|
854
|
+
if hiergeo_geojson_feature.include?(non_hier_geo_subj)
|
766
855
|
doc['subject_geo_nonhier_ssim'].delete(non_hier_geo_subj)
|
767
856
|
end
|
768
857
|
end
|
@@ -786,12 +875,12 @@ module Bplmodels
|
|
786
875
|
|
787
876
|
doc['note_tsim'] = []
|
788
877
|
doc['note_resp_tsim'] = []
|
789
|
-
doc['note_date_tsim'] = []
|
790
878
|
doc['note_performers_tsim'] = []
|
791
879
|
doc['note_acquisition_tsim'] = []
|
792
880
|
doc['note_ownership_tsim'] = []
|
793
881
|
doc['note_citation_tsim'] = []
|
794
882
|
doc['note_reference_tsim'] = []
|
883
|
+
doc['note_physical_tsim'] = []
|
795
884
|
|
796
885
|
0.upto self.descMetadata.note.length-1 do |index|
|
797
886
|
if self.descMetadata.note(index).type_at.first == 'statement of responsibility'
|
@@ -813,11 +902,10 @@ module Bplmodels
|
|
813
902
|
end
|
814
903
|
end
|
815
904
|
|
816
|
-
|
817
905
|
0.upto self.descMetadata.physical_description.length-1 do |physical_index|
|
818
906
|
0.upto self.descMetadata.physical_description(physical_index).note.length-1 do |note_index|
|
819
907
|
if self.descMetadata.physical_description(physical_index).note(note_index).first != nil
|
820
|
-
doc['
|
908
|
+
doc['note_physical_tsim'].append(self.descMetadata.physical_description(physical_index).note(note_index).first)
|
821
909
|
end
|
822
910
|
end
|
823
911
|
end
|
@@ -833,12 +921,16 @@ module Bplmodels
|
|
833
921
|
if self.descMetadata.mods(0).title_info(index).display_label[0] == 'primary_display'
|
834
922
|
doc['title_info_primary_tsi'] = title_prefix + title_value
|
835
923
|
doc['title_info_primary_ssort'] = title_value
|
924
|
+
doc['title_info_partnum_tsi'] = self.descMetadata.mods(0).title_info(index).part_number
|
925
|
+
doc['title_info_partname_tsi'] = self.descMetadata.mods(0).title_info(index).part_name
|
836
926
|
else
|
837
927
|
doc['title_info_primary_trans_tsim'] << title_prefix + title_value
|
838
928
|
end
|
839
929
|
else
|
840
930
|
doc['title_info_primary_tsi'] = title_prefix + title_value
|
841
931
|
doc['title_info_primary_ssort'] = title_value
|
932
|
+
doc['title_info_partnum_tsi'] = self.descMetadata.mods(0).title_info(index).part_number
|
933
|
+
doc['title_info_partname_tsi'] = self.descMetadata.mods(0).title_info(index).part_name
|
842
934
|
end
|
843
935
|
if self.descMetadata.mods(0).title_info(index).supplied[0] == 'yes'
|
844
936
|
doc['supplied_title_bs'] = 'true'
|
@@ -858,11 +950,6 @@ module Bplmodels
|
|
858
950
|
|
859
951
|
doc['subtitle_tsim'] = self.descMetadata.title_info.subtitle
|
860
952
|
|
861
|
-
if self.collection
|
862
|
-
if self.collection.institutions
|
863
|
-
doc['institution_pid_si'] = self.collection.institutions.pid
|
864
|
-
end
|
865
|
-
end
|
866
953
|
|
867
954
|
if self.workflowMetadata
|
868
955
|
doc['workflow_state_ssi'] = self.workflowMetadata.item_status.state
|
@@ -875,6 +962,57 @@ module Bplmodels
|
|
875
962
|
}
|
876
963
|
end
|
877
964
|
|
965
|
+
=begin
|
966
|
+
ocr_text_normal = ''
|
967
|
+
ocr_text_squished = ''
|
968
|
+
ActiveFedora::Base.find_in_batches('is_image_of_ssim'=>"info:fedora/#{self.pid}") do |group|
|
969
|
+
group.each { |image_file|
|
970
|
+
if image_file['has_ocr_master_ssi'] == 'true'
|
971
|
+
ocr_text_normal += image_file['full_ocr_ssi']
|
972
|
+
ocr_text_squished += image_file['compressed_ocr_ssi']
|
973
|
+
end
|
974
|
+
|
975
|
+
}
|
976
|
+
end
|
977
|
+
|
978
|
+
doc['full_ocr_si'] = ocr_text_normal[0..10000] if ocr_text_normal.present?
|
979
|
+
doc['full_ocr_ssi'] = ocr_text_normal[0..10000] if ocr_text_normal.present?
|
980
|
+
doc['compressed_ocr_si'] = ocr_text_squished[0..10000] if ocr_text_squished.present?
|
981
|
+
doc['compressed_ocr_ssi'] = ocr_text_squished[0..10000] if ocr_text_squished.present?
|
982
|
+
=end
|
983
|
+
|
984
|
+
if self.plainText.present?
|
985
|
+
doc['ocr_tiv'] = self.plainText.content.squish
|
986
|
+
|
987
|
+
pages_ocr_check = Bplmodels::ImageFile.find_with_conditions({"is_image_of_ssim"=>"info:fedora/#{self.pid}","has_ocr_text_bsi"=>"true"}, rows: '1', fl: 'id,has_ocr_text_bsi' )
|
988
|
+
doc['has_searchable_text_bsi'] = true if pages_ocr_check.present?
|
989
|
+
end
|
990
|
+
|
991
|
+
if self.scanData.present?
|
992
|
+
scan_data_xml = Nokogiri::XML(self.scanData.content)
|
993
|
+
#See http://archive.org/download/handbookforkitch00neel (created in 2009) for a record lacking this
|
994
|
+
doc['text_direction_ssi'] = scan_data_xml.xpath("//globalHandedness/page-progression").first.text if scan_data_xml.xpath("//globalHandedness/page-progression").first.present?
|
995
|
+
end
|
996
|
+
|
997
|
+
#Handle the case of multiple volumes...
|
998
|
+
if self.class.name == 'Bplmodels::Book'
|
999
|
+
volume_check = Bplmodels::Finder.getVolumeObjects(self.pid)
|
1000
|
+
if volume_check.present?
|
1001
|
+
doc['ocr_tiv'] = ''
|
1002
|
+
volume_check.each do |volume|
|
1003
|
+
#FIXME!!!
|
1004
|
+
volume_object = ActiveFedora::Base.find(volume['id']).adapt_to_cmodel
|
1005
|
+
doc['ocr_tiv'] += volume_object.plainText.content.squish + ' ' if volume_object.plainText.present?
|
1006
|
+
end
|
1007
|
+
end
|
1008
|
+
end
|
1009
|
+
|
1010
|
+
|
1011
|
+
if self.workflowMetadata.volume_match_md5s.present?
|
1012
|
+
doc['marc_md5_sum_ssi'] = self.workflowMetadata.volume_match_md5s.marc.first
|
1013
|
+
doc['iaMeta_matcher_md5_ssi'] = self.workflowMetadata.volume_match_md5s.iaMeta.first
|
1014
|
+
end
|
1015
|
+
|
878
1016
|
if self.workflowMetadata.marked_for_deletion.present?
|
879
1017
|
doc['marked_for_deletion_bsi'] = self.workflowMetadata.marked_for_deletion.first
|
880
1018
|
doc['marked_for_deletion_reason_ssi'] = self.workflowMetadata.marked_for_deletion.reason.first
|
@@ -898,14 +1036,28 @@ module Bplmodels
|
|
898
1036
|
#local_id_type => type of that local ID
|
899
1037
|
#label => label of the object
|
900
1038
|
#institution_pid => instituional access of this file
|
1039
|
+
#secondary_parent_pids => optional array of additional parent pids
|
901
1040
|
def self.mint(args)
|
902
1041
|
|
1042
|
+
expected_aguments = [:parent_pid, :local_id, :local_id_type, :institution_pid, :secondary_parent_pids]
|
1043
|
+
expected_aguments.each do |arg|
|
1044
|
+
if !args.keys.include?(arg)
|
1045
|
+
raise "Mint called but missing parameter: #{arg}"
|
1046
|
+
end
|
1047
|
+
end
|
1048
|
+
|
903
1049
|
#TODO: Duplication check here to prevent over-writes?
|
904
1050
|
|
905
1051
|
args[:namespace_id] ||= ARK_CONFIG_GLOBAL['namespace_commonwealth_pid']
|
1052
|
+
args[:secondary_parent_pids] ||= []
|
906
1053
|
|
907
|
-
response = Typhoeus::Request.post(ARK_CONFIG_GLOBAL['url'] + "/arks.json", :params => {:ark=>{:parent_pid=>args[:parent_pid], :namespace_ark => ARK_CONFIG_GLOBAL['namespace_commonwealth_ark'], :namespace_id=>args[:namespace_id], :url_base => ARK_CONFIG_GLOBAL['ark_commonwealth_base'], :model_type => self.name, :local_original_identifier=>args[:local_id], :local_original_identifier_type=>args[:local_id_type]}})
|
908
|
-
|
1054
|
+
response = Typhoeus::Request.post(ARK_CONFIG_GLOBAL['url'] + "/arks.json", :params => {:ark=>{:parent_pid=>args[:parent_pid], :secondary_parent_pids=>args[:secondary_parent_pids], :namespace_ark => ARK_CONFIG_GLOBAL['namespace_commonwealth_ark'], :namespace_id=>args[:namespace_id], :url_base => ARK_CONFIG_GLOBAL['ark_commonwealth_base'], :model_type => self.name, :local_original_identifier=>args[:local_id], :local_original_identifier_type=>args[:local_id_type]}})
|
1055
|
+
|
1056
|
+
begin
|
1057
|
+
as_json = JSON.parse(response.body)
|
1058
|
+
rescue => ex
|
1059
|
+
raise('Error in JSON response for minting an object pid.')
|
1060
|
+
end
|
909
1061
|
|
910
1062
|
puts as_json['pid']
|
911
1063
|
|
@@ -926,6 +1078,12 @@ module Bplmodels
|
|
926
1078
|
object = self.new(:pid=>as_json["pid"])
|
927
1079
|
|
928
1080
|
object.add_relationship(:is_member_of_collection, "info:fedora/" + args[:parent_pid])
|
1081
|
+
object.add_relationship(:administrative_set, "info:fedora/" + args[:parent_pid])
|
1082
|
+
|
1083
|
+
args[:secondary_parent_pids].each do |other_collection_pid|
|
1084
|
+
object.add_relationship(:is_member_of_collection, "info:fedora/" + other_collection_pid)
|
1085
|
+
end
|
1086
|
+
|
929
1087
|
object.add_oai_relationships
|
930
1088
|
|
931
1089
|
object.label = args[:label] if args[:label].present?
|
@@ -952,41 +1110,121 @@ module Bplmodels
|
|
952
1110
|
return ARK_CONFIG_GLOBAL['url'] + '/ark:/' + ARK_CONFIG_GLOBAL["namespace_commonwealth_ark"].to_s + "/" + self.pid.split(':').last.to_s
|
953
1111
|
end
|
954
1112
|
|
1113
|
+
# Assigns file_content to the content of this object's `marc` accessor and
# tags it with the 'application/marc' mime type.
# Returns the mime type string that was assigned.
def insert_marc(file_content)
  marc_stream = self.marc
  marc_stream.content = file_content
  marc_stream.mimeType = 'application/marc'
end
|
1117
|
+
|
1118
|
+
# Assigns file_content to the content of this object's `marcXML` accessor and
# tags it with the 'application/xml' mime type.
# Returns the mime type string that was assigned.
def insert_marc_xml(file_content)
  marc_xml_stream = self.marcXML
  marc_xml_stream.content = file_content
  marc_xml_stream.mimeType = 'application/xml'
end
|
1122
|
+
|
1123
|
+
# Assigns file_content to the content of this object's `iaMeta` accessor and
# tags it with the 'application/xml' mime type.
# Returns the mime type string that was assigned.
def insert_ia_meta(file_content)
  ia_meta_stream = self.iaMeta
  ia_meta_stream.content = file_content
  ia_meta_stream.mimeType = 'application/xml'
end
|
1127
|
+
|
1128
|
+
# Assigns file_content to the content of this object's `scanData` accessor and
# tags it with the 'application/xml' mime type.
# Returns the mime type string that was assigned.
def insert_scan_data(file_content)
  scan_data_stream = self.scanData
  scan_data_stream.content = file_content
  scan_data_stream.mimeType = 'application/xml'
end
|
1132
|
+
|
1133
|
+
# Assigns file_content to the content of this object's `plainText` accessor
# and tags it with the 'text/plain' mime type.
# Returns the mime type string that was assigned.
def insert_plain_text(file_content)
  plain_text_stream = self.plainText
  plain_text_stream.content = file_content
  plain_text_stream.mimeType = 'text/plain'
end
|
1137
|
+
|
1138
|
+
# Assigns file_content to the content of this object's `djvuXML` accessor and
# tags it with the 'application/xml' mime type.
# Returns the mime type string that was assigned.
def insert_djvu_xml(file_content)
  djvu_stream = self.djvuXML
  djvu_stream.content = file_content
  djvu_stream.mimeType = 'application/xml'
end
|
1142
|
+
|
1143
|
+
# Assigns file_content to the content of this object's `abbyy` accessor and
# tags it with the 'application/xml' mime type.
# Returns the mime type string that was assigned.
def insert_abbyy(file_content)
  abbyy_stream = self.abbyy
  abbyy_stream.content = file_content
  abbyy_stream.mimeType = 'application/xml'
end
|
1147
|
+
|
1148
|
+
# Single-file convenience wrapper around insert_new_file.
# Wraps the given path/name/original location in a one-element list whose
# entry is tagged with the 'productionMaster' datastream, then forwards it
# together with ingest_source, institution_pid and set_exemplary.
# Returns whatever insert_new_file returns.
def simple_insert_file(file_path, file_name, ingest_source, institution_pid, original_file_location=nil, set_exemplary=nil)
  master_entry = {
    :datastream => 'productionMaster',
    :file_path => file_path,
    :file_name => file_name,
    :original_file_location => original_file_location
  }

  insert_new_file([master_entry], ingest_source, institution_pid, set_exemplary)
end
|
1159
|
+
|
955
1160
|
# Expects a hash of the following keys
|
956
1161
|
# :file_path -> The path to the file
|
957
1162
|
# :datastream -> The datastream for the file
|
958
1163
|
# :file_name -> The name of the file
|
959
|
-
def insert_new_file(files_hash, file_ingest_source, institution_pid)
|
1164
|
+
# Inserts a new file object for this item, choosing the file model from the
# name of the 'productionMaster' entry.
#
# files_hash         -> list of hashes with :file_path, :datastream and
#                       :file_name (optionally :original_file_location)
# file_ingest_source -> recorded as the ingest origin when none is set yet
# institution_pid    -> forwarded to the file-specific insert method
# set_exemplary      -> forwarded to the image and document insert methods
#
# Returns whatever the file-specific insert method returns.
# Raises RuntimeError when files_hash is empty, its first entry lacks a
# required key, or no 'productionMaster' entry with a file name is present.
def insert_new_file(files_hash, file_ingest_source, institution_pid, set_exemplary=nil)
  puts files_hash.to_s

  first_file = files_hash.first
  raise 'Missing insert_new_file params' if first_file.blank? || first_file[:file_path].blank? || first_file[:datastream].blank? || first_file[:file_name].blank?

  # Fail with a clear message instead of a NoMethodError on nil further down.
  production_master = files_hash.find { |hash| hash[:datastream] == 'productionMaster' }
  if production_master.nil? || production_master[:file_name].blank?
    raise 'Missing insert_new_file params: no productionMaster entry with a file_name'
  end

  master_name = production_master[:file_name].downcase

  # Ordered rules: [file name fragment, media types to record, insert method,
  # whether set_exemplary is passed along]. The first fragment contained in
  # the lower-cased file name wins, so the order below is significant.
  # (A docx media type for a 'preProductionNegativeMaster' companion of PDFs
  # was commented out in the original and is intentionally not recorded.)
  dispatch_rules = [
    ['.tif',      ['image/tiff', 'image/jpeg', 'image/jp2'], :insert_new_image_file,    true],
    ['.jp2',      ['image/jpeg', 'image/jp2'],               :insert_new_image_file,    true],
    ['.png',      ['image/png', 'image/jpeg', 'image/jp2'],  :insert_new_image_file,    true],
    ['.mp3',      ['audio/mpeg'],                            :insert_new_audio_file,    false],
    ['.wav',      ['audio/x-wav'],                           :insert_new_audio_file,    false],
    ['.aif',      ['audio/x-aiff'],                          :insert_new_audio_file,    false],
    ['.pdf',      ['application/pdf'],                       :insert_new_document_file, true],
    ['.epub',     ['application/epub+zip'],                  :insert_new_ereader_file,  false],
    ['.mobi',     ['application/x-mobipocket-ebook'],        :insert_new_ereader_file,  false],
    ['daisy.zip', ['application/zip'],                       :insert_new_ereader_file,  false]
  ]
  # Anything unrecognised is treated as an image, as before.
  fallback_rule = [nil, ['image/jpeg', 'image/jp2'], :insert_new_image_file, true]

  matched_rule = dispatch_rules.find { |rule| master_name.include?(rule.first) } || fallback_rule
  _fragment, media_types, handler, pass_exemplary = matched_rule

  media_types.each { |media_type| self.descMetadata.insert_media_type(media_type) }

  handler_args = [files_hash, institution_pid]
  handler_args << set_exemplary if pass_exemplary
  inserted_obj = public_send(handler, *handler_args)

  # Only the first ingest records its origin; later inserts leave it untouched.
  self.workflowMetadata.item_source.ingest_origin = file_ingest_source if self.workflowMetadata.item_source.ingest_origin.blank?
  files_hash.each do |file|
    # Fall back to the local path when no original location was supplied.
    original_file_location = file[:original_file_location] || file[:file_path]
    self.workflowMetadata.insert_file_source(original_file_location, file[:file_name], file[:datastream])
  end

  inserted_obj
end
|
988
1226
|
|
989
|
-
def insert_new_image_file(files_hash, institution_pid)
|
1227
|
+
def insert_new_image_file(files_hash, institution_pid, set_exemplary)
|
990
1228
|
#raise 'insert new image called with no files or more than one!' if file.blank? || file.is_a?(Array)
|
991
1229
|
|
992
1230
|
puts 'processing image of: ' + self.pid.to_s + ' with file_hash: ' + files_hash.to_s
|
@@ -1002,7 +1240,8 @@ module Bplmodels
|
|
1002
1240
|
if image_file.is_a?(String)
|
1003
1241
|
#Bplmodels::ImageFile.find(last_image_file).delete
|
1004
1242
|
#last_image_file = Bplmodels::ImageFile.mint(:parent_pid=>self.pid, :local_id=>final_file_name, :local_id_type=>'File Name', :label=>final_file_name, :institution_pid=>institution_pid)
|
1005
|
-
return true
|
1243
|
+
#return true
|
1244
|
+
return Bplmodels::ImageFile.find(image_file)
|
1006
1245
|
end
|
1007
1246
|
|
1008
1247
|
files_hash.each_with_index do |file, file_index|
|
@@ -1015,175 +1254,359 @@ module Bplmodels
|
|
1015
1254
|
image_file.send(datastream).mimeType = 'image/tiff'
|
1016
1255
|
elsif file[:file_name].split('.').last.downcase == 'jpg'
|
1017
1256
|
image_file.send(datastream).mimeType = 'image/jpeg'
|
1257
|
+
elsif file[:file_name].split('.').last.downcase == 'jp2'
|
1258
|
+
image_file.send(datastream).mimeType = 'image/jp2'
|
1259
|
+
elsif file[:file_name].split('.').last.downcase == 'png'
|
1260
|
+
image_file.send(datastream).mimeType = 'image/png'
|
1261
|
+
elsif file[:file_name].split('.').last.downcase == 'txt'
|
1262
|
+
image_file.send(datastream).mimeType = 'text/plain'
|
1018
1263
|
else
|
1019
|
-
image_file.send(datastream).mimeType = 'image/jpeg'
|
1264
|
+
#image_file.send(datastream).mimeType = 'image/jpeg'
|
1265
|
+
raise "Could not find a mimeType for #{file[:file_name].split('.').last.downcase}"
|
1020
1266
|
end
|
1021
1267
|
|
1022
|
-
image_file.send(datastream).dsLabel = file[:file_name].gsub(
|
1268
|
+
image_file.send(datastream).dsLabel = file[:file_name].gsub(/\.(tif|TIF|jpg|JPG|jpeg|JPEG|jp2|JP2|png|PNG|txt|TXT)$/, '')
|
1023
1269
|
|
1024
1270
|
#FIXME!!!
|
1025
|
-
|
1271
|
+
original_file_location = file[:original_file_location]
|
1272
|
+
original_file_location ||= file[:file_path]
|
1273
|
+
image_file.workflowMetadata.insert_file_source(original_file_location,file[:file_name],datastream)
|
1026
1274
|
image_file.workflowMetadata.item_status.state = "published"
|
1027
1275
|
image_file.workflowMetadata.item_status.state_comment = "Added via the ingest image object base method on " + Time.new.year.to_s + "/" + Time.new.month.to_s + "/" + Time.new.day.to_s
|
1028
1276
|
|
1029
1277
|
|
1030
1278
|
end
|
1031
1279
|
|
1032
|
-
|
1033
1280
|
other_images_exist = false
|
1034
1281
|
Bplmodels::ImageFile.find_in_batches('is_image_of_ssim'=>"info:fedora/#{self.pid}", 'is_preceding_image_of_ssim'=>'') do |group|
|
1035
1282
|
group.each { |image_id|
|
1036
|
-
other_images_exist
|
1037
|
-
|
1038
|
-
|
1039
|
-
|
1040
|
-
|
1283
|
+
if other_images_exist
|
1284
|
+
raise 'This object has an error... likely was interupted during a previous ingest so multiple starting files. Pid: ' + self.pid
|
1285
|
+
else
|
1286
|
+
other_images_exist = true
|
1287
|
+
preceding_image = Bplmodels::ImageFile.find(image_id['id'])
|
1288
|
+
preceding_image.add_relationship(:is_preceding_image_of, "info:fedora/#{image_file.pid}", true)
|
1289
|
+
preceding_image.save
|
1290
|
+
image_file.add_relationship(:is_following_image_of, "info:fedora/#{image_id['id']}", true)
|
1291
|
+
end
|
1292
|
+
|
1041
1293
|
}
|
1042
1294
|
end
|
1043
1295
|
|
1044
1296
|
image_file.add_relationship(:is_image_of, "info:fedora/" + self.pid)
|
1045
1297
|
image_file.add_relationship(:is_file_of, "info:fedora/" + self.pid)
|
1046
|
-
|
1298
|
+
|
1299
|
+
if set_exemplary.nil? || set_exemplary
|
1300
|
+
if ActiveFedora::Base.find_with_conditions("is_exemplary_image_of_ssim"=>"info:fedora/#{self.pid}").blank?
|
1301
|
+
image_file.add_relationship(:is_exemplary_image_of, "info:fedora/" + self.pid)
|
1302
|
+
end
|
1303
|
+
end
|
1304
|
+
|
1047
1305
|
|
1048
1306
|
image_file.save
|
1049
1307
|
|
1050
1308
|
image_file
|
1051
1309
|
end
|
1052
1310
|
|
1053
|
-
|
1054
|
-
|
1055
|
-
|
1311
|
+
# Mints a Bplmodels::EreaderFile for this object, loads every entry of
# files_hash into the datastream named by its :datastream key, relates the
# new file to this object and saves it.
#
# files_hash      -> list of hashes with :file_path, :datastream, :file_name
#                    (optionally :original_file_location)
# institution_pid -> forwarded to EreaderFile.mint
#
# Returns the saved ereader file. When mint hands back a String, that value
# is looked up with EreaderFile.find and returned without further changes
# (presumably the pid of an already-minted file -- confirm against mint).
def insert_new_ereader_file(files_hash, institution_pid)
  puts "processing ereader of: #{self.pid} with file_hash: #{files_hash}"

  master_entry = files_hash.find { |entry| entry[:datastream] == 'productionMaster' }
  master_name = master_entry[:file_name]

  epub_file = Bplmodels::EreaderFile.mint(:parent_pid=>self.pid, :local_id=>master_name, :local_id_type=>'File Name', :label=>master_name, :institution_pid=>institution_pid)

  return Bplmodels::EreaderFile.find(epub_file) if epub_file.is_a?(String)

  files_hash.each do |file|
    datastream = file[:datastream]
    stream = epub_file.send(datastream)

    stream.content = ::File.open(file[:file_path])

    # Unknown extensions are stored as epub, the same as '.epub' itself.
    stream.mimeType = case file[:file_name].split('.').last.downcase
                      when 'mobi' then 'application/x-mobipocket-ebook'
                      when 'zip'  then 'application/zip'
                      when 'txt'  then 'text/plain'
                      else 'application/epub+zip'
                      end

    stream.dsLabel = file[:file_name].gsub(/\.(epub|EPUB|mobi|MOBI|zip|ZIP|txt|TXT)$/, '')

    #FIXME!!!
    source_location = file[:original_file_location] || file[:file_path]
    workflow = epub_file.workflowMetadata
    workflow.insert_file_source(source_location, file[:file_name], datastream)
    workflow.item_status.state = "published"
    workflow.item_status.state_comment = "Added via the ingest image object base method on #{Time.new.year}/#{Time.new.month}/#{Time.new.day}"
  end

  # Chain the new file after every ereader file returned by this query.
  Bplmodels::EreaderFile.find_in_batches('is_ereader_of_ssim'=>"info:fedora/#{self.pid}", 'is_preceding_ereader_of_ssim'=>'') do |group|
    group.each do |ereader_doc|
      previous_ereader = Bplmodels::EreaderFile.find(ereader_doc['id'])
      previous_ereader.add_relationship(:is_preceding_ereader_of, "info:fedora/#{epub_file.pid}", true)
      previous_ereader.save
      epub_file.add_relationship(:is_following_ereader_of, "info:fedora/#{ereader_doc['id']}", true)
    end
  end

  own_uri = "info:fedora/" + self.pid
  epub_file.add_relationship(:is_ereader_of, own_uri)
  epub_file.add_relationship(:is_file_of, own_uri)

  epub_file.save

  epub_file
end
|
1373
|
+
|
1374
|
+
# Mints a Bplmodels::AudioFile for this object, loads every entry of
# files_hash into the datastream named by its :datastream key, relates the
# new file to this object and saves it.
#
# files_hash      -> list of hashes with :file_path, :datastream, :file_name
#                    (optionally :original_file_location)
# institution_pid -> forwarded to AudioFile.mint
# set_exemplary   -> accepted for signature parity; not used by this method
#
# Returns the saved audio file. When mint hands back a String, that value is
# looked up with AudioFile.find and returned without further changes
# (presumably the pid of an already-minted file -- confirm against mint).
# Raises RuntimeError when a file extension has no known mime type.
def insert_new_audio_file(files_hash, institution_pid, set_exemplary=false)
  production_master = files_hash.select{ |hash| hash[:datastream] == 'productionMaster' }.first
  audio_file = Bplmodels::AudioFile.mint(:parent_pid=>self.pid, :local_id=>production_master[:file_name], :local_id_type=>'File Name', :label=>production_master[:file_name], :institution_pid=>institution_pid)

  # Fixed: this lookup used the undefined local `image_file`, which raised a
  # NameError whenever mint returned a String.
  return Bplmodels::AudioFile.find(audio_file) if audio_file.is_a?(String)

  files_hash.each do |file|
    datastream = file[:datastream]
    stream = audio_file.send(datastream)

    stream.content = ::File.open(file[:file_path])

    extension = file[:file_name].split('.').last.downcase
    stream.mimeType = case extension
                      when 'mp3' then 'audio/mpeg'
                      when 'wav' then 'audio/x-wav'
                      when 'aif' then 'audio/x-aiff'
                      when 'txt' then 'text/plain'
                      else raise "Could not find a mimeType for #{extension}"
                      end

    stream.dsLabel = file[:file_name].gsub(/\.(mp3|MP3|wav|WAV|aif|AIF|txt|TXT)$/, '')

    #FIXME!!!
    original_file_location = file[:original_file_location] || file[:file_path]
    audio_file.workflowMetadata.insert_file_source(original_file_location, file[:file_name], datastream)
    audio_file.workflowMetadata.item_status.state = "published"
    audio_file.workflowMetadata.item_status.state_comment = "Added via the ingest image object base method on " + Time.new.year.to_s + "/" + Time.new.month.to_s + "/" + Time.new.day.to_s
  end

  # NOTE(review): the loop below assigns other_images_exist, not
  # other_audio_exist, so the "multiple starting files" guard can never fire.
  # It also writes the *_image_of predicates while the query filters on
  # is_preceding_audio_of_ssim. Both look like copy/paste slips from the image
  # method, but they are left as-is here because correcting only one of them
  # could change which files match this query -- confirm the intended
  # predicates before changing either.
  other_audio_exist = false
  Bplmodels::AudioFile.find_in_batches('is_audio_of_ssim'=>"info:fedora/#{self.pid}", 'is_preceding_audio_of_ssim'=>'') do |group|
    group.each do |audio|
      if other_audio_exist
        raise 'This object has an error... likely was interupted during a previous ingest so multiple starting files. Pid: ' + self.pid
      else
        other_images_exist = true
        preceding_audio = Bplmodels::AudioFile.find(audio['id'])
        preceding_audio.add_relationship(:is_preceding_image_of, "info:fedora/#{audio_file.pid}", true)
        preceding_audio.save
        audio_file.add_relationship(:is_following_image_of, "info:fedora/#{audio['id']}", true)
      end
    end
  end

  audio_file.add_relationship(:is_audio_of, "info:fedora/" + self.pid)
  audio_file.add_relationship(:is_file_of, "info:fedora/" + self.pid)

  audio_file.save

  audio_file
end
|
1103
1438
|
|
1104
|
-
|
1105
|
-
|
1106
|
-
# Mints a new Bplmodels::DocumentFile for this object, loads every supplied
# file into the datastream it names, links the new file into the object's
# chain of documents and saves it.
#
# @param files_hash [Array<Hash>] one hash per file with the keys :datastream,
#   :file_path, :file_name and, optionally, :original_file_location; one entry
#   must name the 'productionMaster' datastream
# @param institution_pid [String] handed through to DocumentFile.mint
# @param set_exemplary [Boolean, nil] nil or a truthy value marks the new file
#   as exemplary image when no file is exemplary for this object yet
# @return [Bplmodels::DocumentFile] the saved file; when mint returns a String
#   that value is looked up with DocumentFile.find and returned unchanged
# @raise [RuntimeError] when no productionMaster entry is supplied or when a
#   file extension has no known mime type
def insert_new_document_file(files_hash, institution_pid, set_exemplary)
  production_master = files_hash.find { |entry| entry[:datastream] == 'productionMaster' }
  # Fail with a readable message instead of a NoMethodError on nil below.
  raise 'productionMaster file missing!' if production_master.nil?

  master_name = production_master[:file_name]
  document_file = Bplmodels::DocumentFile.mint(:parent_pid => self.pid, :local_id => master_name, :local_id_type => 'File Name', :label => master_name, :institution_pid => institution_pid)

  # mint may hand back a String rather than an object; it is used as the id of
  # a file to load and return as-is.
  return Bplmodels::DocumentFile.find(document_file) if document_file.is_a?(String)

  files_hash.each do |file|
    datastream = document_file.send(file[:datastream])

    # Fix common url errors
    # NOTE(review): File.open does not fetch remote resources and URI::escape
    # is gone from current Rubies, so http paths presumably need open-uri or a
    # download step -- confirm before relying on this branch.
    source_path = file[:file_path]
    source_path = URI::escape(source_path) if source_path.match(/^http/)
    datastream.content = ::File.open(source_path)

    extension = file[:file_name].split('.').last.downcase
    datastream.mimeType =
      case extension
      when 'pdf'  then 'application/pdf'
      when 'docx' then 'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
      when 'doc'  then 'application/msword'
      when 'txt'  then 'text/plain'
      else raise "Could not find a mimeType for #{extension}"
      end

    datastream.dsLabel = file[:file_name].gsub(/\.(pdf|PDF|docx|DOCX|doc|DOC|txt|TXT)$/, '')

    #FIXME!!!
    original_file_location = file[:original_file_location] || file[:file_path]
    document_file.workflowMetadata.insert_file_source(original_file_location, file[:file_name], file[:datastream])
  end

  # Read the clock once so the three date parts cannot straddle midnight.
  today = Time.new
  document_file.workflowMetadata.item_status.state = "published"
  document_file.workflowMetadata.item_status.state_comment = "Added via the ingest document object base method on " + today.year.to_s + "/" + today.month.to_s + "/" + today.day.to_s

  # Every document of this object that does not precede anything yet now
  # precedes the new file, and the new file follows it.
  Bplmodels::DocumentFile.find_in_batches('is_document_of_ssim' => "info:fedora/#{self.pid}", 'is_preceding_document_of_ssim' => '') do |group|
    group.each do |solr_doc|
      preceding_document = Bplmodels::DocumentFile.find(solr_doc['id'])
      preceding_document.add_relationship(:is_preceding_document_of, "info:fedora/#{document_file.pid}", true)
      preceding_document.save
      # The back-link belongs on the new file (saved below), not on the
      # already-saved predecessor pointing at itself.
      document_file.add_relationship(:is_following_document_of, "info:fedora/#{solr_doc['id']}", true)
    end
  end

  # :is_document_of is the relationship the chain lookup above searches for.
  document_file.add_relationship(:is_document_of, "info:fedora/" + self.pid)
  document_file.add_relationship(:is_file_of, "info:fedora/" + self.pid)

  if set_exemplary.nil? || set_exemplary
    if ActiveFedora::Base.find_with_conditions("is_exemplary_image_of_ssim" => "info:fedora/#{self.pid}").blank?
      document_file.add_relationship(:is_exemplary_image_of, "info:fedora/" + self.pid)
    end
  end

  document_file.save

  document_file
end
|
1514
|
+
|
1515
|
+
# Attaches the volume identified by +pid+ to this object and splices it into
# the object's chain of volumes according to the first run of digits in the
# volume's part number.
#
# The chain is expressed with two relationships: "A is_preceding_volume_of B"
# and "B is_following_volume_of A". The loop compares the new volume's number
# with each existing volume's number and with the number seen on the previous
# iteration.
# NOTE(review): this presumably relies on Finder.getVolumeObjects returning
# the volumes in ascending part-number order -- confirm against Finder.
#
# When this is the first volume, or its number is 1, the files that are
# exemplary for the volume are also made exemplary for this object.
#
# @param pid [String] pid of the volume to add
# @return [Bplmodels::Volume] the saved volume (after adapt_to_cmodel)
def add_new_volume(pid)
  volume = Bplmodels::Volume.find(pid).adapt_to_cmodel
  placement_location = volume.descMetadata.title_info.part_number.first.match(/\d+/).to_s.to_i

  other_volumes_exist = false
  previous_number = 0

  Bplmodels::Finder.getVolumeObjects(self.pid).each do |solr_doc|
    other_volumes_exist = true
    current_number = solr_doc['title_info_partnum_tsi'].match(/\d+/).to_s.to_i
    has_next = !solr_doc['is_preceding_volume_of_ssim'].blank?
    has_previous = !solr_doc['is_following_volume_of_ssim'].blank?
    in_gap = previous_number < placement_location && current_number > placement_location

    if !has_next && current_number < placement_location
      # Case of insert at end: the new volume follows the current last one.
      preceding_volume = Bplmodels::Volume.find(solr_doc['id'])
      preceding_volume.add_relationship(:is_preceding_volume_of, "info:fedora/#{pid}", true)
      preceding_volume.save
      volume.add_relationship(:is_following_volume_of, "info:fedora/#{solr_doc['id']}", true)
      break
    elsif !has_previous && (!has_next || in_gap)
      # Case of insert at front: either the only existing volume, or the
      # first volume of a longer chain.
      following_volume = Bplmodels::Volume.find(solr_doc['id'])
      following_volume.add_relationship(:is_following_volume_of, "info:fedora/#{pid}", true)
      volume.add_relationship(:is_preceding_volume_of, "info:fedora/#{solr_doc['id']}", true)
      following_volume.save
      break
    elsif has_previous && in_gap
      # Normal case: splice between this volume and its predecessor. This
      # also covers the last volume of a chain, which must not be treated as
      # a lone volume or its predecessor would stay linked to it.
      following_volume = Bplmodels::Volume.find(solr_doc['id'])
      # The predecessor is the volume this one follows. The field may hold
      # several values, so take the first.
      preceding_pid = Array(solr_doc['is_following_volume_of_ssim']).first.gsub('info:fedora/', '')
      preceding_volume = Bplmodels::Volume.find(preceding_pid)

      following_volume.remove_relationship(:is_following_volume_of, "info:fedora/#{preceding_volume.pid}", true)
      preceding_volume.remove_relationship(:is_preceding_volume_of, "info:fedora/#{following_volume.pid}", true)

      following_volume.add_relationship(:is_following_volume_of, "info:fedora/#{pid}", true)
      preceding_volume.add_relationship(:is_preceding_volume_of, "info:fedora/#{pid}", true)

      volume.add_relationship(:is_following_volume_of, "info:fedora/#{preceding_volume.pid}", true)
      volume.add_relationship(:is_preceding_volume_of, "info:fedora/#{following_volume.pid}", true)
      preceding_volume.save
      following_volume.save
      break
    end

    previous_number = current_number
  end

  volume.add_relationship(:is_volume_of, "info:fedora/" + self.pid)

  #FIXME: Doesn't work with PDF?
  #FIXME: Do this better?
  if other_volumes_exist && placement_location == 1
    # A new first volume replaces whichever file is exemplary for this object.
    current_exemplary = ActiveFedora::Base.find_with_conditions("is_exemplary_image_of_ssim" => "info:fedora/#{self.pid}")
    unless current_exemplary.blank?
      exemplary_to_remove = ActiveFedora::Base.find(current_exemplary.first['id']).adapt_to_cmodel
      # The relationship to drop points at this object, and the change only
      # takes effect once the file is saved.
      exemplary_to_remove.remove_relationship(:is_exemplary_image_of, "info:fedora/" + self.pid)
      exemplary_to_remove.save
    end
  end

  if !other_volumes_exist || placement_location == 1
    ActiveFedora::Base.find_in_batches('is_exemplary_image_of_ssim' => "info:fedora/#{pid}") do |group|
      group.each do |exemplary_solr|
        exemplary_image = Bplmodels::File.find(exemplary_solr['id']).adapt_to_cmodel
        exemplary_image.add_relationship(:is_exemplary_image_of, "info:fedora/" + self.pid)
        exemplary_image.save
      end
    end
  end

  volume.save

  volume
end
|
1188
1611
|
|
1189
1612
|
def deleteAllFiles
|
@@ -1237,5 +1660,22 @@ module Bplmodels
|
|
1237
1660
|
return true
|
1238
1661
|
end
|
1239
1662
|
|
1663
|
+
# Asks the derivative service to invalidate its cache for this object by
# POSTing the object's pid and Bplmodels.environment to
# "<DERIVATIVE_CONFIG_GLOBAL['url']>/processor/objectcacheinvalidation.json".
#
# @return [true] when the service did not report a failure
# @raise [RuntimeError] when the 'result' field of the parsed JSON reply is
#   the string "false"
def cache_invalidate
  endpoint = DERIVATIVE_CONFIG_GLOBAL['url'] + "/processor/objectcacheinvalidation.json"
  payload = { :object_pid => self.pid, :environment => Bplmodels.environment }
  reply = JSON.parse(Typhoeus::Request.post(endpoint, :params => payload).body)

  raise "Error Deleting the Cache! Server error!" if reply['result'] == "false"

  true
end
|
1673
|
+
|
1674
|
+
|
1675
|
+
# Stores two MD5 hex digests under workflowMetadata.volume_match_md5s:
# one of the raw marc content, and one of the iaMeta content after removing
# everything from "</page_progression>" to the end of that line and any
# "<volume>...</volume>" element found on a single line.
#
# @return [String] the iaMeta digest (value of the last assignment)
def calculate_volume_match_md5s
  marc_digest = Digest::MD5.hexdigest(marc.content)
  workflowMetadata.volume_match_md5s.marc = marc_digest

  comparable_meta = iaMeta.content
  comparable_meta = comparable_meta.gsub(/<\/page_progression>.+$/, '')
  comparable_meta = comparable_meta.gsub(/<volume>.+<\/volume>/, '')
  workflowMetadata.volume_match_md5s.iaMeta = Digest::MD5.hexdigest(comparable_meta)
end
|
1679
|
+
|
1240
1680
|
end
|
1241
1681
|
end
|