cul_hydra 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/app/assets/images/cul_hydra/crystal/binary.png +0 -0
- data/app/assets/images/cul_hydra/crystal/document.png +0 -0
- data/app/assets/images/cul_hydra/crystal/file.png +0 -0
- data/app/assets/images/cul_hydra/crystal/file_broken.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_documents.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_images.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_music.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_sound.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_video.png +0 -0
- data/app/assets/images/cul_hydra/crystal/kmultiple.png +0 -0
- data/app/assets/images/cul_hydra/crystal/knotify.png +0 -0
- data/app/assets/images/cul_hydra/crystal/mp3.png +0 -0
- data/app/assets/images/cul_hydra/crystal/multimedia2.png +0 -0
- data/app/assets/images/cul_hydra/crystal/video.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/application.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/code.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/css.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/db.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/directory.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/doc.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/file.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/film.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/flash.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/folder_open.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/html.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/java.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/linux.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/music.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/pdf.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/php.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/picture.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/ppt.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/psd.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/ruby.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/script.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/spinner.gif +0 -0
- data/app/assets/images/cul_hydra/filesystem/txt.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/xls.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/zip.png +0 -0
- data/app/controllers/concerns/cul/hydra/application_id_behavior.rb +43 -0
- data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
- data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
- data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
- data/app/helpers/cul/hydra/ore_proxies_helper_behavior.rb +119 -0
- data/app/helpers/cul/hydra/struct_metadata_helper_behavior.rb +89 -0
- data/app/models/bag_aggregator.rb +7 -0
- data/app/models/concept.rb +23 -0
- data/app/models/concerns/cul/hydra/models.rb +24 -0
- data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
- data/app/models/concerns/cul/hydra/models/common.rb +220 -0
- data/app/models/concerns/cul/hydra/models/image_resource.rb +106 -0
- data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
- data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
- data/app/models/concerns/nfo/common.rb +17 -0
- data/app/models/concerns/nfo/file_data_object.rb +10 -0
- data/app/models/concerns/nfo/folder.rb +10 -0
- data/app/models/concerns/nie/information_element.rb +10 -0
- data/app/models/concerns/ore/proxy.rb +124 -0
- data/app/models/concerns/rdf/cul.rb +77 -0
- data/app/models/concerns/rdf/fcrepo3.rb +360 -0
- data/app/models/concerns/rdf/nfo.rb +807 -0
- data/app/models/concerns/rdf/nie.rb +338 -0
- data/app/models/concerns/rdf/olo.rb +100 -0
- data/app/models/concerns/rdf/ore.rb +101 -0
- data/app/models/concerns/rdf/pimo.rb +605 -0
- data/app/models/concerns/rdf/sc.rb +47 -0
- data/app/models/concerns/sc/canvas.rb +12 -0
- data/app/models/concerns/sc/sequence.rb +21 -0
- data/app/models/content_aggregator.rb +3 -0
- data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
- data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
- data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
- data/app/models/dc_document.rb +39 -0
- data/app/models/generic_aggregator.rb +68 -0
- data/app/models/generic_object.rb +18 -0
- data/app/models/generic_resource.rb +210 -0
- data/app/models/jp2_image_aggregator.rb +34 -0
- data/app/models/mets_structured_aggregator.rb +18 -0
- data/app/models/resource.rb +78 -0
- data/app/models/resource_aggregator.rb +22 -0
- data/app/models/static_audio_aggregator.rb +12 -0
- data/app/models/static_image_aggregator.rb +32 -0
- data/bin/rails +12 -0
- data/config/fedora.yml +17 -0
- data/config/jetty.yml +6 -0
- data/config/locales/ldpd_hydra.en.yml +125 -0
- data/config/predicate_mappings.yml +79 -0
- data/config/solr.yml +8 -0
- data/config/solr_mappings.yml +26 -0
- data/config/solr_value_maps.yml +41 -0
- data/config/subs.yml +17 -0
- data/fixtures/cmodels/ldpd_ADLMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_AESMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_BagAggregator.xml +70 -0
- data/fixtures/cmodels/ldpd_Concept.xml +69 -0
- data/fixtures/cmodels/ldpd_ContentAggregator.xml +70 -0
- data/fixtures/cmodels/ldpd_DynamicAggregator.xml +56 -0
- data/fixtures/cmodels/ldpd_JP2ImageAggregator.xml +60 -0
- data/fixtures/cmodels/ldpd_METSMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_METSStructuredAggregator.xml +53 -0
- data/fixtures/cmodels/ldpd_MODSMetadata.xml +73 -0
- data/fixtures/cmodels/ldpd_MostRecent.xml +46 -0
- data/fixtures/cmodels/ldpd_PTIFImageAggregator.xml +63 -0
- data/fixtures/cmodels/ldpd_Resource.xml +72 -0
- data/fixtures/cmodels/ldpd_RestrictedResource.xml +54 -0
- data/fixtures/cmodels/ldpd_Since.xml +62 -0
- data/fixtures/cmodels/ldpd_StaticAudioAggregator.xml +54 -0
- data/fixtures/cmodels/ldpd_StaticImageAggregator.xml +71 -0
- data/fixtures/cmodels/ldpd_htest.xml +54 -0
- data/fixtures/cmodels/ldpd_nullbind.xml +63 -0
- data/fixtures/cmodels/ldpd_sdef.Aggregator.xml +71 -0
- data/fixtures/cmodels/ldpd_sdef.Core.xml +48 -0
- data/fixtures/cmodels/ldpd_sdef.Image.xml +47 -0
- data/fixtures/cmodels/ldpd_sdef.Metadata.xml +62 -0
- data/fixtures/cmodels/ldpd_sdef.Resource.xml +76 -0
- data/fixtures/cmodels/ldpd_sdef.ZoomingImage.xml +46 -0
- data/fixtures/cmodels/ldpd_sdep.BagAggregator.xml +160 -0
- data/fixtures/cmodels/ldpd_sdep.BagAggregatorCore.xml +221 -0
- data/fixtures/cmodels/ldpd_sdep.ContentAggregatorCore.xml +221 -0
- data/fixtures/cmodels/ldpd_sdep.DynamicAggregator.xml +171 -0
- data/fixtures/cmodels/ldpd_sdep.DynamicAggregatorCore.xml +215 -0
- data/fixtures/cmodels/ldpd_sdep.JP2Image.xml +220 -0
- data/fixtures/cmodels/ldpd_sdep.JP2ImageAggregator.xml +167 -0
- data/fixtures/cmodels/ldpd_sdep.JP2ImageCore.xml +229 -0
- data/fixtures/cmodels/ldpd_sdep.MODSMetadata.xml +158 -0
- data/fixtures/cmodels/ldpd_sdep.MODSMetadataCore.xml +227 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImage.xml +222 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImageAggregator.xml +167 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImageCore.xml +215 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImage.xml +210 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImageAggregator.xml +186 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImageCore.xml +220 -0
- data/fixtures/cmodels/ore_Proxy.xml +50 -0
- data/fixtures/spec/BLOB/test001.jpg +0 -0
- data/fixtures/spec/CUL_DC/dc.xml +5 -0
- data/fixtures/spec/CUL_MODS/mods-001.xml +25 -0
- data/fixtures/spec/CUL_MODS/mods-all.xml +65 -0
- data/fixtures/spec/CUL_MODS/mods-bad-repo.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-created-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-created-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-end-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-issued-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-issued-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-other-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-other-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-range-short-years.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-start-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-dates-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-dates-with-some-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-item.xml +31 -0
- data/fixtures/spec/CUL_MODS/mods-names.xml +35 -0
- data/fixtures/spec/CUL_MODS/mods-notes.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-ns.xml +2 -0
- data/fixtures/spec/CUL_MODS/mods-origin-info.xml +9 -0
- data/fixtures/spec/CUL_MODS/mods-part.xml +22 -0
- data/fixtures/spec/CUL_MODS/mods-physical-description.xml +12 -0
- data/fixtures/spec/CUL_MODS/mods-physical-location.xml +9 -0
- data/fixtures/spec/CUL_MODS/mods-record-info.xml +4 -0
- data/fixtures/spec/CUL_MODS/mods-relateditem-project.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-subjects.xml +73 -0
- data/fixtures/spec/CUL_MODS/mods-textual-date.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-titles.xml +33 -0
- data/fixtures/spec/CUL_MODS/mods-top-level-location-vs-relateditem-location.xml +21 -0
- data/fixtures/spec/CUL_MODS/mods-unmapped-project.xml +7 -0
- data/fixtures/spec/CUL_SOLR/mods-001.xml +1 -0
- data/fixtures/spec/CUL_SOLR/mods-001.yml +30 -0
- data/fixtures/spec/FOXML/content-aggregator.xml +64 -0
- data/fixtures/spec/FOXML/content-cmodel.xml +48 -0
- data/fixtures/spec/FOXML/image-cmodel.xml +48 -0
- data/fixtures/spec/FOXML/resource-max.xml +83 -0
- data/fixtures/spec/FOXML/resource-screen.xml +273 -0
- data/fixtures/spec/FOXML/resource-thumb.xml +86 -0
- data/fixtures/spec/FOXML/static-image-aggregator.xml +31 -0
- data/fixtures/spec/STRUCTMAP/structmap-examples.xml +21 -0
- data/fixtures/spec/STRUCTMAP/structmap-nested.xml +10 -0
- data/fixtures/spec/STRUCTMAP/structmap-recto.xml +4 -0
- data/fixtures/spec/STRUCTMAP/structmap-seq.xml +5 -0
- data/fixtures/spec/STRUCTMAP/structmap-unordered-seq.xml +5 -0
- data/lib/cul_hydra.rb +20 -0
- data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
- data/lib/cul_hydra/controllers.rb +13 -0
- data/lib/cul_hydra/controllers/aggregates.rb +93 -0
- data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
- data/lib/cul_hydra/controllers/catalog.rb +12 -0
- data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
- data/lib/cul_hydra/controllers/datastreams.rb +145 -0
- data/lib/cul_hydra/controllers/helpers.rb +10 -0
- data/lib/cul_hydra/controllers/helpers/active_fedora_helper_behavior.rb +9 -0
- data/lib/cul_hydra/controllers/helpers/application_helper_behavior.rb +16 -0
- data/lib/cul_hydra/controllers/helpers/dc_metadata_helper_behavior.rb +9 -0
- data/lib/cul_hydra/controllers/helpers/hydra_assets_helper_behavior.rb +46 -0
- data/lib/cul_hydra/controllers/helpers/hydra_autocomplete_helper_behavior.rb +35 -0
- data/lib/cul_hydra/controllers/helpers/hydra_uploader_helper_behavior.rb +34 -0
- data/lib/cul_hydra/controllers/helpers/resources_helper_behavior.rb +159 -0
- data/lib/cul_hydra/controllers/resources.rb +161 -0
- data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
- data/lib/cul_hydra/controllers/suggestions.rb +126 -0
- data/lib/cul_hydra/controllers/terms.rb +205 -0
- data/lib/cul_hydra/engine.rb +31 -0
- data/lib/cul_hydra/fedora.rb +41 -0
- data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
- data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
- data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
- data/lib/cul_hydra/indexer.rb +102 -0
- data/lib/cul_hydra/om.rb +7 -0
- data/lib/cul_hydra/om/standard_mods.rb +115 -0
- data/lib/cul_hydra/risearch_members.rb +92 -0
- data/lib/cul_hydra/solrizer.rb +10 -0
- data/lib/cul_hydra/solrizer/extractor.rb +27 -0
- data/lib/cul_hydra/solrizer/mods_fieldable.rb +473 -0
- data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
- data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
- data/lib/cul_hydra/solrizer_patch.rb +172 -0
- data/lib/cul_hydra/version.rb +8 -0
- data/lib/cul_hydra/version.rb~ +8 -0
- data/lib/tasks/cmodel.rake +122 -0
- data/lib/tasks/cul_hydra_dev.rake +54 -0
- data/lib/tasks/index.rake +73 -0
- data/lib/tasks/transform.rake +23 -0
- metadata +503 -0
|
@@ -0,0 +1,10 @@
|
|
|
1
|
+
# Namespace for the Solrizer extensions shipped with this gem.
# Every constant below is registered with autoload, so its file is only
# required the first time the constant is referenced.
module Cul
  module Hydra
    module Solrizer
      {
        :Extractor => "cul_hydra/solrizer/extractor",
        :TerminologyBasedSolrizer => "cul_hydra/solrizer/terminology_based_solrizer",
        :ValueMapper => "cul_hydra/solrizer/value_mapper",
        :ModsFieldable => "cul_hydra/solrizer/mods_fieldable"
      }.each do |const_name, path|
        autoload const_name, path
      end
    end
  end
end
|
|
@@ -0,0 +1,27 @@
|
|
|
1
|
+
module Cul::Hydra::Solrizer
|
|
2
|
+
class Extractor < ::Solrizer::Extractor
|
|
3
|
+
# Insert +field_value+ for +field_name+ into +solr_doc+ without destroying
# values that are already present.
#
# The value is first run through +format_node_value+. Values are always kept
# in an Array under +field_name+: when the key already holds a non-Array value
# (for example a bare String assigned by other indexing code) that value is
# wrapped in an Array first, so the new value is never string-concatenated
# onto it; an existing nil becomes an empty Array.
#
# @param [Hash] solr_doc the document hash; modified in place
# @param [String] field_name
# @param [String] field_value
# @param [boolean] unique when true, a value already present for the field is not added again
# @return [Hash] the same +solr_doc+
def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
  formatted_value = self.format_node_value(field_value)
  if solr_doc.has_key?(field_name)
    existing = solr_doc[field_name]
    unless existing.is_a?(Array)
      # Scalars would otherwise be mutated by String#<<, and nil would raise.
      existing = existing.nil? ? [] : [existing]
      solr_doc[field_name] = existing
    end
    existing << formatted_value unless unique && existing.include?(formatted_value)
  else
    solr_doc.merge!( {field_name => [formatted_value]} )
  end
  solr_doc
end
|
|
21
|
+
|
|
22
|
+
# Instance Methods
|
|
23
|
+
# Instance-level wrapper: forwards all four arguments unchanged to
# Cul::Hydra::Solrizer::Extractor.insert_solr_field_value and returns its result.
def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
|
|
24
|
+
Cul::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
|
|
25
|
+
end
|
|
26
|
+
end
|
|
27
|
+
end
|
|
@@ -0,0 +1,473 @@
|
|
|
1
|
+
module Cul::Hydra::Solrizer
|
|
2
|
+
module ModsFieldable
|
|
3
|
+
extend ActiveSupport::Concern
|
|
4
|
+
include Solrizer::DefaultDescriptors::Normal
|
|
5
|
+
|
|
6
|
+
# Prefix-to-URI namespace map passed as the second argument to the xpath calls in this module.
MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
|
|
7
|
+
|
|
8
|
+
# Class-level helpers: access to a memoized ValueMapper plus text normalization.
module ClassMethods
  # Returns the memoized ValueMapper, building it from +maps+ on first use.
  # +maps+ is ignored once the mapper exists.
  def value_mapper(maps=nil)
    @value_mapper ||= ValueMapper.new(maps)
  end

  # Delegates to ValueMapper#map_field.
  def map_field(field_key, map_key)
    value_mapper.map_field(field_key, map_key)
  end

  # Delegates to ValueMapper#map_value.
  def map_value(field_key, value_key)
    value_mapper.map_value(field_key, value_key)
  end

  # Delegates to ValueMapper#maps_field?.
  def maps_field?(field_key)
    value_mapper.maps_field? field_key
  end

  # Returns a cleaned-up copy of +t+: surrounding whitespace removed and every
  # internal run of whitespace collapsed to a single space.
  #
  # With +strip_punctuation+ the text additionally loses, in this order, one
  # enclosing pair each of (), {}, [], "", '' and <>, then any leading
  # punctuation, then whitespace exposed by those removals.
  def normalize(t, strip_punctuation=false)
    text = t.strip.gsub(/\s+/, ' ')
    return text unless strip_punctuation

    wrappers = [/^\((.*)\)$/, /^\{(.*)\}$/, /^\[(.*)\]$/, /^"(.*)"$/, /^'(.*)'$/, /^<(.*)>$/]
    wrappers.each do |wrapper|
      text = text.sub(wrapper, "\\1")
    end
    text.sub(/^[[:punct:]]+/, '').strip
  end
end
|
|
45
|
+
|
|
46
|
+
extend ClassMethods
|
|
47
|
+
|
|
48
|
+
# Returns the first node matching '/mods:mods' in ng_xml, or nil when nothing matches
# (to_solr checks for that nil before indexing).
def mods
|
|
49
|
+
ng_xml.xpath('/mods:mods', MODS_NS).first
|
|
50
|
+
end
|
|
51
|
+
|
|
52
|
+
# Normalized (punctuation-stripped) titles of the host relatedItems whose
# displayLabel is 'Project'.
#
# Related items without an untyped titleInfo are skipped: main_title returns
# nil for them, and normalize would raise on nil.
#
# @return [Array<String>]
def projects
  hosts = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS)
  hosts.collect { |p_node| main_title(p_node) }.compact.collect do |title|
    ModsFieldable.normalize(title, true)
  end
end
|
|
57
|
+
|
|
58
|
+
# Normalized (punctuation-stripped) titles of the host relatedItems whose
# displayLabel is 'Collection'.
#
# Related items without an untyped titleInfo are skipped: main_title returns
# nil for them, and normalize would raise on nil.
#
# @return [Array<String>]
def collections
  hosts = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS)
  hosts.collect { |p_node| main_title(p_node) }.compact.collect do |title|
    ModsFieldable.normalize(title, true)
  end
end
|
|
63
|
+
|
|
64
|
+
# Sortable form of the title: the text of every child of the first untyped
# titleInfo except nonSort children, concatenated and normalized with
# punctuation stripping.
#
# @return [String, nil] nil when the normalized result is empty
def sort_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  pieces = []
  if title_info
    pieces = title_info.children.reject { |child| child.name == 'nonSort' }.map { |child| child.text }
  end
  sortable = ModsFieldable.normalize(pieces.join, true)
  sortable.empty? ? nil : sortable
end
|
|
77
|
+
|
|
78
|
+
# Whitespace-normalized text of the first titleInfo that has no type attribute.
#
# @return [String, nil] nil when +node+ has no untyped titleInfo
def main_title(node=mods)
  untyped = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  return nil unless untyped

  ModsFieldable.normalize(untyped.text)
end
|
|
87
|
+
|
|
88
|
+
# All titles of +node+ without descending into relatedItems.
# For now this is the main title followed by the selected alternative titles.
#
# Both helpers are evaluated once and against +node+ itself, so passing a
# node other than the default returns that node's titles.
#
# @return [Array<String>] always an Array (possibly empty)
def titles(node=mods)
  all_titles = []
  primary = main_title(node)
  all_titles << primary unless primary.nil?
  alternates = alternative_titles(node)
  all_titles.concat(alternates) unless alternates.nil?
  all_titles
end
|
|
95
|
+
|
|
96
|
+
# Whitespace-normalized text of every titleInfo whose type is alternative,
# abbreviated, translated or uniform.
#
# @return [Array<String>]
def alternative_titles(node=mods)
  variants = node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS)
  variants.map { |title_info| ModsFieldable.normalize(title_info.text) }
end
|
|
101
|
+
|
|
102
|
+
# Normalized names taken from the top-level mods:name elements.
#
# Each name is its namePart texts joined with a single space and normalized
# with punctuation stripping; role terms are not part of the text.
#
# When +role_authority+ is given, only names having a roleTerm with that
# authority are returned; when +role+ is also given, the roleTerm text
# (whitespace-normalized) must equal the stripped +role+.
#
# Note: names under mods:subject are intentionally not extracted here.
# See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
#
# @return [Array<String>]
def names(role_authority=nil, role=nil)
  xpath = "./mods:name"
  unless role_authority.nil?
    role_test = role.nil? ? "" : " and normalize-space(text()) = '#{role.to_s.strip}'"
    xpath = "#{xpath}/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'#{role_test}]/ancestor::mods:name"
  end

  mods.xpath(xpath, MODS_NS).map do |name_node|
    parts = name_node.xpath('./mods:namePart', MODS_NS).map { |part| part.text }
    ModsFieldable.normalize(parts.join(' '), true)
  end
end
|
|
128
|
+
|
|
129
|
+
# Placeholder: the body contains only a comment, so this method currently returns nil.
# NOTE(review): the inline comment describes collecting key dateIssued values; that is not implemented here.
def dates(node=mods)
|
|
130
|
+
# get all the dateIssued with keyDate = 'yes', but not point = 'end'
|
|
131
|
+
end
|
|
132
|
+
|
|
133
|
+
# Whitespace-normalized text of every physicalDescription/form whose authority
# attribute is something other than 'marcform'.
# NOTE(review): under XPath 1.0 comparison rules a form with no authority
# attribute does not satisfy @authority != 'marcform' — confirm that is intended.
#
# @return [Array<String>]
def formats(node=mods)
  forms = node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS)
  forms.map { |form| ModsFieldable.normalize(form.text) }
end
|
|
139
|
+
|
|
140
|
+
# Whitespace-normalized text of the first location/physicalLocation whose
# authority is 'marcorg'.
#
# @return [String, nil] nil when no such element exists
def repository_code(node=mods)
  marcorg = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
  return nil unless marcorg

  ModsFieldable.normalize(marcorg.text)
end
|
|
150
|
+
|
|
151
|
+
# Whitespace-normalized text of the first location/physicalLocation that has
# no authority attribute.
#
# @return [String, nil] nil when no such element exists
def repository_text(node=mods)
  free_text = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
  return nil unless free_text

  ModsFieldable.normalize(free_text.text)
end
|
|
161
|
+
|
|
162
|
+
# Translates a repository MARC code through translate_with_default, using
# SHORT_REPO, LONG_REPO or FULL_REPO for +type+ 'short', 'long' or 'full',
# with 'Non-Columbia Location' as the default.
# +code+ is passed through as given (it is not normalized here).
#
# @return the translate_with_default result, or nil for any other +type+
def translate_repo_marc_code(code, type)
  case type
  when 'short' then translate_with_default(SHORT_REPO, code, 'Non-Columbia Location')
  when 'long' then translate_with_default(LONG_REPO, code, 'Non-Columbia Location')
  when 'full' then translate_with_default(FULL_REPO, code, 'Non-Columbia Location')
  end
end
|
|
175
|
+
|
|
176
|
+
# Translates a project title through translate_with_default, using SHORT_PROJ
# for +type+ 'short' and FULL_PROJ for 'full'. The whitespace-normalized title
# serves both as the lookup value and as the default.
#
# @return the translate_with_default result, or nil for any other +type+
def translate_project_title(project_title, type)
  title = ModsFieldable.normalize(project_title)
  case type
  when 'short' then translate_with_default(SHORT_PROJ, title, title)
  when 'full' then translate_with_default(FULL_PROJ, title, title)
  end
end
|
|
187
|
+
|
|
188
|
+
# Normalized (punctuation-stripped) text of every location/shelfLocator.
#
# @return [Array<String>]
def shelf_locators(node=mods)
  locators = node.xpath("./mods:location/mods:shelfLocator", MODS_NS)
  locators.map { |locator| ModsFieldable.normalize(locator.text, true) }
end
|
|
193
|
+
|
|
194
|
+
# Normalized (punctuation-stripped) text of the originInfo dates that carry
# none of the keyDate, point or w3cdtf attributes: every dateCreated first,
# then every dateIssued, then every dateOther.
#
# @return [Array<String>]
def textual_dates(node=mods)
  %w[dateCreated dateIssued dateOther].flat_map do |element|
    plain_dates = node.xpath("./mods:originInfo/mods:#{element}[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS)
    plain_dates.map { |date_node| ModsFieldable.normalize(date_node.text, true) }
  end
end
|
|
207
|
+
|
|
208
|
+
# Renders a year range as a one-element Array holding display text.
#
# Both years are converted with to_i first, which drops any zero padding.
# Equal years give just that year; otherwise the text is
# "Between <start> and <end>", where a year that is not positive is shown
# without its first character and followed by ' BCE', and a positive end year
# gets ' CE' only when the start year is not positive.
#
# @return [Array<String>]
def date_range_to_textual_date(start_year, end_year)
  first = start_year.to_i.to_s
  last = end_year.to_i.to_s
  return [first] if first == last

  starts_in_ce = first.to_i > 0
  from = starts_in_ce ? first : first[1, first.length] + ' BCE'
  to = if last.to_i <= 0
         last[1, last.length] + ' BCE'
       elsif starts_in_ce
         last
       else
         last + ' CE'
       end
  ['Between ' + from + ' and ' + to]
end
|
|
222
|
+
|
|
223
|
+
# Normalized (punctuation-stripped) text of every note whose type is 'date'
# or 'date source'.
#
# @return [Array<String>]
def date_notes(node=mods)
  notes = node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS)
  notes.map { |note| ModsFieldable.normalize(note.text, true) }
end
|
|
230
|
+
|
|
231
|
+
# Normalized (punctuation-stripped) text of every note that is untyped or
# whose type is neither 'date' nor 'date source'. Notes of type
# 'view direction' are prefixed with 'View Direction: '.
#
# @return [Array<String>]
def non_date_notes(node=mods)
  notes = node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS)
  notes.map do |note|
    prefix = note.attr('type') == 'view direction' ? 'View Direction: ' : ''
    prefix + ModsFieldable.normalize(note.text, true)
  end
end
|
|
244
|
+
|
|
245
|
+
# Normalized (punctuation-stripped) text of every location/url whose access is
# 'object in context' and whose usage is 'primary display'.
#
# @return [Array<String>]
def item_in_context_url(node=mods)
  urls = node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS)
  urls.map { |url| ModsFieldable.normalize(url.text, true) }
end
|
|
252
|
+
|
|
253
|
+
# Normalized (punctuation-stripped) text of every location/url whose access
# attribute is not 'object in context' (including urls without that attribute).
#
# @return [Array<String>]
def non_item_in_context_url(node=mods)
  urls = node.xpath("./mods:location/mods:url[not(@access='object in context')]", MODS_NS)
  urls.map { |url| ModsFieldable.normalize(url.text, true) }
end
|
|
260
|
+
|
|
261
|
+
# Normalized (punctuation-stripped) text of every location/url found under a
# host relatedItem whose displayLabel is 'Project'.
#
# @return [Array<String>]
def project_url(node=mods)
  urls = node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS)
  urls.map { |url| ModsFieldable.normalize(url.text, true) }
end
|
|
268
|
+
|
|
269
|
+
# Normalized (punctuation-stripped) subject terms, grouped in this order:
# topics (excluding subjects whose authority is 'Durst'), geographic, name,
# temporal, titleInfo, genre.
#
# @return [Array<String>]
def all_subjects(node=mods)
  subject_paths = [
    "./mods:subject[not(@authority) or @authority != 'Durst']/mods:topic",
    "./mods:subject/mods:geographic",
    "./mods:subject/mods:name",
    "./mods:subject/mods:temporal",
    "./mods:subject/mods:titleInfo",
    "./mods:subject/mods:genre"
  ]
  subject_paths.flat_map do |path|
    node.xpath(path, MODS_NS).map { |term| ModsFieldable.normalize(term.text, true) }
  end
end
|
|
293
|
+
|
|
294
|
+
# Normalized (punctuation-stripped) topics of the subjects whose authority is 'Durst'.
#
# @return [Array<String>]
def durst_subjects(node=mods)
  topics = node.xpath("./mods:subject[@authority='Durst']/mods:topic", MODS_NS)
  topics.map { |topic| ModsFieldable.normalize(topic.text, true) }
end
|
|
301
|
+
|
|
302
|
+
# Normalized (punctuation-stripped) text of every originInfo/place/placeTerm.
#
# @return [Array<String>]
def origin_info_place(node=mods)
  place_terms = node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS)
  place_terms.map { |term| ModsFieldable.normalize(term.text, true) }
end
|
|
309
|
+
|
|
310
|
+
# Place terms chosen for display: the placeTerms without a valueURI attribute
# when there are any, otherwise the placeTerms that do have one. Each value is
# normalized with punctuation stripping.
#
# @return [Array<String>]
def origin_info_place_for_display(node=mods)
  with_uri, without_uri = ["[@valueURI]", "[not(@valueURI)]"].map do |predicate|
    terms = node.xpath("./mods:originInfo/mods:place/mods:placeTerm#{predicate}", MODS_NS)
    terms.map { |term| ModsFieldable.normalize(term.text, true) }
  end
  without_uri.empty? ? with_uri : without_uri
end
|
|
323
|
+
|
|
324
|
+
# Coordinate pairs taken from subject/cartographics/coordinates.
#
# Each value is normalized with punctuation stripping and kept only when it
# contains a decimal pair in the expected format (40.123456,-73.5678).
# Punctuation stripping removes *all* leading punctuation, which would also
# drop the minus sign of a negative first coordinate, so that sign is put
# back when the whitespace-normalized text had one directly before the first
# digit.
#
# @return [Array<String>]
def coordinates(node=mods)
  coordinate_values = []
  node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).each do |coordinate_node|
    raw = ModsFieldable.normalize(coordinate_node.text)
    value = ModsFieldable.normalize(raw, true)
    # Restore the sign that the leading-punctuation strip removed.
    value = "-#{value}" if raw =~ /\A[[:punct:]]*-\d/
    if value.match(/-*\d+\.\d+\s*,\s*-*\d+\.\d+\s*/) # Expected coordinate format: 40.123456,-73.5678
      coordinate_values << value
    end
  end
  coordinate_values
end
|
|
334
|
+
|
|
335
|
+
# Builds the Solr document for this object's MODS record.
#
# Starts from +super+'s document when a superclass implementation exists
# (otherwise from +solr_doc+), adds title, name, subject, format, shelf, note,
# URL, place, repository, project, date and coordinate fields, and finally
# passes every field the class-level value mapper knows about through
# map_value.
#
# The year fields are derived from whichever origin_info_date_* fields are
# already present in the document (presumably put there by +super+ — confirm
# against the including class).
#
# @param [Hash] solr_doc document to extend when there is no +super+
# @return [Hash] the populated document; returned untouched when there is no mods node
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  # There is no mods. Return because there is nothing to process, otherwise
  # NoMethodError will be raised by subsequent lines.
  return solr_doc if mods.nil?

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["durst_subjects_ssim"] = durst_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_non_item_in_context_url_ssm"] = non_item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']

  # The first field present in the document wins, in the order listed above.
  start_key = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  end_key = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = start_key ? solr_doc[start_key][0] : nil
  end_date = end_key ? solr_doc[end_key][0] : nil

  if start_date.present?
    start_year = nil
    end_year = nil

    # 'uuuu' means the date is entirely unknown; other 'u' digits are read as 0.
    start_date = nil if start_date == 'uuuu'
    end_date = nil if end_date == 'uuuu'
    start_date = start_date.gsub('u', '0') unless start_date.nil?
    end_date = end_date.gsub('u', '0') unless end_date.nil?

    # A missing side of the range is filled in from the other side.
    end_date = start_date if end_date.blank?
    start_date = end_date if start_date.blank?

    year_regex = /^(-?\d{1,4}).*/

    start_year_match = start_date.blank? ? nil : start_date.match(year_regex)
    if start_year_match
      start_year = zero_pad_year(start_year_match.captures[0])
      solr_doc["lib_start_date_year_itsi"] = start_year.to_i # TrieInt version for searches
    end

    end_year_match = end_date.blank? ? nil : end_date.match(year_regex)
    if end_year_match
      end_year = zero_pad_year(end_year_match.captures[0])
      solr_doc["lib_end_date_year_itsi"] = end_year.to_i # TrieInt version for searches
    end

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year && end_year
    solr_doc["lib_date_year_range_ss"] = solr_doc["lib_date_year_range_si"]

    # When no textual date is available, fall back to the parsed years. This
    # only happens when both years were actually parsed: an unparseable date
    # (e.g. "n.d." or all-'u' values) would otherwise turn into nil.to_i == 0
    # and be indexed as the bogus textual date "0".
    if solr_doc["lib_date_textual_ssm"].blank? && start_year && end_year
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
|
|
461
|
+
|
|
462
|
+
# Left-pads the digits of +year+ with zeros to at least four characters,
# keeping a leading minus sign in front of the padded digits.
#
# @param year anything responding to to_s
# @return [String]
def zero_pad_year(year)
  text = year.to_s
  sign = text.start_with?('-') ? '-' : ''
  digits = sign.empty? ? text : text[1, text.length]
  sign + digits.rjust(4, '0')
end
|
|
472
|
+
end
|
|
473
|
+
end
|