cul_hydra 1.0.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/app/assets/images/cul_hydra/crystal/binary.png +0 -0
- data/app/assets/images/cul_hydra/crystal/document.png +0 -0
- data/app/assets/images/cul_hydra/crystal/file.png +0 -0
- data/app/assets/images/cul_hydra/crystal/file_broken.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_documents.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_images.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_music.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_sound.png +0 -0
- data/app/assets/images/cul_hydra/crystal/folder_video.png +0 -0
- data/app/assets/images/cul_hydra/crystal/kmultiple.png +0 -0
- data/app/assets/images/cul_hydra/crystal/knotify.png +0 -0
- data/app/assets/images/cul_hydra/crystal/mp3.png +0 -0
- data/app/assets/images/cul_hydra/crystal/multimedia2.png +0 -0
- data/app/assets/images/cul_hydra/crystal/video.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/application.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/code.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/css.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/db.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/directory.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/doc.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/file.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/film.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/flash.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/folder_open.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/html.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/java.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/linux.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/music.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/pdf.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/php.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/picture.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/ppt.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/psd.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/ruby.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/script.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/spinner.gif +0 -0
- data/app/assets/images/cul_hydra/filesystem/txt.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/xls.png +0 -0
- data/app/assets/images/cul_hydra/filesystem/zip.png +0 -0
- data/app/controllers/concerns/cul/hydra/application_id_behavior.rb +43 -0
- data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
- data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
- data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
- data/app/helpers/cul/hydra/ore_proxies_helper_behavior.rb +119 -0
- data/app/helpers/cul/hydra/struct_metadata_helper_behavior.rb +89 -0
- data/app/models/bag_aggregator.rb +7 -0
- data/app/models/concept.rb +23 -0
- data/app/models/concerns/cul/hydra/models.rb +24 -0
- data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
- data/app/models/concerns/cul/hydra/models/common.rb +220 -0
- data/app/models/concerns/cul/hydra/models/image_resource.rb +106 -0
- data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
- data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
- data/app/models/concerns/nfo/common.rb +17 -0
- data/app/models/concerns/nfo/file_data_object.rb +10 -0
- data/app/models/concerns/nfo/folder.rb +10 -0
- data/app/models/concerns/nie/information_element.rb +10 -0
- data/app/models/concerns/ore/proxy.rb +124 -0
- data/app/models/concerns/rdf/cul.rb +77 -0
- data/app/models/concerns/rdf/fcrepo3.rb +360 -0
- data/app/models/concerns/rdf/nfo.rb +807 -0
- data/app/models/concerns/rdf/nie.rb +338 -0
- data/app/models/concerns/rdf/olo.rb +100 -0
- data/app/models/concerns/rdf/ore.rb +101 -0
- data/app/models/concerns/rdf/pimo.rb +605 -0
- data/app/models/concerns/rdf/sc.rb +47 -0
- data/app/models/concerns/sc/canvas.rb +12 -0
- data/app/models/concerns/sc/sequence.rb +21 -0
- data/app/models/content_aggregator.rb +3 -0
- data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
- data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
- data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
- data/app/models/dc_document.rb +39 -0
- data/app/models/generic_aggregator.rb +68 -0
- data/app/models/generic_object.rb +18 -0
- data/app/models/generic_resource.rb +210 -0
- data/app/models/jp2_image_aggregator.rb +34 -0
- data/app/models/mets_structured_aggregator.rb +18 -0
- data/app/models/resource.rb +78 -0
- data/app/models/resource_aggregator.rb +22 -0
- data/app/models/static_audio_aggregator.rb +12 -0
- data/app/models/static_image_aggregator.rb +32 -0
- data/bin/rails +12 -0
- data/config/fedora.yml +17 -0
- data/config/jetty.yml +6 -0
- data/config/locales/ldpd_hydra.en.yml +125 -0
- data/config/predicate_mappings.yml +79 -0
- data/config/solr.yml +8 -0
- data/config/solr_mappings.yml +26 -0
- data/config/solr_value_maps.yml +41 -0
- data/config/subs.yml +17 -0
- data/fixtures/cmodels/ldpd_ADLMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_AESMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_BagAggregator.xml +70 -0
- data/fixtures/cmodels/ldpd_Concept.xml +69 -0
- data/fixtures/cmodels/ldpd_ContentAggregator.xml +70 -0
- data/fixtures/cmodels/ldpd_DynamicAggregator.xml +56 -0
- data/fixtures/cmodels/ldpd_JP2ImageAggregator.xml +60 -0
- data/fixtures/cmodels/ldpd_METSMetadata.xml +56 -0
- data/fixtures/cmodels/ldpd_METSStructuredAggregator.xml +53 -0
- data/fixtures/cmodels/ldpd_MODSMetadata.xml +73 -0
- data/fixtures/cmodels/ldpd_MostRecent.xml +46 -0
- data/fixtures/cmodels/ldpd_PTIFImageAggregator.xml +63 -0
- data/fixtures/cmodels/ldpd_Resource.xml +72 -0
- data/fixtures/cmodels/ldpd_RestrictedResource.xml +54 -0
- data/fixtures/cmodels/ldpd_Since.xml +62 -0
- data/fixtures/cmodels/ldpd_StaticAudioAggregator.xml +54 -0
- data/fixtures/cmodels/ldpd_StaticImageAggregator.xml +71 -0
- data/fixtures/cmodels/ldpd_htest.xml +54 -0
- data/fixtures/cmodels/ldpd_nullbind.xml +63 -0
- data/fixtures/cmodels/ldpd_sdef.Aggregator.xml +71 -0
- data/fixtures/cmodels/ldpd_sdef.Core.xml +48 -0
- data/fixtures/cmodels/ldpd_sdef.Image.xml +47 -0
- data/fixtures/cmodels/ldpd_sdef.Metadata.xml +62 -0
- data/fixtures/cmodels/ldpd_sdef.Resource.xml +76 -0
- data/fixtures/cmodels/ldpd_sdef.ZoomingImage.xml +46 -0
- data/fixtures/cmodels/ldpd_sdep.BagAggregator.xml +160 -0
- data/fixtures/cmodels/ldpd_sdep.BagAggregatorCore.xml +221 -0
- data/fixtures/cmodels/ldpd_sdep.ContentAggregatorCore.xml +221 -0
- data/fixtures/cmodels/ldpd_sdep.DynamicAggregator.xml +171 -0
- data/fixtures/cmodels/ldpd_sdep.DynamicAggregatorCore.xml +215 -0
- data/fixtures/cmodels/ldpd_sdep.JP2Image.xml +220 -0
- data/fixtures/cmodels/ldpd_sdep.JP2ImageAggregator.xml +167 -0
- data/fixtures/cmodels/ldpd_sdep.JP2ImageCore.xml +229 -0
- data/fixtures/cmodels/ldpd_sdep.MODSMetadata.xml +158 -0
- data/fixtures/cmodels/ldpd_sdep.MODSMetadataCore.xml +227 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImage.xml +222 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImageAggregator.xml +167 -0
- data/fixtures/cmodels/ldpd_sdep.PTIFImageCore.xml +215 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImage.xml +210 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImageAggregator.xml +186 -0
- data/fixtures/cmodels/ldpd_sdep.StaticImageCore.xml +220 -0
- data/fixtures/cmodels/ore_Proxy.xml +50 -0
- data/fixtures/spec/BLOB/test001.jpg +0 -0
- data/fixtures/spec/CUL_DC/dc.xml +5 -0
- data/fixtures/spec/CUL_MODS/mods-001.xml +25 -0
- data/fixtures/spec/CUL_MODS/mods-all.xml +65 -0
- data/fixtures/spec/CUL_MODS/mods-bad-repo.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-created-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-created-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-end-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-issued-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-issued-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-other-range.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-other-single.xml +6 -0
- data/fixtures/spec/CUL_MODS/mods-date-range-short-years.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-date-start-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-dates-with-all-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-dates-with-some-u-characters.xml +7 -0
- data/fixtures/spec/CUL_MODS/mods-item.xml +31 -0
- data/fixtures/spec/CUL_MODS/mods-names.xml +35 -0
- data/fixtures/spec/CUL_MODS/mods-notes.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-ns.xml +2 -0
- data/fixtures/spec/CUL_MODS/mods-origin-info.xml +9 -0
- data/fixtures/spec/CUL_MODS/mods-part.xml +22 -0
- data/fixtures/spec/CUL_MODS/mods-physical-description.xml +12 -0
- data/fixtures/spec/CUL_MODS/mods-physical-location.xml +9 -0
- data/fixtures/spec/CUL_MODS/mods-record-info.xml +4 -0
- data/fixtures/spec/CUL_MODS/mods-relateditem-project.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-subjects.xml +73 -0
- data/fixtures/spec/CUL_MODS/mods-textual-date.xml +8 -0
- data/fixtures/spec/CUL_MODS/mods-titles.xml +33 -0
- data/fixtures/spec/CUL_MODS/mods-top-level-location-vs-relateditem-location.xml +21 -0
- data/fixtures/spec/CUL_MODS/mods-unmapped-project.xml +7 -0
- data/fixtures/spec/CUL_SOLR/mods-001.xml +1 -0
- data/fixtures/spec/CUL_SOLR/mods-001.yml +30 -0
- data/fixtures/spec/FOXML/content-aggregator.xml +64 -0
- data/fixtures/spec/FOXML/content-cmodel.xml +48 -0
- data/fixtures/spec/FOXML/image-cmodel.xml +48 -0
- data/fixtures/spec/FOXML/resource-max.xml +83 -0
- data/fixtures/spec/FOXML/resource-screen.xml +273 -0
- data/fixtures/spec/FOXML/resource-thumb.xml +86 -0
- data/fixtures/spec/FOXML/static-image-aggregator.xml +31 -0
- data/fixtures/spec/STRUCTMAP/structmap-examples.xml +21 -0
- data/fixtures/spec/STRUCTMAP/structmap-nested.xml +10 -0
- data/fixtures/spec/STRUCTMAP/structmap-recto.xml +4 -0
- data/fixtures/spec/STRUCTMAP/structmap-seq.xml +5 -0
- data/fixtures/spec/STRUCTMAP/structmap-unordered-seq.xml +5 -0
- data/lib/cul_hydra.rb +20 -0
- data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
- data/lib/cul_hydra/controllers.rb +13 -0
- data/lib/cul_hydra/controllers/aggregates.rb +93 -0
- data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
- data/lib/cul_hydra/controllers/catalog.rb +12 -0
- data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
- data/lib/cul_hydra/controllers/datastreams.rb +145 -0
- data/lib/cul_hydra/controllers/helpers.rb +10 -0
- data/lib/cul_hydra/controllers/helpers/active_fedora_helper_behavior.rb +9 -0
- data/lib/cul_hydra/controllers/helpers/application_helper_behavior.rb +16 -0
- data/lib/cul_hydra/controllers/helpers/dc_metadata_helper_behavior.rb +9 -0
- data/lib/cul_hydra/controllers/helpers/hydra_assets_helper_behavior.rb +46 -0
- data/lib/cul_hydra/controllers/helpers/hydra_autocomplete_helper_behavior.rb +35 -0
- data/lib/cul_hydra/controllers/helpers/hydra_uploader_helper_behavior.rb +34 -0
- data/lib/cul_hydra/controllers/helpers/resources_helper_behavior.rb +159 -0
- data/lib/cul_hydra/controllers/resources.rb +161 -0
- data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
- data/lib/cul_hydra/controllers/suggestions.rb +126 -0
- data/lib/cul_hydra/controllers/terms.rb +205 -0
- data/lib/cul_hydra/engine.rb +31 -0
- data/lib/cul_hydra/fedora.rb +41 -0
- data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
- data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
- data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
- data/lib/cul_hydra/indexer.rb +102 -0
- data/lib/cul_hydra/om.rb +7 -0
- data/lib/cul_hydra/om/standard_mods.rb +115 -0
- data/lib/cul_hydra/risearch_members.rb +92 -0
- data/lib/cul_hydra/solrizer.rb +10 -0
- data/lib/cul_hydra/solrizer/extractor.rb +27 -0
- data/lib/cul_hydra/solrizer/mods_fieldable.rb +473 -0
- data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
- data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
- data/lib/cul_hydra/solrizer_patch.rb +172 -0
- data/lib/cul_hydra/version.rb +8 -0
- data/lib/cul_hydra/version.rb~ +8 -0
- data/lib/tasks/cmodel.rake +122 -0
- data/lib/tasks/cul_hydra_dev.rake +54 -0
- data/lib/tasks/index.rake +73 -0
- data/lib/tasks/transform.rake +23 -0
- metadata +503 -0
@@ -0,0 +1,10 @@
|
|
1
|
+
# Namespace for the CUL extensions to Solrizer.
#
# Every constant below is registered with +autoload+, so the file that
# defines it is only required the first time the constant is referenced.
module Cul
  module Hydra
    module Solrizer
      {
        :Extractor => "cul_hydra/solrizer/extractor",
        :TerminologyBasedSolrizer => "cul_hydra/solrizer/terminology_based_solrizer",
        :ValueMapper => "cul_hydra/solrizer/value_mapper",
        :ModsFieldable => "cul_hydra/solrizer/mods_fieldable"
      }.each do |const_name, feature_path|
        autoload const_name, feature_path
      end
    end
  end
end
|
@@ -0,0 +1,27 @@
|
|
1
|
+
module Cul::Hydra::Solrizer
  class Extractor < ::Solrizer::Extractor
    # Add +field_value+ to the list of values stored under +field_name+ in
    # +solr_doc+ without discarding anything that is already there.
    #
    # The value is first passed through +format_node_value+. A field that is
    # not yet present is created as a one-element array; a field that is
    # already present has the formatted value appended to it. When +unique+
    # is true, a value that the field already contains is not appended again.
    #
    # @param [Hash] solr_doc
    # @param [String] field_name
    # @param [String] field_value
    # @param [boolean] unique
    # @return [Hash] the same +solr_doc+, modified in place
    def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      formatted_value = format_node_value(field_value)

      unless solr_doc.has_key?(field_name)
        solr_doc.merge!(field_name => [formatted_value])
        return solr_doc
      end

      existing_values = solr_doc[field_name]
      already_present = unique && existing_values.include?(formatted_value)
      existing_values << formatted_value unless already_present
      solr_doc
    end

    # Instance Methods

    # Instance-level convenience wrapper around the class method of the same
    # name; takes and returns exactly the same values.
    def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      Cul::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
    end
  end
end
|
@@ -0,0 +1,473 @@
|
|
1
|
+
module Cul::Hydra::Solrizer
|
2
|
+
module ModsFieldable
|
3
|
+
extend ActiveSupport::Concern
|
4
|
+
include Solrizer::DefaultDescriptors::Normal
|
5
|
+
|
6
|
+
MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
|
7
|
+
|
8
|
+
module ClassMethods
  # Return the memoized ValueMapper, building it on first use.
  # +maps+ is only consulted on the call that creates the mapper.
  def value_mapper(maps=nil)
    @value_mapper = ValueMapper.new(maps) unless @value_mapper
    @value_mapper
  end

  # Delegates to ValueMapper#map_field.
  def map_field(field_key, map_key)
    value_mapper.map_field(field_key, map_key)
  end

  # Delegates to ValueMapper#map_value.
  def map_value(field_key, value_key)
    value_mapper.map_value(field_key, value_key)
  end

  # Delegates to ValueMapper#maps_field?.
  def maps_field?(field_key)
    value_mapper.maps_field?(field_key)
  end

  # Tidy a text value for indexing.
  #
  # Leading/trailing whitespace is removed and every internal run of
  # whitespace is collapsed to a single space. When +strip_punctuation+ is
  # true, one layer of each kind of enclosing pair -- (), {}, [], "", '', <>
  # -- is peeled off in that order, any leading punctuation is removed, and
  # the result is stripped again.
  #
  # @param [String] t the text to normalize (not modified)
  # @param [boolean] strip_punctuation
  # @return [String] a new, normalized string
  def normalize(t, strip_punctuation=false)
    cleaned = t.dup.strip.gsub(/\s+/, ' ')
    return cleaned unless strip_punctuation

    enclosing_pairs = [
      /^\((.*)\)$/,
      /^\{(.*)\}$/,
      /^\[(.*)\]$/,
      /^"(.*)"$/,
      /^'(.*)'$/,
      /^<(.*)>$/
    ]
    enclosing_pairs.each do |pair|
      cleaned = cleaned.sub(pair, "\\1")
    end

    # Removing punctuation can expose new leading/trailing space, hence the
    # final strip.
    cleaned.sub(/^[[:punct:]]+/, '').strip
  end
end
|
45
|
+
|
46
|
+
extend ClassMethods
|
47
|
+
|
48
|
+
def mods
|
49
|
+
ng_xml.xpath('/mods:mods', MODS_NS).first
|
50
|
+
end
|
51
|
+
|
52
|
+
def projects
|
53
|
+
mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS).collect do |p_node|
|
54
|
+
ModsFieldable.normalize(main_title(p_node), true)
|
55
|
+
end
|
56
|
+
end
|
57
|
+
|
58
|
+
def collections
|
59
|
+
mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS).collect do |p_node|
|
60
|
+
ModsFieldable.normalize(main_title(p_node), true)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
|
64
|
+
def sort_title(node=mods)
|
65
|
+
# include only the untyped [!@type] titleInfo, exclude noSort
|
66
|
+
base_text = ''
|
67
|
+
t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
|
68
|
+
if t
|
69
|
+
t.children.each do |child|
|
70
|
+
base_text << child.text unless child.name == 'nonSort'
|
71
|
+
end
|
72
|
+
end
|
73
|
+
base_text = ModsFieldable.normalize(base_text, true)
|
74
|
+
base_text = nil if base_text.empty?
|
75
|
+
base_text
|
76
|
+
end
|
77
|
+
|
78
|
+
def main_title(node=mods)
|
79
|
+
# include only the untyped [!@type] titleInfo
|
80
|
+
t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
|
81
|
+
if t
|
82
|
+
ModsFieldable.normalize(t.text)
|
83
|
+
else
|
84
|
+
nil
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
# All titles of +node+, without descending into relatedItems.
#
# For now this is the main title followed by the selected alternative
# titles (see +alternative_titles+).
#
# Fixes over the previous version: +node+ is now actually passed on to
# +main_title+ and +alternative_titles+ (it used to be ignored, so the MODS
# root was always used), and an Array is always returned.
#
# @return [Array<String>] possibly empty
def titles(node=mods)
  all_titles = []

  primary = main_title(node)
  all_titles << primary unless primary.nil?

  alternates = alternative_titles(node)
  all_titles.concat(alternates) unless alternates.nil?

  all_titles
end
|
95
|
+
|
96
|
+
def alternative_titles(node=mods)
|
97
|
+
node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS).collect do |t|
|
98
|
+
ModsFieldable.normalize(t.text)
|
99
|
+
end
|
100
|
+
end
|
101
|
+
|
102
|
+
def names(role_authority=nil, role=nil)
|
103
|
+
# get all the name nodes
|
104
|
+
# keep all child text except the role terms
|
105
|
+
xpath = "./mods:name"
|
106
|
+
unless role_authority.nil?
|
107
|
+
xpath << "/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'"
|
108
|
+
unless role.nil?
|
109
|
+
xpath << " and normalize-space(text()) = '#{role.to_s.strip}'"
|
110
|
+
end
|
111
|
+
xpath << "]/ancestor::mods:name"
|
112
|
+
end
|
113
|
+
names = mods.xpath(xpath, MODS_NS).collect do |node|
|
114
|
+
base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
|
115
|
+
ModsFieldable.normalize(base_text, true)
|
116
|
+
end
|
117
|
+
|
118
|
+
# Note: Removing subject names from name field extraction.
|
119
|
+
# See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
|
120
|
+
#xpath = "./mods:subject" + xpath[1,xpath.length]
|
121
|
+
#mods.xpath(xpath, MODS_NS).each do |node|
|
122
|
+
# base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
|
123
|
+
# names << ModsFieldable.normalize(base_text, true)
|
124
|
+
#end
|
125
|
+
|
126
|
+
names
|
127
|
+
end
|
128
|
+
|
129
|
+
# Unimplemented placeholder: the body is empty, so this always returns nil
# and +node+ is unused. The comment inside records the intended behavior.
def dates(node=mods)
  # get all the dateIssued with keyDate = 'yes', but not point = 'end'
end
|
132
|
+
|
133
|
+
def formats(node=mods)
|
134
|
+
# get all the form values with authority != 'marcform'
|
135
|
+
node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS).collect do |n|
|
136
|
+
ModsFieldable.normalize(n.text)
|
137
|
+
end
|
138
|
+
end
|
139
|
+
|
140
|
+
def repository_code(node=mods)
|
141
|
+
# get the location/physicalLocation[@authority = 'marcorg']
|
142
|
+
repo_code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
|
143
|
+
|
144
|
+
if repo_code_node
|
145
|
+
ModsFieldable.normalize(repo_code_node.text)
|
146
|
+
else
|
147
|
+
return nil
|
148
|
+
end
|
149
|
+
end
|
150
|
+
|
151
|
+
def repository_text(node=mods)
|
152
|
+
# get the location/physicalLocation[not(@authority)]
|
153
|
+
repo_text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
|
154
|
+
|
155
|
+
if repo_text_node
|
156
|
+
ModsFieldable.normalize(repo_text_node.text)
|
157
|
+
else
|
158
|
+
return nil
|
159
|
+
end
|
160
|
+
end
|
161
|
+
|
162
|
+
# Translate a repository MARC organization code into a display name.
#
# +type+ selects the lookup table: 'short' uses SHORT_REPO, 'long' uses
# LONG_REPO and 'full' uses FULL_REPO. The lookup itself is done by
# +translate_with_default+ with 'Non-Columbia Location' as the default
# (presumably what is returned for unknown codes -- that helper is defined
# elsewhere). +code+ is passed through as given, without normalization.
#
# @param [String] code
# @param [String] type 'short', 'long' or 'full'
# @return [String, nil] nil for any other +type+
def translate_repo_marc_code(code, type)
  case type
  when 'short' then translate_with_default(SHORT_REPO, code, 'Non-Columbia Location')
  when 'long' then translate_with_default(LONG_REPO, code, 'Non-Columbia Location')
  when 'full' then translate_with_default(FULL_REPO, code, 'Non-Columbia Location')
  else nil
  end
end
|
175
|
+
|
176
|
+
# Translate a project title into its display form.
#
# The title is normalized first (whitespace only). +type+ then selects the
# lookup table: 'short' uses SHORT_PROJ and 'full' uses FULL_PROJ. The
# lookup is done by +translate_with_default+ (defined elsewhere) with the
# normalized title itself passed as the default.
#
# @param [String] project_title
# @param [String] type 'short' or 'full'
# @return [String, nil] nil for any other +type+
def translate_project_title(project_title, type)
  title = ModsFieldable.normalize(project_title)

  case type
  when 'short' then translate_with_default(SHORT_PROJ, title, title)
  when 'full' then translate_with_default(FULL_PROJ, title, title)
  else nil
  end
end
|
187
|
+
|
188
|
+
def shelf_locators(node=mods)
|
189
|
+
node.xpath("./mods:location/mods:shelfLocator", MODS_NS).collect do |n|
|
190
|
+
ModsFieldable.normalize(n.text, true)
|
191
|
+
end
|
192
|
+
end
|
193
|
+
|
194
|
+
def textual_dates(node=mods)
|
195
|
+
dates = []
|
196
|
+
node.xpath("./mods:originInfo/mods:dateCreated[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
197
|
+
dates << ModsFieldable.normalize(n.text, true)
|
198
|
+
end
|
199
|
+
node.xpath("./mods:originInfo/mods:dateIssued[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
200
|
+
dates << ModsFieldable.normalize(n.text, true)
|
201
|
+
end
|
202
|
+
node.xpath("./mods:originInfo/mods:dateOther[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
|
203
|
+
dates << ModsFieldable.normalize(n.text, true)
|
204
|
+
end
|
205
|
+
return dates
|
206
|
+
end
|
207
|
+
|
208
|
+
# Build a human-readable date from a start and an end year.
#
# Both arguments are converted with +to_i+ first, which also removes any
# zero-padding. When the two years are equal the single year is returned as
# is (a negative year keeps its minus sign). Otherwise the result is
# "Between <start> and <end>", where a year that is not positive is shown
# as its absolute value followed by " BCE", and a positive end year gets a
# " CE" suffix only when the start year is not positive.
#
# Fix: a year of 0 used to be rendered as an empty string (" BCE") because
# the sign was removed by dropping the first character; it is now "0 BCE".
#
# @param [#to_i] start_year
# @param [#to_i] end_year
# @return [Array<String>] a one-element array
def date_range_to_textual_date(start_year, end_year)
  first_year = start_year.to_i
  last_year = end_year.to_i

  return [first_year.to_s] if first_year == last_year

  start_text = first_year > 0 ? first_year.to_s : "#{first_year.abs} BCE"
  end_text =
    if last_year > 0
      # Only disambiguate with "CE" when the range starts in BCE territory.
      first_year > 0 ? last_year.to_s : "#{last_year} CE"
    else
      "#{last_year.abs} BCE"
    end

  ["Between #{start_text} and #{end_text}"]
end
|
222
|
+
|
223
|
+
def date_notes(node=mods)
|
224
|
+
date_notes = []
|
225
|
+
node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS).collect do |n|
|
226
|
+
date_notes << ModsFieldable.normalize(n.text, true)
|
227
|
+
end
|
228
|
+
return date_notes
|
229
|
+
end
|
230
|
+
|
231
|
+
def non_date_notes(node=mods)
|
232
|
+
non_date_notes = []
|
233
|
+
node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS).collect do |n|
|
234
|
+
if n.attr('type') == 'view direction'
|
235
|
+
non_date_notes << 'View Direction: ' + ModsFieldable.normalize(n.text, true)
|
236
|
+
else
|
237
|
+
non_date_notes << ModsFieldable.normalize(n.text, true)
|
238
|
+
end
|
239
|
+
|
240
|
+
|
241
|
+
end
|
242
|
+
return non_date_notes
|
243
|
+
end
|
244
|
+
|
245
|
+
def item_in_context_url(node=mods)
|
246
|
+
item_in_context_url_val = []
|
247
|
+
node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS).collect do |n|
|
248
|
+
item_in_context_url_val << ModsFieldable.normalize(n.text, true)
|
249
|
+
end
|
250
|
+
item_in_context_url_val
|
251
|
+
end
|
252
|
+
|
253
|
+
def non_item_in_context_url(node=mods)
|
254
|
+
non_item_in_context_url_val = []
|
255
|
+
node.xpath("./mods:location/mods:url[not(@access='object in context')]", MODS_NS).collect do |n|
|
256
|
+
non_item_in_context_url_val << ModsFieldable.normalize(n.text, true)
|
257
|
+
end
|
258
|
+
non_item_in_context_url_val
|
259
|
+
end
|
260
|
+
|
261
|
+
def project_url(node=mods)
|
262
|
+
project_url_val = []
|
263
|
+
node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS).collect do |n|
|
264
|
+
project_url_val << ModsFieldable.normalize(n.text, true)
|
265
|
+
end
|
266
|
+
project_url_val
|
267
|
+
end
|
268
|
+
|
269
|
+
def all_subjects(node=mods)
|
270
|
+
list_of_subjects = []
|
271
|
+
|
272
|
+
node.xpath("./mods:subject[not(@authority) or @authority != 'Durst']/mods:topic", MODS_NS).collect do |n|
|
273
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
274
|
+
end
|
275
|
+
node.xpath("./mods:subject/mods:geographic", MODS_NS).collect do |n|
|
276
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
277
|
+
end
|
278
|
+
node.xpath("./mods:subject/mods:name", MODS_NS).collect do |n|
|
279
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
280
|
+
end
|
281
|
+
node.xpath("./mods:subject/mods:temporal", MODS_NS).collect do |n|
|
282
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
283
|
+
end
|
284
|
+
node.xpath("./mods:subject/mods:titleInfo", MODS_NS).collect do |n|
|
285
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
286
|
+
end
|
287
|
+
node.xpath("./mods:subject/mods:genre", MODS_NS).collect do |n|
|
288
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
289
|
+
end
|
290
|
+
|
291
|
+
return list_of_subjects
|
292
|
+
end
|
293
|
+
|
294
|
+
def durst_subjects(node=mods)
|
295
|
+
list_of_subjects = []
|
296
|
+
node.xpath("./mods:subject[@authority='Durst']/mods:topic", MODS_NS).collect do |n|
|
297
|
+
list_of_subjects << ModsFieldable.normalize(n.text, true)
|
298
|
+
end
|
299
|
+
return list_of_subjects
|
300
|
+
end
|
301
|
+
|
302
|
+
def origin_info_place(node=mods)
|
303
|
+
places = []
|
304
|
+
node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS).collect do |n|
|
305
|
+
places << ModsFieldable.normalize(n.text, true)
|
306
|
+
end
|
307
|
+
return places
|
308
|
+
end
|
309
|
+
|
310
|
+
def origin_info_place_for_display(node=mods)
|
311
|
+
# If there are multiple origin_info place elements, choose only the ones without valueURI attributes. Otherwise show the others.
|
312
|
+
places_with_uri = []
|
313
|
+
places_without_uri = []
|
314
|
+
node.xpath("./mods:originInfo/mods:place/mods:placeTerm[@valueURI]", MODS_NS).collect do |n|
|
315
|
+
places_with_uri << ModsFieldable.normalize(n.text, true)
|
316
|
+
end
|
317
|
+
node.xpath("./mods:originInfo/mods:place/mods:placeTerm[not(@valueURI)]", MODS_NS).collect do |n|
|
318
|
+
places_without_uri << ModsFieldable.normalize(n.text, true)
|
319
|
+
end
|
320
|
+
|
321
|
+
return (places_without_uri.length > 0 ? places_without_uri : places_with_uri)
|
322
|
+
end
|
323
|
+
|
324
|
+
def coordinates(node=mods)
|
325
|
+
coordinate_values = []
|
326
|
+
node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).collect do |n|
|
327
|
+
n = ModsFieldable.normalize(n.text, true)
|
328
|
+
if n.match(/-*\d+\.\d+\s*,\s*-*\d+\.\d+\s*/) # Expected coordinate format: 40.123456,-73.5678
|
329
|
+
coordinate_values << n
|
330
|
+
end
|
331
|
+
end
|
332
|
+
coordinate_values
|
333
|
+
end
|
334
|
+
|
335
|
+
# Add the MODS-derived fields to +solr_doc+.
#
# If a +super+ implementation exists it runs first and its result becomes
# the document being filled in. When there is no MODS root the document is
# returned untouched. Otherwise title, name, subject, format, location,
# note, URL, place, repository and project fields are set from the helper
# methods of this module; start/end year fields are derived from whichever
# origin_info_date_* fields are already present in the document; "geo" is
# set from +coordinates+; and finally every field for which the including
# class reports +maps_field?+ is passed through +map_value+.
#
# Fix: the fallback that fills lib_date_textual_ssm from the year range now
# only runs when both a start and an end year were actually parsed.
# Previously an unparseable or all-'u' date produced the textual date "0".
#
# @param [Hash] solr_doc
# @return [Hash] the populated document
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  # There is no mods, so there is nothing to process; continuing would
  # raise NoMethodError in the helpers below.
  return solr_doc if mods.nil?

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["durst_subjects_ssim"] = durst_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_non_item_in_context_url_ssm"] = non_item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many
  # possible originInfo/dateX elements. The first field present wins.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']

  start_key = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  end_key = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = start_key ? solr_doc[start_key][0] : nil
  end_date = end_key ? solr_doc[end_key][0] : nil

  if start_date.present?
    start_year = nil
    end_year = nil

    # A date of all 'u' characters is completely unknown; otherwise unknown
    # digits are treated as zeros.
    start_date = nil if start_date == 'uuuu'
    end_date = nil if end_date == 'uuuu'
    start_date = start_date.gsub('u', '0') unless start_date.nil?
    end_date = end_date.gsub('u', '0') unless end_date.nil?

    # A missing end of the range defaults to the other end.
    end_date = start_date if end_date.blank?
    start_date = end_date if start_date.blank?

    year_regex = /^(-?\d{1,4}).*/

    unless start_date.blank?
      start_year_match = start_date.match(year_regex)
      if start_year_match && start_year_match.captures.length > 0
        start_year = zero_pad_year(start_year_match.captures[0])
        solr_doc["lib_start_date_year_itsi"] = start_year.to_i # TrieInt version for searches
      end
    end

    unless end_date.blank?
      end_year_match = end_date.match(year_regex)
      if end_year_match && end_year_match.captures.length > 0
        end_year = zero_pad_year(end_year_match.captures[0])
        solr_doc["lib_end_date_year_itsi"] = end_year.to_i # TrieInt version for searches
      end
    end

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year && end_year
    solr_doc["lib_date_year_range_ss"] = solr_doc["lib_date_year_range_si"]

    # When no textual date is available, fall back to the parsed year range.
    # Without both years there is nothing meaningful to fall back to.
    if solr_doc["lib_date_textual_ssm"].blank? && start_year && end_year
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Only values of existing keys are replaced here, so the hash is not
  # resized while it is being iterated.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
|
461
|
+
|
462
|
+
# Left-pad a year with zeros to at least four digits, keeping a leading
# minus sign in front of the padded digits (e.g. "-44" becomes "-0044").
# Years that already have four or more digits are returned unchanged.
#
# @param [#to_s] year
# @return [String]
def zero_pad_year(year)
  year_text = year.to_s
  sign = year_text.start_with?('-') ? '-' : ''
  digits = sign.empty? ? year_text : year_text[1, year_text.length]

  # rjust leaves strings of four or more characters as they are.
  sign + digits.rjust(4, '0')
end
|
472
|
+
end
|
473
|
+
end
|