cul_hydra 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (221) hide show
  1. checksums.yaml +7 -0
  2. data/app/assets/images/cul_hydra/crystal/binary.png +0 -0
  3. data/app/assets/images/cul_hydra/crystal/document.png +0 -0
  4. data/app/assets/images/cul_hydra/crystal/file.png +0 -0
  5. data/app/assets/images/cul_hydra/crystal/file_broken.png +0 -0
  6. data/app/assets/images/cul_hydra/crystal/folder_documents.png +0 -0
  7. data/app/assets/images/cul_hydra/crystal/folder_images.png +0 -0
  8. data/app/assets/images/cul_hydra/crystal/folder_music.png +0 -0
  9. data/app/assets/images/cul_hydra/crystal/folder_sound.png +0 -0
  10. data/app/assets/images/cul_hydra/crystal/folder_video.png +0 -0
  11. data/app/assets/images/cul_hydra/crystal/kmultiple.png +0 -0
  12. data/app/assets/images/cul_hydra/crystal/knotify.png +0 -0
  13. data/app/assets/images/cul_hydra/crystal/mp3.png +0 -0
  14. data/app/assets/images/cul_hydra/crystal/multimedia2.png +0 -0
  15. data/app/assets/images/cul_hydra/crystal/video.png +0 -0
  16. data/app/assets/images/cul_hydra/filesystem/application.png +0 -0
  17. data/app/assets/images/cul_hydra/filesystem/code.png +0 -0
  18. data/app/assets/images/cul_hydra/filesystem/css.png +0 -0
  19. data/app/assets/images/cul_hydra/filesystem/db.png +0 -0
  20. data/app/assets/images/cul_hydra/filesystem/directory.png +0 -0
  21. data/app/assets/images/cul_hydra/filesystem/doc.png +0 -0
  22. data/app/assets/images/cul_hydra/filesystem/file.png +0 -0
  23. data/app/assets/images/cul_hydra/filesystem/film.png +0 -0
  24. data/app/assets/images/cul_hydra/filesystem/flash.png +0 -0
  25. data/app/assets/images/cul_hydra/filesystem/folder_open.png +0 -0
  26. data/app/assets/images/cul_hydra/filesystem/html.png +0 -0
  27. data/app/assets/images/cul_hydra/filesystem/java.png +0 -0
  28. data/app/assets/images/cul_hydra/filesystem/linux.png +0 -0
  29. data/app/assets/images/cul_hydra/filesystem/music.png +0 -0
  30. data/app/assets/images/cul_hydra/filesystem/pdf.png +0 -0
  31. data/app/assets/images/cul_hydra/filesystem/php.png +0 -0
  32. data/app/assets/images/cul_hydra/filesystem/picture.png +0 -0
  33. data/app/assets/images/cul_hydra/filesystem/ppt.png +0 -0
  34. data/app/assets/images/cul_hydra/filesystem/psd.png +0 -0
  35. data/app/assets/images/cul_hydra/filesystem/ruby.png +0 -0
  36. data/app/assets/images/cul_hydra/filesystem/script.png +0 -0
  37. data/app/assets/images/cul_hydra/filesystem/spinner.gif +0 -0
  38. data/app/assets/images/cul_hydra/filesystem/txt.png +0 -0
  39. data/app/assets/images/cul_hydra/filesystem/xls.png +0 -0
  40. data/app/assets/images/cul_hydra/filesystem/zip.png +0 -0
  41. data/app/controllers/concerns/cul/hydra/application_id_behavior.rb +43 -0
  42. data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
  43. data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
  44. data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
  45. data/app/helpers/cul/hydra/ore_proxies_helper_behavior.rb +119 -0
  46. data/app/helpers/cul/hydra/struct_metadata_helper_behavior.rb +89 -0
  47. data/app/models/bag_aggregator.rb +7 -0
  48. data/app/models/concept.rb +23 -0
  49. data/app/models/concerns/cul/hydra/models.rb +24 -0
  50. data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
  51. data/app/models/concerns/cul/hydra/models/common.rb +220 -0
  52. data/app/models/concerns/cul/hydra/models/image_resource.rb +106 -0
  53. data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
  54. data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
  55. data/app/models/concerns/nfo/common.rb +17 -0
  56. data/app/models/concerns/nfo/file_data_object.rb +10 -0
  57. data/app/models/concerns/nfo/folder.rb +10 -0
  58. data/app/models/concerns/nie/information_element.rb +10 -0
  59. data/app/models/concerns/ore/proxy.rb +124 -0
  60. data/app/models/concerns/rdf/cul.rb +77 -0
  61. data/app/models/concerns/rdf/fcrepo3.rb +360 -0
  62. data/app/models/concerns/rdf/nfo.rb +807 -0
  63. data/app/models/concerns/rdf/nie.rb +338 -0
  64. data/app/models/concerns/rdf/olo.rb +100 -0
  65. data/app/models/concerns/rdf/ore.rb +101 -0
  66. data/app/models/concerns/rdf/pimo.rb +605 -0
  67. data/app/models/concerns/rdf/sc.rb +47 -0
  68. data/app/models/concerns/sc/canvas.rb +12 -0
  69. data/app/models/concerns/sc/sequence.rb +21 -0
  70. data/app/models/content_aggregator.rb +3 -0
  71. data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
  72. data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
  73. data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
  74. data/app/models/dc_document.rb +39 -0
  75. data/app/models/generic_aggregator.rb +68 -0
  76. data/app/models/generic_object.rb +18 -0
  77. data/app/models/generic_resource.rb +210 -0
  78. data/app/models/jp2_image_aggregator.rb +34 -0
  79. data/app/models/mets_structured_aggregator.rb +18 -0
  80. data/app/models/resource.rb +78 -0
  81. data/app/models/resource_aggregator.rb +22 -0
  82. data/app/models/static_audio_aggregator.rb +12 -0
  83. data/app/models/static_image_aggregator.rb +32 -0
  84. data/bin/rails +12 -0
  85. data/config/fedora.yml +17 -0
  86. data/config/jetty.yml +6 -0
  87. data/config/locales/ldpd_hydra.en.yml +125 -0
  88. data/config/predicate_mappings.yml +79 -0
  89. data/config/solr.yml +8 -0
  90. data/config/solr_mappings.yml +26 -0
  91. data/config/solr_value_maps.yml +41 -0
  92. data/config/subs.yml +17 -0
  93. data/fixtures/cmodels/ldpd_ADLMetadata.xml +56 -0
  94. data/fixtures/cmodels/ldpd_AESMetadata.xml +56 -0
  95. data/fixtures/cmodels/ldpd_BagAggregator.xml +70 -0
  96. data/fixtures/cmodels/ldpd_Concept.xml +69 -0
  97. data/fixtures/cmodels/ldpd_ContentAggregator.xml +70 -0
  98. data/fixtures/cmodels/ldpd_DynamicAggregator.xml +56 -0
  99. data/fixtures/cmodels/ldpd_JP2ImageAggregator.xml +60 -0
  100. data/fixtures/cmodels/ldpd_METSMetadata.xml +56 -0
  101. data/fixtures/cmodels/ldpd_METSStructuredAggregator.xml +53 -0
  102. data/fixtures/cmodels/ldpd_MODSMetadata.xml +73 -0
  103. data/fixtures/cmodels/ldpd_MostRecent.xml +46 -0
  104. data/fixtures/cmodels/ldpd_PTIFImageAggregator.xml +63 -0
  105. data/fixtures/cmodels/ldpd_Resource.xml +72 -0
  106. data/fixtures/cmodels/ldpd_RestrictedResource.xml +54 -0
  107. data/fixtures/cmodels/ldpd_Since.xml +62 -0
  108. data/fixtures/cmodels/ldpd_StaticAudioAggregator.xml +54 -0
  109. data/fixtures/cmodels/ldpd_StaticImageAggregator.xml +71 -0
  110. data/fixtures/cmodels/ldpd_htest.xml +54 -0
  111. data/fixtures/cmodels/ldpd_nullbind.xml +63 -0
  112. data/fixtures/cmodels/ldpd_sdef.Aggregator.xml +71 -0
  113. data/fixtures/cmodels/ldpd_sdef.Core.xml +48 -0
  114. data/fixtures/cmodels/ldpd_sdef.Image.xml +47 -0
  115. data/fixtures/cmodels/ldpd_sdef.Metadata.xml +62 -0
  116. data/fixtures/cmodels/ldpd_sdef.Resource.xml +76 -0
  117. data/fixtures/cmodels/ldpd_sdef.ZoomingImage.xml +46 -0
  118. data/fixtures/cmodels/ldpd_sdep.BagAggregator.xml +160 -0
  119. data/fixtures/cmodels/ldpd_sdep.BagAggregatorCore.xml +221 -0
  120. data/fixtures/cmodels/ldpd_sdep.ContentAggregatorCore.xml +221 -0
  121. data/fixtures/cmodels/ldpd_sdep.DynamicAggregator.xml +171 -0
  122. data/fixtures/cmodels/ldpd_sdep.DynamicAggregatorCore.xml +215 -0
  123. data/fixtures/cmodels/ldpd_sdep.JP2Image.xml +220 -0
  124. data/fixtures/cmodels/ldpd_sdep.JP2ImageAggregator.xml +167 -0
  125. data/fixtures/cmodels/ldpd_sdep.JP2ImageCore.xml +229 -0
  126. data/fixtures/cmodels/ldpd_sdep.MODSMetadata.xml +158 -0
  127. data/fixtures/cmodels/ldpd_sdep.MODSMetadataCore.xml +227 -0
  128. data/fixtures/cmodels/ldpd_sdep.PTIFImage.xml +222 -0
  129. data/fixtures/cmodels/ldpd_sdep.PTIFImageAggregator.xml +167 -0
  130. data/fixtures/cmodels/ldpd_sdep.PTIFImageCore.xml +215 -0
  131. data/fixtures/cmodels/ldpd_sdep.StaticImage.xml +210 -0
  132. data/fixtures/cmodels/ldpd_sdep.StaticImageAggregator.xml +186 -0
  133. data/fixtures/cmodels/ldpd_sdep.StaticImageCore.xml +220 -0
  134. data/fixtures/cmodels/ore_Proxy.xml +50 -0
  135. data/fixtures/spec/BLOB/test001.jpg +0 -0
  136. data/fixtures/spec/CUL_DC/dc.xml +5 -0
  137. data/fixtures/spec/CUL_MODS/mods-001.xml +25 -0
  138. data/fixtures/spec/CUL_MODS/mods-all.xml +65 -0
  139. data/fixtures/spec/CUL_MODS/mods-bad-repo.xml +7 -0
  140. data/fixtures/spec/CUL_MODS/mods-date-created-range.xml +7 -0
  141. data/fixtures/spec/CUL_MODS/mods-date-created-single.xml +6 -0
  142. data/fixtures/spec/CUL_MODS/mods-date-end-with-all-u-characters.xml +7 -0
  143. data/fixtures/spec/CUL_MODS/mods-date-issued-range.xml +7 -0
  144. data/fixtures/spec/CUL_MODS/mods-date-issued-single.xml +6 -0
  145. data/fixtures/spec/CUL_MODS/mods-date-other-range.xml +7 -0
  146. data/fixtures/spec/CUL_MODS/mods-date-other-single.xml +6 -0
  147. data/fixtures/spec/CUL_MODS/mods-date-range-short-years.xml +7 -0
  148. data/fixtures/spec/CUL_MODS/mods-date-start-with-all-u-characters.xml +7 -0
  149. data/fixtures/spec/CUL_MODS/mods-dates-with-all-u-characters.xml +7 -0
  150. data/fixtures/spec/CUL_MODS/mods-dates-with-some-u-characters.xml +7 -0
  151. data/fixtures/spec/CUL_MODS/mods-item.xml +31 -0
  152. data/fixtures/spec/CUL_MODS/mods-names.xml +35 -0
  153. data/fixtures/spec/CUL_MODS/mods-notes.xml +8 -0
  154. data/fixtures/spec/CUL_MODS/mods-ns.xml +2 -0
  155. data/fixtures/spec/CUL_MODS/mods-origin-info.xml +9 -0
  156. data/fixtures/spec/CUL_MODS/mods-part.xml +22 -0
  157. data/fixtures/spec/CUL_MODS/mods-physical-description.xml +12 -0
  158. data/fixtures/spec/CUL_MODS/mods-physical-location.xml +9 -0
  159. data/fixtures/spec/CUL_MODS/mods-record-info.xml +4 -0
  160. data/fixtures/spec/CUL_MODS/mods-relateditem-project.xml +8 -0
  161. data/fixtures/spec/CUL_MODS/mods-subjects.xml +73 -0
  162. data/fixtures/spec/CUL_MODS/mods-textual-date.xml +8 -0
  163. data/fixtures/spec/CUL_MODS/mods-titles.xml +33 -0
  164. data/fixtures/spec/CUL_MODS/mods-top-level-location-vs-relateditem-location.xml +21 -0
  165. data/fixtures/spec/CUL_MODS/mods-unmapped-project.xml +7 -0
  166. data/fixtures/spec/CUL_SOLR/mods-001.xml +1 -0
  167. data/fixtures/spec/CUL_SOLR/mods-001.yml +30 -0
  168. data/fixtures/spec/FOXML/content-aggregator.xml +64 -0
  169. data/fixtures/spec/FOXML/content-cmodel.xml +48 -0
  170. data/fixtures/spec/FOXML/image-cmodel.xml +48 -0
  171. data/fixtures/spec/FOXML/resource-max.xml +83 -0
  172. data/fixtures/spec/FOXML/resource-screen.xml +273 -0
  173. data/fixtures/spec/FOXML/resource-thumb.xml +86 -0
  174. data/fixtures/spec/FOXML/static-image-aggregator.xml +31 -0
  175. data/fixtures/spec/STRUCTMAP/structmap-examples.xml +21 -0
  176. data/fixtures/spec/STRUCTMAP/structmap-nested.xml +10 -0
  177. data/fixtures/spec/STRUCTMAP/structmap-recto.xml +4 -0
  178. data/fixtures/spec/STRUCTMAP/structmap-seq.xml +5 -0
  179. data/fixtures/spec/STRUCTMAP/structmap-unordered-seq.xml +5 -0
  180. data/lib/cul_hydra.rb +20 -0
  181. data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
  182. data/lib/cul_hydra/controllers.rb +13 -0
  183. data/lib/cul_hydra/controllers/aggregates.rb +93 -0
  184. data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
  185. data/lib/cul_hydra/controllers/catalog.rb +12 -0
  186. data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
  187. data/lib/cul_hydra/controllers/datastreams.rb +145 -0
  188. data/lib/cul_hydra/controllers/helpers.rb +10 -0
  189. data/lib/cul_hydra/controllers/helpers/active_fedora_helper_behavior.rb +9 -0
  190. data/lib/cul_hydra/controllers/helpers/application_helper_behavior.rb +16 -0
  191. data/lib/cul_hydra/controllers/helpers/dc_metadata_helper_behavior.rb +9 -0
  192. data/lib/cul_hydra/controllers/helpers/hydra_assets_helper_behavior.rb +46 -0
  193. data/lib/cul_hydra/controllers/helpers/hydra_autocomplete_helper_behavior.rb +35 -0
  194. data/lib/cul_hydra/controllers/helpers/hydra_uploader_helper_behavior.rb +34 -0
  195. data/lib/cul_hydra/controllers/helpers/resources_helper_behavior.rb +159 -0
  196. data/lib/cul_hydra/controllers/resources.rb +161 -0
  197. data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
  198. data/lib/cul_hydra/controllers/suggestions.rb +126 -0
  199. data/lib/cul_hydra/controllers/terms.rb +205 -0
  200. data/lib/cul_hydra/engine.rb +31 -0
  201. data/lib/cul_hydra/fedora.rb +41 -0
  202. data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
  203. data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
  204. data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
  205. data/lib/cul_hydra/indexer.rb +102 -0
  206. data/lib/cul_hydra/om.rb +7 -0
  207. data/lib/cul_hydra/om/standard_mods.rb +115 -0
  208. data/lib/cul_hydra/risearch_members.rb +92 -0
  209. data/lib/cul_hydra/solrizer.rb +10 -0
  210. data/lib/cul_hydra/solrizer/extractor.rb +27 -0
  211. data/lib/cul_hydra/solrizer/mods_fieldable.rb +473 -0
  212. data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
  213. data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
  214. data/lib/cul_hydra/solrizer_patch.rb +172 -0
  215. data/lib/cul_hydra/version.rb +8 -0
  216. data/lib/cul_hydra/version.rb~ +8 -0
  217. data/lib/tasks/cmodel.rake +122 -0
  218. data/lib/tasks/cul_hydra_dev.rake +54 -0
  219. data/lib/tasks/index.rake +73 -0
  220. data/lib/tasks/transform.rake +23 -0
  221. metadata +503 -0
@@ -0,0 +1,10 @@
1
# Namespace for the CUL Solr indexing helpers.
# Each constant is registered with autoload, so its file is only loaded the
# first time the constant is referenced.
module Cul
  module Hydra
    module Solrizer
      {
        :Extractor => "cul_hydra/solrizer/extractor",
        :TerminologyBasedSolrizer => "cul_hydra/solrizer/terminology_based_solrizer",
        :ValueMapper => "cul_hydra/solrizer/value_mapper",
        :ModsFieldable => "cul_hydra/solrizer/mods_fieldable"
      }.each do |const_name, feature|
        autoload const_name, feature
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
module Cul::Hydra::Solrizer
  # Extension of ::Solrizer::Extractor whose value insertion can optionally
  # skip values that are already present for a field.
  class Extractor < ::Solrizer::Extractor
    # Adds +field_value+ to the array stored under +field_name+ in +solr_doc+.
    # Existing values are never replaced: the value is appended to the field's
    # array, or a new one-element array is created when the field is missing.
    # The value is passed through format_node_value before being stored.
    # @param [Hash] solr_doc document being built (modified in place)
    # @param [String] field_name
    # @param [String] field_value
    # @param [boolean] unique when true, a value already present is not appended again
    # @return [Hash] the same solr_doc
    def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      value = format_node_value(field_value)
      unless solr_doc.has_key?(field_name)
        solr_doc.merge!(field_name => [value])
        return solr_doc
      end

      existing = solr_doc[field_name]
      existing << value unless unique && existing.include?(value)
      solr_doc
    end

    # Instance-level convenience wrapper around the class method of the same name.
    def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
      Cul::Hydra::Solrizer::Extractor.insert_solr_field_value(
        solr_doc, field_name, field_value, unique
      )
    end
  end
end
@@ -0,0 +1,473 @@
1
+ module Cul::Hydra::Solrizer
2
+ module ModsFieldable
3
+ extend ActiveSupport::Concern
4
+ include Solrizer::DefaultDescriptors::Normal
5
+
6
+ MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
7
+
8
# Class-level helpers: access to a memoized ValueMapper plus text normalization.
module ClassMethods
  # Returns the memoized ValueMapper, building it from +maps+ on first use.
  # Once a mapper exists, +maps+ is ignored.
  def value_mapper(maps=nil)
    @value_mapper = ValueMapper.new(maps) unless @value_mapper
    @value_mapper
  end

  # Delegates to ValueMapper#map_field.
  def map_field(field_key, map_key)
    value_mapper.map_field(field_key, map_key)
  end

  # Delegates to ValueMapper#map_value.
  def map_value(field_key, value_key)
    value_mapper.map_value(field_key, value_key)
  end

  # Delegates to ValueMapper#maps_field?.
  def maps_field?(field_key)
    value_mapper.maps_field?(field_key)
  end

  # Returns a cleaned-up copy of +t+: surrounding whitespace removed and each
  # run of internal whitespace collapsed to a single space.
  # When +strip_punctuation+ is true, one enclosing pair of each of (), {}, [],
  # "", '' and <> is removed (in that order), then any leading punctuation,
  # and the result is stripped again.
  # @param [String] t
  # @param [boolean] strip_punctuation
  # @return [String]
  def normalize(t, strip_punctuation=false)
    cleaned = t.strip.gsub(/\s+/, ' ')
    return cleaned unless strip_punctuation

    paired_punctuation = [
      /^\((.*)\)$/,
      /^\{(.*)\}$/,
      /^\[(.*)\]$/,
      /^"(.*)"$/,
      /^'(.*)'$/,
      /^<(.*)>$/
    ]
    paired_punctuation.each do |pair|
      cleaned = cleaned.sub(pair, "\\1")
    end
    # removing punctuation may expose leading/trailing space, so strip once more
    cleaned.sub(/^[[:punct:]]+/, '').strip
  end
end
45
+
46
+ extend ClassMethods
47
+
48
# Returns the first /mods:mods element found in ng_xml, or nil if there is none.
def mods
  root_nodes = ng_xml.xpath('/mods:mods', MODS_NS)
  root_nodes.first
end
51
+
52
# Returns the normalized main titles of the host relatedItems labelled 'Project'.
# A related item without an untyped titleInfo has no main title; it is skipped
# rather than passed to normalize, which cannot handle nil.
# @return [Array<String>]
def projects
  project_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS)
  project_nodes.collect { |p_node| main_title(p_node) }.compact.collect do |title|
    ModsFieldable.normalize(title, true)
  end
end
57
+
58
# Returns the normalized main titles of the host relatedItems labelled 'Collection'.
# A related item without an untyped titleInfo has no main title; it is skipped
# rather than passed to normalize, which cannot handle nil.
# @return [Array<String>]
def collections
  collection_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS)
  collection_nodes.collect { |c_node| main_title(c_node) }.compact.collect do |title|
    ModsFieldable.normalize(title, true)
  end
end
63
+
64
# Builds a sortable title from the first untyped titleInfo under +node+:
# the text of every child except nonSort, concatenated and normalized with
# punctuation stripping.
# @return [String, nil] nil when the resulting text is empty
def sort_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  sortable_children = title_info ? title_info.children.reject { |child| child.name == 'nonSort' } : []
  joined = sortable_children.map { |child| child.text }.join
  sortable = ModsFieldable.normalize(joined, true)
  sortable.empty? ? nil : sortable
end
77
+
78
# Returns the normalized text of the first untyped titleInfo under +node+,
# or nil when there is no such element.
def main_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  return nil unless title_info

  ModsFieldable.normalize(title_info.text)
end
87
+
88
# Returns all titles directly under +node+ (relatedItems are not descended into):
# the main title, when present, followed by the selected alternative titles.
# +node+ is passed through to both lookups, so a caller-supplied node is honored.
# @return [Array<String>]
def titles(node=mods)
  all_titles = []
  primary = main_title(node)
  all_titles << primary unless primary.nil?
  all_titles + alternative_titles(node)
end
95
+
96
# Returns the normalized text of every titleInfo under +node+ whose type is
# alternative, abbreviated, translated or uniform.
# @return [Array<String>]
def alternative_titles(node=mods)
  typed_titles = node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS)
  typed_titles.map { |title_info| ModsFieldable.normalize(title_info.text) }
end
101
+
102
# Returns one normalized string per top-level mods:name: its namePart texts
# joined with single spaces (role terms are not included).
# When +role_authority+ is given, only names having a roleTerm with that
# authority are returned; when +role+ is also given, the roleTerm text must
# equal it as well.
# Names nested under mods:subject are intentionally not extracted
# (see https://issues.cul.columbia.edu/browse/DCV-231 and
# https://issues.cul.columbia.edu/browse/SCV-102).
# @return [Array<String>]
def names(role_authority=nil, role=nil)
  name_xpath = "./mods:name"
  unless role_authority.nil?
    role_test = "@authority='#{role_authority.to_s}'"
    role_test += " and normalize-space(text()) = '#{role.to_s.strip}'" unless role.nil?
    name_xpath += "/mods:role/mods:roleTerm[#{role_test}]/ancestor::mods:name"
  end

  mods.xpath(name_xpath, MODS_NS).map do |name_node|
    parts = name_node.xpath('./mods:namePart', MODS_NS).map { |part| part.text }
    ModsFieldable.normalize(parts.join(' '), true)
  end
end
128
+
129
# Unimplemented placeholder: meant to collect the dateIssued values with
# keyDate = 'yes' but not point = 'end'. Currently always returns nil.
def dates(node=mods)
  nil
end
132
+
133
# Returns the normalized text of each physicalDescription/form under +node+
# that has an authority attribute other than 'marcform'.
# @return [Array<String>]
def formats(node=mods)
  form_nodes = node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS)
  form_nodes.map { |form| ModsFieldable.normalize(form.text) }
end
139
+
140
# Returns the normalized text of the first location/physicalLocation with
# authority 'marcorg' under +node+, or nil when there is none.
def repository_code(node=mods)
  code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
  return nil unless code_node

  ModsFieldable.normalize(code_node.text)
end
150
+
151
# Returns the normalized text of the first location/physicalLocation without
# an authority attribute under +node+, or nil when there is none.
def repository_text(node=mods)
  text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
  return nil unless text_node

  ModsFieldable.normalize(text_node.text)
end
161
+
162
# Translates a repository MARC code through the table selected by +type+
# ('short', 'long' or 'full'), falling back to 'Non-Columbia Location'.
# Returns nil for any other +type+.
def translate_repo_marc_code(code, type)
  fallback = 'Non-Columbia Location'
  case type
  when 'short' then translate_with_default(SHORT_REPO, code, fallback)
  when 'long' then translate_with_default(LONG_REPO, code, fallback)
  when 'full' then translate_with_default(FULL_REPO, code, fallback)
  else nil
  end
end
175
+
176
# Translates a project title through the table selected by +type+
# ('short' or 'full'); the normalized title itself is the fallback value.
# Returns nil for any other +type+.
def translate_project_title(project_title, type)
  title = ModsFieldable.normalize(project_title)
  case type
  when 'short' then translate_with_default(SHORT_PROJ, title, title)
  when 'full' then translate_with_default(FULL_PROJ, title, title)
  else nil
  end
end
187
+
188
# Returns the normalized, punctuation-stripped text of each
# location/shelfLocator under +node+.
# @return [Array<String>]
def shelf_locators(node=mods)
  locator_nodes = node.xpath("./mods:location/mods:shelfLocator", MODS_NS)
  locator_nodes.map { |locator| ModsFieldable.normalize(locator.text, true) }
end
193
+
194
# Collects the originInfo dateCreated, dateIssued and dateOther values (in
# that order) that carry none of the keyDate, point or w3cdtf attributes.
# Each value is normalized with punctuation stripping.
# @return [Array<String>]
def textual_dates(node=mods)
  %w(dateCreated dateIssued dateOther).flat_map do |element|
    date_xpath = "./mods:originInfo/mods:#{element}[not(@keyDate) and not(@point) and not(@w3cdtf)]"
    node.xpath(date_xpath, MODS_NS).map { |date_node| ModsFieldable.normalize(date_node.text, true) }
  end
end
207
+
208
# Turns a start/end year pair into a one-element array holding a display string.
# Both arguments are converted with to_i, which also drops any zero-padding.
# - Equal years yield just that year, e.g. ["1900"].
# - Otherwise the result is "Between <start> and <end>". Years <= 0 are shown
#   as their absolute value followed by ' BCE'; a positive end year gets a
#   ' CE' suffix only when the start year is not positive.
# Year 0 is rendered as "0 BCE" so the number is never left empty.
# @param [Integer, String] start_year
# @param [Integer, String] end_year
# @return [Array<String>]
def date_range_to_textual_date(start_year, end_year)
  first = start_year.to_i
  last = end_year.to_i
  return [first.to_s] if first == last

  first_label = first > 0 ? first.to_s : "#{first.abs} BCE"
  last_label =
    if last <= 0
      "#{last.abs} BCE"
    elsif first > 0
      last.to_s
    else
      "#{last} CE"
    end

  ["Between #{first_label} and #{last_label}"]
end
222
+
223
# Returns the normalized, punctuation-stripped text of each note under +node+
# whose type is 'date' or 'date source'.
# @return [Array<String>]
def date_notes(node=mods)
  note_nodes = node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS)
  note_nodes.map { |note| ModsFieldable.normalize(note.text, true) }
end
230
+
231
# Returns the normalized, punctuation-stripped text of each note under +node+
# that is untyped or whose type is neither 'date' nor 'date source'.
# Notes of type 'view direction' are prefixed with 'View Direction: '.
# @return [Array<String>]
def non_date_notes(node=mods)
  note_nodes = node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS)
  note_nodes.map do |note|
    text = ModsFieldable.normalize(note.text, true)
    note.attr('type') == 'view direction' ? 'View Direction: ' + text : text
  end
end
244
+
245
# Returns the normalized text of each location/url under +node+ marked with
# access 'object in context' and usage 'primary display'.
# @return [Array<String>]
def item_in_context_url(node=mods)
  url_nodes = node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS)
  url_nodes.map { |url| ModsFieldable.normalize(url.text, true) }
end
252
+
253
# Returns the normalized text of each location/url under +node+ whose access
# attribute is not 'object in context'.
# @return [Array<String>]
def non_item_in_context_url(node=mods)
  url_nodes = node.xpath("./mods:location/mods:url[not(@access='object in context')]", MODS_NS)
  url_nodes.map { |url| ModsFieldable.normalize(url.text, true) }
end
260
+
261
# Returns the normalized text of each location/url found inside the host
# relatedItems labelled 'Project' under +node+.
# @return [Array<String>]
def project_url(node=mods)
  url_nodes = node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS)
  url_nodes.map { |url| ModsFieldable.normalize(url.text, true) }
end
268
+
269
# Collects subject values under +node+, grouped in this order: topics (except
# those of subjects with authority 'Durst'), geographic, name, temporal,
# titleInfo and genre. Each value is normalized with punctuation stripping.
# @return [Array<String>]
def all_subjects(node=mods)
  subject_xpaths = [
    "./mods:subject[not(@authority) or @authority != 'Durst']/mods:topic",
    "./mods:subject/mods:geographic",
    "./mods:subject/mods:name",
    "./mods:subject/mods:temporal",
    "./mods:subject/mods:titleInfo",
    "./mods:subject/mods:genre"
  ]
  subject_xpaths.flat_map do |subject_xpath|
    node.xpath(subject_xpath, MODS_NS).map { |subject| ModsFieldable.normalize(subject.text, true) }
  end
end
293
+
294
# Returns the normalized, punctuation-stripped topics of the subjects under
# +node+ whose authority is 'Durst'.
# @return [Array<String>]
def durst_subjects(node=mods)
  topic_nodes = node.xpath("./mods:subject[@authority='Durst']/mods:topic", MODS_NS)
  topic_nodes.map { |topic| ModsFieldable.normalize(topic.text, true) }
end
301
+
302
# Returns the normalized, punctuation-stripped text of every
# originInfo/place/placeTerm under +node+.
# @return [Array<String>]
def origin_info_place(node=mods)
  term_nodes = node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS)
  term_nodes.map { |term| ModsFieldable.normalize(term.text, true) }
end
309
+
310
# Returns the place terms to display: those without a valueURI attribute when
# any exist, otherwise those with a valueURI attribute.
# @return [Array<String>]
def origin_info_place_for_display(node=mods)
  with_uri, without_uri = ["[@valueURI]", "[not(@valueURI)]"].map do |predicate|
    term_nodes = node.xpath("./mods:originInfo/mods:place/mods:placeTerm#{predicate}", MODS_NS)
    term_nodes.map { |term| ModsFieldable.normalize(term.text, true) }
  end

  without_uri.empty? ? with_uri : without_uri
end
323
+
324
# Returns the normalized subject/cartographics/coordinates values under +node+
# that contain a decimal "lat,long" pair (expected format: 40.123456,-73.5678).
# @return [Array<String>]
def coordinates(node=mods)
  coordinate_pattern = /-*\d+\.\d+\s*,\s*-*\d+\.\d+\s*/
  candidates = node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).map do |coord_node|
    ModsFieldable.normalize(coord_node.text, true)
  end
  candidates.select { |text| text =~ coordinate_pattern }
end
334
+
335
# Adds the MODS-derived fields to a Solr document.
#
# Starts from the document returned by +super+ when a super implementation
# exists, otherwise from +solr_doc+. If there is no mods root the document is
# returned untouched. Otherwise title, name, subject, format, location, note,
# URL, repository, project, date and coordinate fields are filled in, and any
# field the class-level value mapper knows about is passed through it.
#
# Year fields are derived from the first originInfo date field already present
# in the document. 'u' placeholders are read as 0, and a date of 'uuuu' is
# treated as unknown.
#
# @param [Hash] solr_doc
# @return [Hash] the populated document
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  # No mods means nothing to process; the calls below would raise NoMethodError.
  return solr_doc if mods.nil?

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["durst_subjects_ssim"] = durst_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_non_item_in_context_url_ssm"] = non_item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
  start_date = nil
  end_date = nil
  start_year = nil
  end_year = nil
  # The first field present in the document wins, in the priority order above.
  possible_start_date_fields.each do |key|
    if solr_doc.has_key?(key)
      start_date = solr_doc[key][0]
      break
    end
  end
  possible_end_date_fields.each do |key|
    if solr_doc.has_key?(key)
      end_date = solr_doc[key][0]
      break
    end
  end

  if start_date.present?
    # A fully unknown date carries no information; a partially unknown one
    # has its 'u' placeholders read as zeros.
    start_date = nil if start_date == 'uuuu'
    end_date = nil if end_date == 'uuuu'
    start_date = start_date.gsub('u', '0') unless start_date.nil?
    end_date = end_date.gsub('u', '0') unless end_date.nil?

    # A missing end of the range defaults to the other end.
    end_date = start_date if end_date.blank?
    start_date = end_date if start_date.blank?

    year_regex = /^(-?\d{1,4}).*/

    unless start_date.blank?
      start_year_match = start_date.match(year_regex)
      if start_year_match && start_year_match.captures.length > 0
        start_year = start_year_match.captures[0]
        start_year = zero_pad_year(start_year)
        solr_doc["lib_start_date_year_itsi"] = start_year.to_i # TrieInt version for searches
      end
    end

    unless end_date.blank?
      end_year_match = end_date.match(year_regex)
      if end_year_match && end_year_match.captures.length > 0
        end_year = end_year_match.captures[0]
        end_year = zero_pad_year(end_year)
        solr_doc["lib_end_date_year_itsi"] = end_year.to_i # TrieInt version for searches
      end
    end

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year && end_year
    solr_doc["lib_date_year_range_ss"] = solr_doc["lib_date_year_range_si"]

    # When no textual date is available, fall back to the year range, but only
    # if both years were actually parsed. Otherwise nil.to_i would turn an
    # unknown date into the bogus textual date "0".
    if solr_doc["lib_date_textual_ssm"].blank? && start_year && end_year
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Replace the values of every field the class-level value mapper knows about.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
461
+
462
# Left-pads the digits of +year+ with zeros to at least four characters,
# keeping a leading '-' sign in front of the padded digits.
# @param [String, Integer] year
# @return [String] e.g. '45' => '0045', '-45' => '-0045'
def zero_pad_year(year)
  text = year.to_s
  if text.start_with?('-')
    '-' + text[1, text.length].rjust(4, '0')
  else
    text.rjust(4, '0')
  end
end
472
+ end
473
+ end