cul_hydra 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (221) hide show
  1. checksums.yaml +7 -0
  2. data/app/assets/images/cul_hydra/crystal/binary.png +0 -0
  3. data/app/assets/images/cul_hydra/crystal/document.png +0 -0
  4. data/app/assets/images/cul_hydra/crystal/file.png +0 -0
  5. data/app/assets/images/cul_hydra/crystal/file_broken.png +0 -0
  6. data/app/assets/images/cul_hydra/crystal/folder_documents.png +0 -0
  7. data/app/assets/images/cul_hydra/crystal/folder_images.png +0 -0
  8. data/app/assets/images/cul_hydra/crystal/folder_music.png +0 -0
  9. data/app/assets/images/cul_hydra/crystal/folder_sound.png +0 -0
  10. data/app/assets/images/cul_hydra/crystal/folder_video.png +0 -0
  11. data/app/assets/images/cul_hydra/crystal/kmultiple.png +0 -0
  12. data/app/assets/images/cul_hydra/crystal/knotify.png +0 -0
  13. data/app/assets/images/cul_hydra/crystal/mp3.png +0 -0
  14. data/app/assets/images/cul_hydra/crystal/multimedia2.png +0 -0
  15. data/app/assets/images/cul_hydra/crystal/video.png +0 -0
  16. data/app/assets/images/cul_hydra/filesystem/application.png +0 -0
  17. data/app/assets/images/cul_hydra/filesystem/code.png +0 -0
  18. data/app/assets/images/cul_hydra/filesystem/css.png +0 -0
  19. data/app/assets/images/cul_hydra/filesystem/db.png +0 -0
  20. data/app/assets/images/cul_hydra/filesystem/directory.png +0 -0
  21. data/app/assets/images/cul_hydra/filesystem/doc.png +0 -0
  22. data/app/assets/images/cul_hydra/filesystem/file.png +0 -0
  23. data/app/assets/images/cul_hydra/filesystem/film.png +0 -0
  24. data/app/assets/images/cul_hydra/filesystem/flash.png +0 -0
  25. data/app/assets/images/cul_hydra/filesystem/folder_open.png +0 -0
  26. data/app/assets/images/cul_hydra/filesystem/html.png +0 -0
  27. data/app/assets/images/cul_hydra/filesystem/java.png +0 -0
  28. data/app/assets/images/cul_hydra/filesystem/linux.png +0 -0
  29. data/app/assets/images/cul_hydra/filesystem/music.png +0 -0
  30. data/app/assets/images/cul_hydra/filesystem/pdf.png +0 -0
  31. data/app/assets/images/cul_hydra/filesystem/php.png +0 -0
  32. data/app/assets/images/cul_hydra/filesystem/picture.png +0 -0
  33. data/app/assets/images/cul_hydra/filesystem/ppt.png +0 -0
  34. data/app/assets/images/cul_hydra/filesystem/psd.png +0 -0
  35. data/app/assets/images/cul_hydra/filesystem/ruby.png +0 -0
  36. data/app/assets/images/cul_hydra/filesystem/script.png +0 -0
  37. data/app/assets/images/cul_hydra/filesystem/spinner.gif +0 -0
  38. data/app/assets/images/cul_hydra/filesystem/txt.png +0 -0
  39. data/app/assets/images/cul_hydra/filesystem/xls.png +0 -0
  40. data/app/assets/images/cul_hydra/filesystem/zip.png +0 -0
  41. data/app/controllers/concerns/cul/hydra/application_id_behavior.rb +43 -0
  42. data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
  43. data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
  44. data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
  45. data/app/helpers/cul/hydra/ore_proxies_helper_behavior.rb +119 -0
  46. data/app/helpers/cul/hydra/struct_metadata_helper_behavior.rb +89 -0
  47. data/app/models/bag_aggregator.rb +7 -0
  48. data/app/models/concept.rb +23 -0
  49. data/app/models/concerns/cul/hydra/models.rb +24 -0
  50. data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
  51. data/app/models/concerns/cul/hydra/models/common.rb +220 -0
  52. data/app/models/concerns/cul/hydra/models/image_resource.rb +106 -0
  53. data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
  54. data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
  55. data/app/models/concerns/nfo/common.rb +17 -0
  56. data/app/models/concerns/nfo/file_data_object.rb +10 -0
  57. data/app/models/concerns/nfo/folder.rb +10 -0
  58. data/app/models/concerns/nie/information_element.rb +10 -0
  59. data/app/models/concerns/ore/proxy.rb +124 -0
  60. data/app/models/concerns/rdf/cul.rb +77 -0
  61. data/app/models/concerns/rdf/fcrepo3.rb +360 -0
  62. data/app/models/concerns/rdf/nfo.rb +807 -0
  63. data/app/models/concerns/rdf/nie.rb +338 -0
  64. data/app/models/concerns/rdf/olo.rb +100 -0
  65. data/app/models/concerns/rdf/ore.rb +101 -0
  66. data/app/models/concerns/rdf/pimo.rb +605 -0
  67. data/app/models/concerns/rdf/sc.rb +47 -0
  68. data/app/models/concerns/sc/canvas.rb +12 -0
  69. data/app/models/concerns/sc/sequence.rb +21 -0
  70. data/app/models/content_aggregator.rb +3 -0
  71. data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
  72. data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
  73. data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
  74. data/app/models/dc_document.rb +39 -0
  75. data/app/models/generic_aggregator.rb +68 -0
  76. data/app/models/generic_object.rb +18 -0
  77. data/app/models/generic_resource.rb +210 -0
  78. data/app/models/jp2_image_aggregator.rb +34 -0
  79. data/app/models/mets_structured_aggregator.rb +18 -0
  80. data/app/models/resource.rb +78 -0
  81. data/app/models/resource_aggregator.rb +22 -0
  82. data/app/models/static_audio_aggregator.rb +12 -0
  83. data/app/models/static_image_aggregator.rb +32 -0
  84. data/bin/rails +12 -0
  85. data/config/fedora.yml +17 -0
  86. data/config/jetty.yml +6 -0
  87. data/config/locales/ldpd_hydra.en.yml +125 -0
  88. data/config/predicate_mappings.yml +79 -0
  89. data/config/solr.yml +8 -0
  90. data/config/solr_mappings.yml +26 -0
  91. data/config/solr_value_maps.yml +41 -0
  92. data/config/subs.yml +17 -0
  93. data/fixtures/cmodels/ldpd_ADLMetadata.xml +56 -0
  94. data/fixtures/cmodels/ldpd_AESMetadata.xml +56 -0
  95. data/fixtures/cmodels/ldpd_BagAggregator.xml +70 -0
  96. data/fixtures/cmodels/ldpd_Concept.xml +69 -0
  97. data/fixtures/cmodels/ldpd_ContentAggregator.xml +70 -0
  98. data/fixtures/cmodels/ldpd_DynamicAggregator.xml +56 -0
  99. data/fixtures/cmodels/ldpd_JP2ImageAggregator.xml +60 -0
  100. data/fixtures/cmodels/ldpd_METSMetadata.xml +56 -0
  101. data/fixtures/cmodels/ldpd_METSStructuredAggregator.xml +53 -0
  102. data/fixtures/cmodels/ldpd_MODSMetadata.xml +73 -0
  103. data/fixtures/cmodels/ldpd_MostRecent.xml +46 -0
  104. data/fixtures/cmodels/ldpd_PTIFImageAggregator.xml +63 -0
  105. data/fixtures/cmodels/ldpd_Resource.xml +72 -0
  106. data/fixtures/cmodels/ldpd_RestrictedResource.xml +54 -0
  107. data/fixtures/cmodels/ldpd_Since.xml +62 -0
  108. data/fixtures/cmodels/ldpd_StaticAudioAggregator.xml +54 -0
  109. data/fixtures/cmodels/ldpd_StaticImageAggregator.xml +71 -0
  110. data/fixtures/cmodels/ldpd_htest.xml +54 -0
  111. data/fixtures/cmodels/ldpd_nullbind.xml +63 -0
  112. data/fixtures/cmodels/ldpd_sdef.Aggregator.xml +71 -0
  113. data/fixtures/cmodels/ldpd_sdef.Core.xml +48 -0
  114. data/fixtures/cmodels/ldpd_sdef.Image.xml +47 -0
  115. data/fixtures/cmodels/ldpd_sdef.Metadata.xml +62 -0
  116. data/fixtures/cmodels/ldpd_sdef.Resource.xml +76 -0
  117. data/fixtures/cmodels/ldpd_sdef.ZoomingImage.xml +46 -0
  118. data/fixtures/cmodels/ldpd_sdep.BagAggregator.xml +160 -0
  119. data/fixtures/cmodels/ldpd_sdep.BagAggregatorCore.xml +221 -0
  120. data/fixtures/cmodels/ldpd_sdep.ContentAggregatorCore.xml +221 -0
  121. data/fixtures/cmodels/ldpd_sdep.DynamicAggregator.xml +171 -0
  122. data/fixtures/cmodels/ldpd_sdep.DynamicAggregatorCore.xml +215 -0
  123. data/fixtures/cmodels/ldpd_sdep.JP2Image.xml +220 -0
  124. data/fixtures/cmodels/ldpd_sdep.JP2ImageAggregator.xml +167 -0
  125. data/fixtures/cmodels/ldpd_sdep.JP2ImageCore.xml +229 -0
  126. data/fixtures/cmodels/ldpd_sdep.MODSMetadata.xml +158 -0
  127. data/fixtures/cmodels/ldpd_sdep.MODSMetadataCore.xml +227 -0
  128. data/fixtures/cmodels/ldpd_sdep.PTIFImage.xml +222 -0
  129. data/fixtures/cmodels/ldpd_sdep.PTIFImageAggregator.xml +167 -0
  130. data/fixtures/cmodels/ldpd_sdep.PTIFImageCore.xml +215 -0
  131. data/fixtures/cmodels/ldpd_sdep.StaticImage.xml +210 -0
  132. data/fixtures/cmodels/ldpd_sdep.StaticImageAggregator.xml +186 -0
  133. data/fixtures/cmodels/ldpd_sdep.StaticImageCore.xml +220 -0
  134. data/fixtures/cmodels/ore_Proxy.xml +50 -0
  135. data/fixtures/spec/BLOB/test001.jpg +0 -0
  136. data/fixtures/spec/CUL_DC/dc.xml +5 -0
  137. data/fixtures/spec/CUL_MODS/mods-001.xml +25 -0
  138. data/fixtures/spec/CUL_MODS/mods-all.xml +65 -0
  139. data/fixtures/spec/CUL_MODS/mods-bad-repo.xml +7 -0
  140. data/fixtures/spec/CUL_MODS/mods-date-created-range.xml +7 -0
  141. data/fixtures/spec/CUL_MODS/mods-date-created-single.xml +6 -0
  142. data/fixtures/spec/CUL_MODS/mods-date-end-with-all-u-characters.xml +7 -0
  143. data/fixtures/spec/CUL_MODS/mods-date-issued-range.xml +7 -0
  144. data/fixtures/spec/CUL_MODS/mods-date-issued-single.xml +6 -0
  145. data/fixtures/spec/CUL_MODS/mods-date-other-range.xml +7 -0
  146. data/fixtures/spec/CUL_MODS/mods-date-other-single.xml +6 -0
  147. data/fixtures/spec/CUL_MODS/mods-date-range-short-years.xml +7 -0
  148. data/fixtures/spec/CUL_MODS/mods-date-start-with-all-u-characters.xml +7 -0
  149. data/fixtures/spec/CUL_MODS/mods-dates-with-all-u-characters.xml +7 -0
  150. data/fixtures/spec/CUL_MODS/mods-dates-with-some-u-characters.xml +7 -0
  151. data/fixtures/spec/CUL_MODS/mods-item.xml +31 -0
  152. data/fixtures/spec/CUL_MODS/mods-names.xml +35 -0
  153. data/fixtures/spec/CUL_MODS/mods-notes.xml +8 -0
  154. data/fixtures/spec/CUL_MODS/mods-ns.xml +2 -0
  155. data/fixtures/spec/CUL_MODS/mods-origin-info.xml +9 -0
  156. data/fixtures/spec/CUL_MODS/mods-part.xml +22 -0
  157. data/fixtures/spec/CUL_MODS/mods-physical-description.xml +12 -0
  158. data/fixtures/spec/CUL_MODS/mods-physical-location.xml +9 -0
  159. data/fixtures/spec/CUL_MODS/mods-record-info.xml +4 -0
  160. data/fixtures/spec/CUL_MODS/mods-relateditem-project.xml +8 -0
  161. data/fixtures/spec/CUL_MODS/mods-subjects.xml +73 -0
  162. data/fixtures/spec/CUL_MODS/mods-textual-date.xml +8 -0
  163. data/fixtures/spec/CUL_MODS/mods-titles.xml +33 -0
  164. data/fixtures/spec/CUL_MODS/mods-top-level-location-vs-relateditem-location.xml +21 -0
  165. data/fixtures/spec/CUL_MODS/mods-unmapped-project.xml +7 -0
  166. data/fixtures/spec/CUL_SOLR/mods-001.xml +1 -0
  167. data/fixtures/spec/CUL_SOLR/mods-001.yml +30 -0
  168. data/fixtures/spec/FOXML/content-aggregator.xml +64 -0
  169. data/fixtures/spec/FOXML/content-cmodel.xml +48 -0
  170. data/fixtures/spec/FOXML/image-cmodel.xml +48 -0
  171. data/fixtures/spec/FOXML/resource-max.xml +83 -0
  172. data/fixtures/spec/FOXML/resource-screen.xml +273 -0
  173. data/fixtures/spec/FOXML/resource-thumb.xml +86 -0
  174. data/fixtures/spec/FOXML/static-image-aggregator.xml +31 -0
  175. data/fixtures/spec/STRUCTMAP/structmap-examples.xml +21 -0
  176. data/fixtures/spec/STRUCTMAP/structmap-nested.xml +10 -0
  177. data/fixtures/spec/STRUCTMAP/structmap-recto.xml +4 -0
  178. data/fixtures/spec/STRUCTMAP/structmap-seq.xml +5 -0
  179. data/fixtures/spec/STRUCTMAP/structmap-unordered-seq.xml +5 -0
  180. data/lib/cul_hydra.rb +20 -0
  181. data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
  182. data/lib/cul_hydra/controllers.rb +13 -0
  183. data/lib/cul_hydra/controllers/aggregates.rb +93 -0
  184. data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
  185. data/lib/cul_hydra/controllers/catalog.rb +12 -0
  186. data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
  187. data/lib/cul_hydra/controllers/datastreams.rb +145 -0
  188. data/lib/cul_hydra/controllers/helpers.rb +10 -0
  189. data/lib/cul_hydra/controllers/helpers/active_fedora_helper_behavior.rb +9 -0
  190. data/lib/cul_hydra/controllers/helpers/application_helper_behavior.rb +16 -0
  191. data/lib/cul_hydra/controllers/helpers/dc_metadata_helper_behavior.rb +9 -0
  192. data/lib/cul_hydra/controllers/helpers/hydra_assets_helper_behavior.rb +46 -0
  193. data/lib/cul_hydra/controllers/helpers/hydra_autocomplete_helper_behavior.rb +35 -0
  194. data/lib/cul_hydra/controllers/helpers/hydra_uploader_helper_behavior.rb +34 -0
  195. data/lib/cul_hydra/controllers/helpers/resources_helper_behavior.rb +159 -0
  196. data/lib/cul_hydra/controllers/resources.rb +161 -0
  197. data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
  198. data/lib/cul_hydra/controllers/suggestions.rb +126 -0
  199. data/lib/cul_hydra/controllers/terms.rb +205 -0
  200. data/lib/cul_hydra/engine.rb +31 -0
  201. data/lib/cul_hydra/fedora.rb +41 -0
  202. data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
  203. data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
  204. data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
  205. data/lib/cul_hydra/indexer.rb +102 -0
  206. data/lib/cul_hydra/om.rb +7 -0
  207. data/lib/cul_hydra/om/standard_mods.rb +115 -0
  208. data/lib/cul_hydra/risearch_members.rb +92 -0
  209. data/lib/cul_hydra/solrizer.rb +10 -0
  210. data/lib/cul_hydra/solrizer/extractor.rb +27 -0
  211. data/lib/cul_hydra/solrizer/mods_fieldable.rb +473 -0
  212. data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
  213. data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
  214. data/lib/cul_hydra/solrizer_patch.rb +172 -0
  215. data/lib/cul_hydra/version.rb +8 -0
  216. data/lib/cul_hydra/version.rb~ +8 -0
  217. data/lib/tasks/cmodel.rake +122 -0
  218. data/lib/tasks/cul_hydra_dev.rake +54 -0
  219. data/lib/tasks/index.rake +73 -0
  220. data/lib/tasks/transform.rake +23 -0
  221. metadata +503 -0
@@ -0,0 +1,10 @@
1
# Namespace for the gem's Solr indexing helpers. Each constant below is
# registered with +autoload+, so its file is only required on first reference.
module Cul
  module Hydra
    module Solrizer
      {
        :Extractor => "cul_hydra/solrizer/extractor",
        :TerminologyBasedSolrizer => "cul_hydra/solrizer/terminology_based_solrizer",
        :ValueMapper => "cul_hydra/solrizer/value_mapper",
        :ModsFieldable => "cul_hydra/solrizer/mods_fieldable"
      }.each do |const_name, feature_path|
        autoload const_name, feature_path
      end
    end
  end
end
@@ -0,0 +1,27 @@
1
+ module Cul::Hydra::Solrizer
2
class Extractor < ::Solrizer::Extractor
  # Adds +field_value+ under +field_name+ in +solr_doc+ without discarding
  # values that are already stored there.
  #
  # The value is first passed through +format_node_value+. Values for a field
  # are always kept in an Array: a new field gets a one-element Array, an
  # existing field has the value appended. When +unique+ is truthy, a value
  # that is already present for the field is not appended again.
  #
  # @param [Hash] solr_doc
  # @param [String] field_name
  # @param [String] field_value
  # @param [boolean] unique
  # @return [Hash] the same +solr_doc+, modified in place
  def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
    formatted_value = self.format_node_value(field_value)
    if !solr_doc.has_key?(field_name)
      solr_doc.merge!(field_name => [formatted_value])
    elsif !unique || !solr_doc[field_name].include?(formatted_value)
      solr_doc[field_name] << formatted_value
    end
    solr_doc
  end

  # Instance Methods

  # Instance-level convenience that forwards to the class method of the same name.
  def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
    Cul::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
  end
end
27
+ end
@@ -0,0 +1,473 @@
1
+ module Cul::Hydra::Solrizer
2
+ module ModsFieldable
3
+ extend ActiveSupport::Concern
4
+ include Solrizer::DefaultDescriptors::Normal
5
+
6
+ MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
7
+
8
module ClassMethods
  # Returns the memoized ValueMapper, building it on first use.
  # +maps+ is only consulted when the mapper is first created; later calls
  # return the cached instance regardless of the argument.
  def value_mapper(maps=nil)
    @value_mapper ||= ValueMapper.new(maps)
  end

  # Delegates to ValueMapper#map_field.
  def map_field(field_key, map_key)
    value_mapper.map_field(field_key, map_key)
  end

  # Delegates to ValueMapper#map_value.
  def map_value(field_key, value_key)
    value_mapper.map_value(field_key, value_key)
  end

  # Delegates to ValueMapper#maps_field?.
  def maps_field?(field_key)
    value_mapper.maps_field?(field_key)
  end

  # Returns a cleaned-up copy of +t+; the argument itself is not modified.
  #
  # Leading/trailing whitespace is removed and every internal run of
  # whitespace becomes a single space. When +strip_punctuation+ is truthy,
  # one enclosing pair each of (), {}, [], "", '' and <> is removed (tried in
  # that order), then any leading punctuation is dropped and the result is
  # stripped again.
  def normalize(t, strip_punctuation=false)
    cleaned = t.dup.strip.gsub(/\s+/, ' ')
    return cleaned unless strip_punctuation

    wrappers = [/^\((.*)\)$/, /^\{(.*)\}$/, /^\[(.*)\]$/, /^"(.*)"$/, /^'(.*)'$/, /^<(.*)>$/]
    wrappers.each do |wrapper|
      cleaned = cleaned.sub(wrapper, "\\1")
    end
    cleaned = cleaned.sub(/^[[:punct:]]+/, '')
    # removing punctuation may have exposed new leading/trailing space
    cleaned.strip!
    cleaned
  end
end
45
+
46
+ extend ClassMethods
47
+
48
# Returns the first root-level mods:mods element of +ng_xml+, or nil when
# the document has none.
def mods
  root_candidates = ng_xml.xpath('/mods:mods', MODS_NS)
  root_candidates.first
end
51
+
52
# Returns the normalized (punctuation-stripped) main titles of every
# host relatedItem labelled "Project".
#
# A relatedItem without an untyped titleInfo yields no title from
# +main_title+; such items are skipped instead of being handed to
# +normalize+, which cannot process nil.
#
# @return [Array<String>]
def projects
  project_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS)
  raw_titles = project_nodes.map { |p_node| main_title(p_node) }.compact
  raw_titles.map { |title| ModsFieldable.normalize(title, true) }
end
57
+
58
# Returns the normalized (punctuation-stripped) main titles of every
# host relatedItem labelled "Collection".
#
# A relatedItem without an untyped titleInfo yields no title from
# +main_title+; such items are skipped instead of being handed to
# +normalize+, which cannot process nil.
#
# @return [Array<String>]
def collections
  collection_nodes = mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS)
  raw_titles = collection_nodes.map { |p_node| main_title(p_node) }.compact
  raw_titles.map { |title| ModsFieldable.normalize(title, true) }
end
63
+
64
# Builds the sortable title for +node+ (defaults to the MODS root).
#
# Only the first untyped titleInfo is used, and the text of its nonSort
# children is left out. The remaining child text is concatenated and
# normalized with punctuation stripping. Returns nil when nothing is left.
def sort_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  sortable_text = ''
  if title_info
    sortable_children = title_info.children.reject { |child| child.name == 'nonSort' }
    sortable_text = sortable_children.map { |child| child.text }.join
  end
  normalized = ModsFieldable.normalize(sortable_text, true)
  normalized.empty? ? nil : normalized
end
77
+
78
# Returns the normalized text of the first untyped titleInfo under +node+
# (defaults to the MODS root), or nil when there is no such element.
def main_title(node=mods)
  title_info = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
  return nil unless title_info

  ModsFieldable.normalize(title_info.text)
end
87
+
88
# Returns all titles found directly under +node+ (defaults to the MODS
# root) without descending into relatedItems. For now this is the main
# title followed by the selected alternative titles.
#
# +node+ is forwarded to the helper methods so that a caller-supplied node
# is honoured, and the collected Array is always the return value.
#
# @return [Array<String>]
def titles(node=mods)
  all_titles = []
  primary = main_title(node)
  all_titles << primary unless primary.nil?
  alternates = alternative_titles(node)
  all_titles.concat(alternates) unless alternates.nil?
  all_titles
end
95
+
96
# Returns the normalized text of every titleInfo under +node+ (defaults to
# the MODS root) whose type is alternative, abbreviated, translated or uniform.
def alternative_titles(node=mods)
  typed_titles = node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS)
  typed_titles.map { |title_info| ModsFieldable.normalize(title_info.text) }
end
101
+
102
# Returns the normalized names found in mods:name elements under the MODS root.
#
# With no arguments every mods:name is used. When +role_authority+ is given,
# only names having a roleTerm with that authority are selected; when +role+
# is given as well, the roleTerm text must also equal it. For each selected
# name the namePart texts are joined with a space and normalized with
# punctuation stripping (role terms are therefore not part of the result).
#
# Note: subject names are deliberately not extracted into the name field.
# See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
def names(role_authority=nil, role=nil)
  name_xpath =
    if role_authority.nil?
      "./mods:name"
    else
      role_filter = role.nil? ? "" : " and normalize-space(text()) = '#{role.to_s.strip}'"
      "./mods:name/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'#{role_filter}]/ancestor::mods:name"
    end

  mods.xpath(name_xpath, MODS_NS).map do |name_node|
    parts = name_node.xpath('./mods:namePart', MODS_NS).map { |part| part.text }
    ModsFieldable.normalize(parts.join(' '), true)
  end
end
128
+
129
# Not implemented yet: always returns nil and ignores +node+.
# Intended behavior per the original note: get all the dateIssued with
# keyDate = 'yes', but not point = 'end'.
def dates(node=mods)
  nil
end
132
+
133
# Returns the normalized text of every physicalDescription/form under +node+
# (defaults to the MODS root) that matches the XPath test
# @authority != 'marcform'.
def formats(node=mods)
  form_nodes = node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS)
  form_nodes.map { |form_node| ModsFieldable.normalize(form_node.text) }
end
139
+
140
# Returns the normalized text of the first location/physicalLocation with
# authority 'marcorg' under +node+ (defaults to the MODS root), or nil when
# there is none.
def repository_code(node=mods)
  repo_code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
  return nil unless repo_code_node

  ModsFieldable.normalize(repo_code_node.text)
end
150
+
151
# Returns the normalized text of the first location/physicalLocation that
# has no authority attribute under +node+ (defaults to the MODS root), or
# nil when there is none.
def repository_text(node=mods)
  repo_text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
  return nil unless repo_text_node

  ModsFieldable.normalize(repo_text_node.text)
end
161
+
162
# Translates a repository MARC +code+ via +translate_with_default+, choosing
# the SHORT_REPO, LONG_REPO or FULL_REPO lookup for +type+ 'short', 'long' or
# 'full'. Unknown codes fall back to 'Non-Columbia Location'. Returns nil for
# any other +type+. The code is passed through as given (not normalized).
def translate_repo_marc_code(code, type)
  case type
  when 'short'
    translate_with_default(SHORT_REPO, code, 'Non-Columbia Location')
  when 'long'
    translate_with_default(LONG_REPO, code, 'Non-Columbia Location')
  when 'full'
    translate_with_default(FULL_REPO, code, 'Non-Columbia Location')
  else
    nil
  end
end
175
+
176
# Translates a project title via +translate_with_default+, using the
# SHORT_PROJ lookup for +type+ 'short' and FULL_PROJ for 'full'. The title is
# normalized first, and the normalized title itself is the fallback value.
# Returns nil for any other +type+.
def translate_project_title(project_title, type)
  normalized_project_title = ModsFieldable.normalize(project_title)

  case type
  when 'short'
    translate_with_default(SHORT_PROJ, normalized_project_title, normalized_project_title)
  when 'full'
    translate_with_default(FULL_PROJ, normalized_project_title, normalized_project_title)
  else
    nil
  end
end
187
+
188
# Returns the normalized (punctuation-stripped) text of every
# location/shelfLocator under +node+ (defaults to the MODS root).
def shelf_locators(node=mods)
  locator_nodes = node.xpath("./mods:location/mods:shelfLocator", MODS_NS)
  locator_nodes.map { |locator| ModsFieldable.normalize(locator.text, true) }
end
193
+
194
# Collects the normalized (punctuation-stripped) text of originInfo date
# elements under +node+ (defaults to the MODS root) that carry none of the
# keyDate, point or w3cdtf attributes. Results are ordered dateCreated
# first, then dateIssued, then dateOther.
def textual_dates(node=mods)
  %w(dateCreated dateIssued dateOther).flat_map do |element_name|
    date_nodes = node.xpath("./mods:originInfo/mods:#{element_name}[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS)
    date_nodes.map { |date_node| ModsFieldable.normalize(date_node.text, true) }
  end
end
207
+
208
# Turns a start/end year pair into a one-element Array holding a display string.
#
# Both arguments are converted with +to_i+, which also drops zero-padding.
# Equal years give just that year as a string. Otherwise the result reads
# "Between X and Y": a non-positive year is printed without its first
# character and suffixed with " BCE"; a positive end year gets a " CE"
# suffix only when the start year is not positive.
def date_range_to_textual_date(start_year, end_year)
  first_year = start_year.to_i
  last_year = end_year.to_i
  return [first_year.to_s] if first_year == last_year

  first_label =
    if first_year > 0
      first_year.to_s
    else
      first_year.to_s[1..-1] + ' BCE'
    end

  last_label =
    if last_year > 0
      first_year > 0 ? last_year.to_s : last_year.to_s + ' CE'
    else
      last_year.to_s[1..-1] + ' BCE'
    end

  ['Between ' + first_label + ' and ' + last_label]
end
222
+
223
# Returns the normalized (punctuation-stripped) text of every note under
# +node+ (defaults to the MODS root) whose type is 'date' or 'date source'.
def date_notes(node=mods)
  note_nodes = node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS)
  note_nodes.map { |note| ModsFieldable.normalize(note.text, true) }
end
230
+
231
# Returns the normalized (punctuation-stripped) text of every note under
# +node+ (defaults to the MODS root) that is untyped or whose type is
# neither 'date' nor 'date source'. Notes of type 'view direction' are
# prefixed with "View Direction: ".
def non_date_notes(node=mods)
  note_nodes = node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS)
  note_nodes.map do |note|
    prefix = note.attr('type') == 'view direction' ? 'View Direction: ' : ''
    prefix + ModsFieldable.normalize(note.text, true)
  end
end
244
+
245
# Returns the normalized (punctuation-stripped) text of every location/url
# under +node+ (defaults to the MODS root) with access 'object in context'
# and usage 'primary display'.
def item_in_context_url(node=mods)
  url_nodes = node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS)
  url_nodes.map { |url_node| ModsFieldable.normalize(url_node.text, true) }
end
252
+
253
# Returns the normalized (punctuation-stripped) text of every location/url
# under +node+ (defaults to the MODS root) whose access is not
# 'object in context'.
def non_item_in_context_url(node=mods)
  url_nodes = node.xpath("./mods:location/mods:url[not(@access='object in context')]", MODS_NS)
  url_nodes.map { |url_node| ModsFieldable.normalize(url_node.text, true) }
end
260
+
261
# Returns the normalized (punctuation-stripped) text of every location/url
# found inside a host relatedItem labelled "Project" under +node+
# (defaults to the MODS root).
def project_url(node=mods)
  url_nodes = node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS)
  url_nodes.map { |url_node| ModsFieldable.normalize(url_node.text, true) }
end
268
+
269
# Collects normalized (punctuation-stripped) subject values under +node+
# (defaults to the MODS root). Results are grouped in this order: topics
# (excluding subjects with authority 'Durst'), geographic, name, temporal,
# titleInfo and genre.
def all_subjects(node=mods)
  subject_paths = [
    "./mods:subject[not(@authority) or @authority != 'Durst']/mods:topic",
    "./mods:subject/mods:geographic",
    "./mods:subject/mods:name",
    "./mods:subject/mods:temporal",
    "./mods:subject/mods:titleInfo",
    "./mods:subject/mods:genre"
  ]

  subject_paths.flat_map do |path|
    node.xpath(path, MODS_NS).map { |subject_node| ModsFieldable.normalize(subject_node.text, true) }
  end
end
293
+
294
# Returns the normalized (punctuation-stripped) topic text of every subject
# under +node+ (defaults to the MODS root) whose authority is 'Durst'.
def durst_subjects(node=mods)
  topic_nodes = node.xpath("./mods:subject[@authority='Durst']/mods:topic", MODS_NS)
  topic_nodes.map { |topic| ModsFieldable.normalize(topic.text, true) }
end
301
+
302
# Returns the normalized (punctuation-stripped) text of every
# originInfo/place/placeTerm under +node+ (defaults to the MODS root).
def origin_info_place(node=mods)
  place_nodes = node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS)
  place_nodes.map { |place| ModsFieldable.normalize(place.text, true) }
end
309
+
310
# Returns the place terms to display for +node+ (defaults to the MODS root).
#
# placeTerms without a valueURI attribute are preferred; only when there are
# none of those are the placeTerms that do have a valueURI returned. All
# values are normalized with punctuation stripping.
def origin_info_place_for_display(node=mods)
  to_text = lambda { |place| ModsFieldable.normalize(place.text, true) }

  places_with_uri = node.xpath("./mods:originInfo/mods:place/mods:placeTerm[@valueURI]", MODS_NS).map(&to_text)
  places_without_uri = node.xpath("./mods:originInfo/mods:place/mods:placeTerm[not(@valueURI)]", MODS_NS).map(&to_text)

  places_without_uri.empty? ? places_with_uri : places_without_uri
end
323
+
324
# Returns the decimal "latitude,longitude" pairs found in
# subject/cartographics/coordinates under +node+ (defaults to the MODS root).
#
# Expected coordinate format: 40.123456,-73.5678 (whitespace around the
# comma is allowed). Elements whose text contains no such pair are skipped.
#
# The text is normalized WITHOUT punctuation stripping: that step removes
# leading punctuation, which would drop the minus sign of a negative first
# coordinate. Only the matched pair is kept, so enclosing brackets or other
# surrounding text do not end up in the returned value.
#
# @return [Array<String>]
def coordinates(node=mods)
  coordinate_pattern = /-?\d+\.\d+\s*,\s*-?\d+\.\d+/
  coordinate_values = []
  node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).each do |coordinate_node|
    text = ModsFieldable.normalize(coordinate_node.text)
    found = text.match(coordinate_pattern)
    coordinate_values << found[0] if found
  end
  coordinate_values
end
334
+
335
# Adds MODS-derived fields to +solr_doc+ and returns it.
#
# Starts from the result of +super+ when a parent implementation exists,
# otherwise from the given +solr_doc+. If there is no MODS root the document
# is returned unchanged. Otherwise title, name, subject, format, note, URL,
# place, repository, project, date and geo fields are filled in, and finally
# every field registered with the class-level value mapper is passed
# through +map_value+.
#
# Start/end years are derived from the first origin_info_date_* field already
# present in the document (such fields are not set by this method, so they
# must come from +super+ or the caller).
#
# @param [Hash] solr_doc
# @return [Hash]
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  return solr_doc if mods.nil? # There is no mods. Return because there is nothing to process, otherwise NoMethodError will be raised by subsequent lines.

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["durst_subjects_ssim"] = durst_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_non_item_in_context_url_ssm"] = non_item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  # Repository fields are only written when a marcorg code is present.
  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
  start_date = nil
  end_date = nil
  start_year = nil
  end_year = nil

  # The first field present in each priority list wins.
  start_key = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = solr_doc[start_key][0] if start_key
  end_key = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  end_date = solr_doc[end_key][0] if end_key

  if start_date.present?
    # A fully unknown date ('uuuu') counts as missing; any remaining 'u'
    # placeholder characters are replaced with '0'.
    start_date = nil if start_date == 'uuuu'
    end_date = nil if end_date == 'uuuu'
    start_date = start_date.gsub('u', '0') unless start_date.nil?
    end_date = end_date.gsub('u', '0') unless end_date.nil?

    # If only one side of the range is known, use it for both.
    end_date = start_date if end_date.blank?
    start_date = end_date if start_date.blank?

    year_regex = /^(-?\d{1,4}).*/

    unless start_date.blank?
      start_year_match = start_date.match(year_regex)
      if start_year_match && start_year_match.captures.length > 0
        start_year = start_year_match.captures[0]
        start_year = zero_pad_year(start_year)
        solr_doc["lib_start_date_year_itsi"] = start_year.to_i # TrieInt version for searches
      end
    end

    unless end_date.blank?
      end_year_match = end_date.match(year_regex)
      if end_year_match && end_year_match.captures.length > 0
        end_year = end_year_match.captures[0]
        end_year = zero_pad_year(end_year)
        solr_doc["lib_end_date_year_itsi"] = end_year.to_i # TrieInt version for searches
      end
    end

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year && end_year
    solr_doc["lib_date_year_range_ss"] = solr_doc["lib_date_year_range_si"]

    # When no textual date is available, fall back to other date data (if available).
    # Both years must have been parsed: otherwise nil.to_i would turn an
    # unknown date into a bogus year "0".
    if solr_doc["lib_date_textual_ssm"].blank? && start_year && end_year
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Only values of existing keys are replaced here, so the hash can be
  # updated while it is being iterated.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end

  solr_doc
end
461
+
462
# Left-pads the digits of +year+ with zeros to at least four characters,
# keeping a leading minus sign in front of the padding. +year+ is converted
# with +to_s+ first; values that already have four or more digits are
# returned unchanged.
def zero_pad_year(year)
  text = year.to_s
  negative = text.start_with?('-')
  digits = negative ? text[1..-1] : text
  sign = negative ? '-' : ''
  sign + digits.rjust(4, '0')
end
472
+ end
473
+ end