cul_scv_hydra 0.22.6 → 0.22.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (101) hide show
  1. checksums.yaml +4 -4
  2. data/app/controllers/concerns/cul/hydra/controller.rb +22 -0
  3. data/app/controllers/concerns/cul/hydra/resolver.rb +69 -0
  4. data/app/controllers/concerns/cul/hydra/thumbnails.rb +62 -0
  5. data/app/controllers/concerns/cul/scv/hydra/controller.rb +3 -19
  6. data/app/controllers/concerns/cul/scv/hydra/resolver.rb +2 -65
  7. data/app/controllers/concerns/cul/scv/hydra/thumbnails.rb +3 -59
  8. data/app/models/concept.rb +1 -1
  9. data/app/models/concerns/cul/hydra/models.rb +24 -0
  10. data/app/models/concerns/cul/hydra/models/aggregator.rb +121 -0
  11. data/app/models/concerns/cul/hydra/models/common.rb +220 -0
  12. data/app/models/concerns/cul/hydra/models/image_resource.rb +78 -0
  13. data/app/models/concerns/cul/hydra/models/linkable_resources.rb +108 -0
  14. data/app/models/concerns/cul/hydra/models/resource.rb +87 -0
  15. data/app/models/concerns/cul/scv/hydra/models.rb +1 -13
  16. data/app/models/concerns/cul/scv/hydra/models/aggregator.rb +1 -116
  17. data/app/models/concerns/cul/scv/hydra/models/common.rb +1 -213
  18. data/app/models/concerns/cul/scv/hydra/models/image_resource.rb +3 -75
  19. data/app/models/concerns/cul/scv/hydra/models/linkable_resources.rb +3 -105
  20. data/app/models/concerns/cul/scv/hydra/models/resource.rb +2 -83
  21. data/app/models/cul/hydra/datastreams/dc_metadata.rb +107 -0
  22. data/app/models/cul/hydra/datastreams/mods_document.rb +195 -0
  23. data/app/models/cul/hydra/datastreams/struct_metadata.rb +176 -0
  24. data/app/models/cul/scv/hydra/datastreams/dc_metadata.rb +5 -104
  25. data/app/models/cul/scv/hydra/datastreams/mods_document.rb +5 -178
  26. data/app/models/cul/scv/hydra/datastreams/struct_metadata.rb +5 -174
  27. data/app/models/dc_document.rb +1 -1
  28. data/app/models/generic_aggregator.rb +5 -5
  29. data/app/models/generic_object.rb +2 -2
  30. data/app/models/generic_resource.rb +4 -4
  31. data/app/models/mets_structured_aggregator.rb +2 -2
  32. data/app/models/resource.rb +3 -3
  33. data/app/models/resource_aggregator.rb +3 -3
  34. data/fixtures/spec/CUL_MODS/mods-subjects.xml +24 -0
  35. data/lib/cul_hydra.rb +18 -0
  36. data/lib/cul_hydra/access_controls_enforcement.rb +53 -0
  37. data/lib/cul_hydra/controllers.rb +13 -0
  38. data/lib/cul_hydra/controllers/aggregates.rb +93 -0
  39. data/lib/cul_hydra/controllers/aggregator_controller_helper.rb +27 -0
  40. data/lib/cul_hydra/controllers/catalog.rb +12 -0
  41. data/lib/cul_hydra/controllers/content_aggregators.rb +81 -0
  42. data/lib/cul_hydra/controllers/datastreams.rb +145 -0
  43. data/lib/cul_hydra/controllers/helpers.rb +10 -0
  44. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/active_fedora_helper_behavior.rb +1 -1
  45. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/application_helper_behavior.rb +1 -1
  46. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/dc_metadata_helper_behavior.rb +1 -1
  47. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_assets_helper_behavior.rb +1 -1
  48. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_autocomplete_helper_behavior.rb +1 -1
  49. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/hydra_uploader_helper_behavior.rb +1 -1
  50. data/lib/{cul_scv_hydra → cul_hydra}/controllers/helpers/resources_helper_behavior.rb +1 -1
  51. data/lib/cul_hydra/controllers/resources.rb +161 -0
  52. data/lib/cul_hydra/controllers/static_image_aggregators.rb +105 -0
  53. data/lib/cul_hydra/controllers/suggestions.rb +126 -0
  54. data/lib/cul_hydra/controllers/terms.rb +205 -0
  55. data/lib/cul_hydra/engine.rb +31 -0
  56. data/lib/cul_hydra/fedora.rb +41 -0
  57. data/lib/cul_hydra/fedora/dummy_object.rb +37 -0
  58. data/lib/cul_hydra/fedora/rubydora_patch.rb +16 -0
  59. data/lib/cul_hydra/fedora/url_helper_behavior.rb +32 -0
  60. data/lib/cul_hydra/indexer.rb +84 -0
  61. data/lib/cul_hydra/om.rb +7 -0
  62. data/lib/cul_hydra/om/standard_mods.rb +115 -0
  63. data/lib/cul_hydra/risearch_members.rb +92 -0
  64. data/lib/cul_hydra/solrizer.rb +10 -0
  65. data/lib/cul_hydra/solrizer/extractor.rb +27 -0
  66. data/lib/cul_hydra/solrizer/mods_fieldable.rb +435 -0
  67. data/lib/cul_hydra/solrizer/terminology_based_solrizer.rb +35 -0
  68. data/lib/cul_hydra/solrizer/value_mapper.rb +46 -0
  69. data/lib/{cul_scv_hydra/solrizer/field_mapper.rb → cul_hydra/solrizer_patch.rb} +0 -0
  70. data/lib/cul_hydra/version.rb +8 -0
  71. data/lib/cul_hydra/version.rb~ +8 -0
  72. data/lib/cul_scv_fedora/dummy_object.rb +1 -30
  73. data/lib/cul_scv_fedora/rubydora_patch.rb +3 -7
  74. data/lib/cul_scv_fedora/url_helper_behavior.rb +3 -23
  75. data/lib/cul_scv_hydra.rb +5 -32
  76. data/lib/cul_scv_hydra/access_controls_enforcement.rb +3 -50
  77. data/lib/cul_scv_hydra/controllers.rb +10 -10
  78. data/lib/cul_scv_hydra/controllers/aggregates.rb +1 -86
  79. data/lib/cul_scv_hydra/controllers/aggregator_controller_helper.rb +4 -23
  80. data/lib/cul_scv_hydra/controllers/catalog.rb +5 -9
  81. data/lib/cul_scv_hydra/controllers/content_aggregators.rb +4 -77
  82. data/lib/cul_scv_hydra/controllers/datastreams.rb +3 -140
  83. data/lib/cul_scv_hydra/controllers/helpers.rb +44 -8
  84. data/lib/cul_scv_hydra/controllers/resources.rb +4 -157
  85. data/lib/cul_scv_hydra/controllers/static_image_aggregators.rb +4 -100
  86. data/lib/cul_scv_hydra/controllers/suggestions.rb +4 -122
  87. data/lib/cul_scv_hydra/controllers/terms.rb +4 -201
  88. data/lib/cul_scv_hydra/engine.rb +1 -1
  89. data/lib/cul_scv_hydra/indexer.rb +3 -82
  90. data/lib/cul_scv_hydra/om.rb +2 -2
  91. data/lib/cul_scv_hydra/om/standard_mods.rb +1 -108
  92. data/lib/cul_scv_hydra/risearch_members.rb +4 -89
  93. data/lib/cul_scv_hydra/solrizer.rb +5 -6
  94. data/lib/cul_scv_hydra/solrizer/extractor.rb +1 -25
  95. data/lib/cul_scv_hydra/solrizer/scv_mods_fieldable.rb +4 -429
  96. data/lib/cul_scv_hydra/solrizer/terminology_based_solrizer.rb +4 -32
  97. data/lib/cul_scv_hydra/solrizer/value_mapper.rb +1 -44
  98. data/lib/cul_scv_hydra/version.rb +5 -5
  99. data/lib/tasks/index.rake +2 -2
  100. data/lib/tasks/transform.rake +23 -0
  101. metadata +55 -12
@@ -28,4 +28,4 @@ module Cul::Scv::Hydra
28
28
  end
29
29
  end
30
30
  end
31
- end
31
+ end
@@ -1,85 +1,6 @@
1
1
  module Cul::Scv::Hydra::Indexer
2
-
3
- def self.descend_from(pid, pids_to_omit=nil, verbose_output=false)
4
- if pid.blank?
5
- raise 'Please supply a pid (e.g. rake recursively_index_fedora_objects pid=ldpd:123)'
6
- end
7
-
8
- begin
9
-
10
- unless ActiveFedora::Base.exists?(pid)
11
- raise 'Could not find Fedora object with pid: ' + pid
12
- end
13
-
14
- if pids_to_omit.present? && pids_to_omit.include?(pid)
15
- puts 'Skipping topmost object in this set (' + pid + ') because it has been intentionally omitted...' if verbose_output
16
- else
17
- puts 'Indexing topmost object in this set (' + pid + ')...' if verbose_output
18
- puts 'If this is a BagAggregator with a lot of members, this may take a while...' if verbose_output
19
-
20
- yield pid
21
-
22
- end
23
-
24
- puts 'Recursively retreieving and indexing all members of ' + pid + '...'
25
-
26
- unique_pids = Cul::Scv::Hydra::RisearchMembers.get_recursive_member_pids(pid, true)
27
-
28
- total_number_of_members = unique_pids.length
29
- puts 'Recursive search found ' + total_number_of_members.to_s + ' members.' if verbose_output
30
-
31
- if pids_to_omit.present?
32
- unique_pids = unique_pids - pids_to_omit
33
- total_number_of_members = unique_pids.length
34
- puts 'After checking against the list of omitted pids, the total number of objects to index will be: ' + total_number_of_members.to_s if verbose_output
35
- end
36
-
37
- i = 1
38
- if total_number_of_members > 0
39
- unique_pids.each {|pid|
40
-
41
- puts 'Recursing on ' + i.to_s + ' of ' + total_number_of_members.to_s + ' members (' + pid + ')...' if verbose_output
42
-
43
- yield pid
44
-
45
- i += 1
46
- }
47
- end
48
-
49
- rescue RestClient::Unauthorized => e
50
- error_message = "Skipping #{pid} due to error: " + e.message + '. Problem with Fedora object?'
51
- puts error_message
52
- logger.error error_message if defined?(logger)
53
- end
54
-
55
- puts 'Recursion complete!'
56
-
2
+ extend ActiveSupport::Concern
3
+ included do
4
+ include Cul::Hydra::Indexer
57
5
  end
58
- def self.recursively_index_fedora_objects(top_pid, pids_to_omit=nil, skip_generic_resources=false, verbose_output=false)
59
-
60
- descend_from(top_pid, pids_to_omit, verbose_output) do |pid|
61
- self.index_pid(pid, skip_generic_resources, verbose_output)
62
- end
63
-
64
- end
65
-
66
- def self.index_pid(pid, skip_generic_resources=false, verbose_output=false)
67
- # We found an object with the desired PID. Let's reindex it
68
- begin
69
- active_fedora_object = ActiveFedora::Base.find(pid, :cast => true)
70
-
71
- if skip_generic_resources && active_fedora_object.is_a?(GenericResource)
72
- puts 'Object was skipped because GenericResources are being skipped and it is a GenericResource.'
73
- else
74
- active_fedora_object.update_index
75
- puts 'done.' if verbose_output
76
- end
77
- rescue SystemExit, Interrupt => e
78
- # Allow system interrupt (ctrl+c)
79
- raise e
80
- rescue Exception => e
81
- puts "Encountered problem with #{pid}. Skipping record. Exception: #{e.message}"
82
- end
83
- end
84
-
85
6
  end
@@ -2,8 +2,8 @@ module Cul
2
2
  module Scv
3
3
  module Hydra
4
4
  module Om
5
+ autoload :StandardMods, "cul_scv_hydra/om/standard_mods"
5
6
  end
6
7
  end
7
8
  end
8
- end
9
- require "cul_scv_hydra/om/standard_mods"
9
+ end
@@ -1,115 +1,8 @@
1
- require 'active-fedora'
2
- require 'solrizer'
3
- require 'cul_scv_hydra/solrizer'
4
1
  module Cul
5
2
  module Scv
6
3
  module Hydra
7
4
  module Om
8
- class StandardMods < ::ActiveFedora::OmDatastream
9
-
10
- set_terminology do |t|
11
- t.root(:path=>"mods",
12
- :xmlns=>"http://www.loc.gov/mods/v3",
13
- :schema=>"http://www.loc.gov/standards/mods/v3/mods-3-4.xsd")
14
- t.identifier(:path=>"identifier", :attributes=>{:type=>"local"}, :data_type=>:symbol)
15
- t.clio(:path=>"identifier", :attributes=>{:type=>"CLIO"}, :data_type=>:symbol)
16
- t.title_info(:path=>"titleInfo", :index_as=>[:not_searchable]) {
17
- t.main_title(:path=>"title", :index_as=>[:not_searchable])
18
- }
19
- t.title(:path=>'mods/oxns:titleInfo/oxns:title', :index_as=>[:searchable,:displayable, :sortable])
20
- t.abstract
21
- t.subject {
22
- t.topic
23
- }
24
- t.type_of_resource(:path=>"typeOfResource", :index_as=>[:not_searchable])
25
- t.physical_description(:path=>"physicalDescription", :index_as=>[:not_searchable]){
26
- t.form_marc(:path=>"form", :attributes=>{:authority=>"marcform"}, :index_as=>[:not_searchable])
27
- t.form_nomarc(:path=>"form[@authority !='marcform']", :index_as=>[:not_searchable, :displayable, :facetable, :textable])
28
- t.extent(:path=>"extent", :index_as=>[:not_searchable])
29
- t.reformatting_quality(:path=>"reformattingQuality", :index_as=>[:not_searchable])
30
- t.internet_media_type(:path=>"internetMediaType", :index_as=>[:not_searchable])
31
- t.digital_origin(:path=>"digitalOrigin", :index_as=>[:not_searchable])
32
- }
33
- t.lib_format(proxy: [:physical_description, :form_nomarc] )
34
- t.location(:path=>"location", :index_as=>[:not_searchable]){
35
- t.repo_text(:path=>"physicalLocation",:attributes=>{:authority=>:none}, :index_as=>[:not_searchable])
36
- t.repo_code(:path=>"physicalLocation",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
37
- }
38
- t.lib_repo_text(:ref=>[:location, :repo_text], :label=>"lib_repo", :index_as=>[:searchable])
39
- t.lib_repo(:ref=>[:location, :repo_code], :index_as=>[:not_searchable,:facetable, :displayable])
40
- t.project_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Project"}, :index_as=>[:not_searchable]){
41
- t.p_title(:path=>'titleInfo',:index_as=>[:not_searchable])
42
- }
43
- t.lib_project(:proxy=>[:project_host, :p_title],:index_as=>[:facetable,:displayable, :not_searchable])
44
- t.collection_host(:path=>"relatedItem", :attributes=>{:type=>"host", :displayLabel=>"Collection"}, :index_as=>[:not_searchable]){
45
- t.c_title(:path=>'titleInfo',:index_as=>[:not_searchable])
46
- }
47
- t.lib_project(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Project']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
48
- t.lib_collection(:path=>"mods/oxns:relatedItem[@type='host'][@displayLabel='Collection']/oxns:titleInfo/oxns:title",:index_as=>[:facetable,:displayable, :not_searchable])
49
- t.note(:path=>"note")
50
- t.access_condition(:path=>"accessCondition", :attributes=>{:type=>"useAndReproduction"}, :index_as => [:searchable], :data_type => :symbol)
51
- t.record_info(:path=>"recordInfo", :index_as=>[:not_searchable]) {
52
- t.record_creation_date(:path=>"recordCreationDate",:attributes=>{:encoding=>"w3cdtf"}, :index_as=>[:not_searchable])
53
- t.record_content_source(:path=>"recordContentSource",:attributes=>{:authority=>"marcorg"}, :index_as=>[:not_searchable])
54
- t.language_of_cataloging(:path=>"languageOfCataloging", :index_as=>[:not_searchable]){
55
- t.language_term(:path=>"languageTerm", :index_as=>[:not_searchable], :attributes=>{:type=>:none})
56
- t.language_code(:path=>"languageTerm",:attributes=>{:type=>'code',:authority=>"iso639-2b"}, :index_as=>[:not_searchable])
57
- }
58
- t.record_origin(:path=>"recordOrigin", :index_as=>[:not_searchable])
59
- }
60
-
61
- t.origin_info(:path=>"originInfo", :index_as=>[:not_searchable]){
62
- t.date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf'}, :index_as=>[:not_searchable])
63
- t.key_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes'}, :index_as=>[:not_searchable])
64
- t.start_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:keyDate=>'yes',:point=>'start'}, :index_as=>[:not_searchable])
65
- t.end_date(:path=>"dateIssued", :attributes=>{:encoding=>'w3cdtf',:point=>'end'}, :index_as=>[:not_searchable])
66
- }
67
- end
68
-
69
- def self.xml_template
70
- builder = Nokogiri::XML::Builder.new do |xml|
71
- xml.mods(:version=>"3.4",
72
- "xmlns"=>"http://www.loc.gov/mods/v3",
73
- "xmlns:xsi"=>"http://www.w3.org/2001/XMLSchema-instance"){
74
- }
75
- end
76
- builder.doc.encoding = 'UTF-8'
77
- builder.doc.root["xsi:schemaLocation"] = 'http://www.loc.gov/mods/v3 http://www.loc.gov/standards/mods/v3/mods-3-4.xsd'
78
- return builder.doc
79
- end
80
-
81
- def prefix
82
- #if ::ActiveFedora::VERSION >= '8'
83
- # Rails.logger.warn("the prefix method of #{self.class.name} was overriden to maintain backwards compatibility")
84
- #end
85
- ''
86
- end
87
-
88
- def method_missing method, *args
89
- query = false
90
- _mname = method.id2name
91
- if _mname[-1,1] == '?'
92
- query = true
93
- _mname = _mname[0,_mname.length-1]
94
- end
95
- _msym = _mname.to_sym
96
- begin
97
- has_term = self.class.terminology.has_term?(_msym)
98
-
99
- _r = (has_term)? find_by_terms(_msym, *args) : nil
100
- if query
101
- return !( _r.nil? || _r.size()==0)
102
- else
103
- return _r
104
- end
105
- rescue
106
- super
107
- end
108
- end
109
- def update_values(params)
110
- super
111
- self.dirty = true
112
- end
5
+ class StandardMods < Cul::Hydra::Om::StandardMods
113
6
  end
114
7
  end
115
8
  end
@@ -1,92 +1,7 @@
1
1
  module Cul::Scv::Hydra::RisearchMembers
2
-
3
- def self.get_recursive_member_pids(pid, verbose_output=false, cmodel_type='all')
4
-
5
- recursive_member_query =
6
- 'select $child $parent from <#ri>
7
- where
8
- walk($child <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '> and $child <http://purl.oclc.org/NET/CUL/memberOf> $parent)'
9
-
10
- unless cmodel_type == 'all'
11
- recursive_member_query += ' and $child <fedora-model:hasModel> $cmodel'
12
- recursive_member_query += ' and $cmodel <mulgara:is> <info:fedora/ldpd:' + cmodel_type + '>'
13
- end
14
-
15
- puts 'Performing query:' if verbose_output
16
- puts recursive_member_query if verbose_output
17
-
18
- search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(recursive_member_query, {
19
- :type => 'tuples',
20
- :format => 'json',
21
- :limit => '',
22
- :stream => 'on'
23
- }))
24
-
25
- unique_pids = search_response['results'].map{|result| result['child'].gsub('info:fedora/', '') }.uniq
26
-
27
- return unique_pids
28
-
2
+ extend ActiveSupport::Concern
3
+ extend Cul::Hydra::RisearchMembers::ClassMethods
4
+ included do
5
+ include Cul::Hydra::RisearchMembers
29
6
  end
30
-
31
- def self.get_direct_member_results(pid, verbose_output=false, format='json')
32
-
33
- direct_member_query =
34
- 'select $pid from <#ri>
35
- where $pid <http://purl.oclc.org/NET/CUL/memberOf> <fedora:' + pid + '>'
36
-
37
- puts 'Performing query:' if verbose_output
38
- puts direct_member_query if verbose_output
39
-
40
- search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(direct_member_query, {
41
- :type => 'tuples',
42
- :format => format,
43
- :limit => '',
44
- :stream => 'on'
45
- }))
46
-
47
- return search_response['results']
48
- end
49
-
50
- def self.get_direct_member_pids(pid, verbose_output=false)
51
- unique_pids = get_direct_member_results(pid,verbose_output,'json')
52
- unique_pids.map{|result| result['pid'].gsub('info:fedora/', '') }.uniq
53
- end
54
-
55
- def self.get_direct_member_count(pid, verbose_output=false)
56
- count = get_direct_member_results(pid,verbose_output,'count/json')
57
- return count.blank? ? 0 : count[0]['count'].to_i
58
- end
59
-
60
-
61
- #Project constituents
62
-
63
- def self.get_project_constituent_results(pid, verbose_output=false, format='json')
64
-
65
- project_constituent_query =
66
- 'select $pid from <#ri>
67
- where $pid <info:fedora/fedora-system:def/relations-external#isConstituentOf> <fedora:' + pid + '>'
68
-
69
- puts 'Performing query:' if verbose_output
70
- puts project_constituent_query if verbose_output
71
-
72
- search_response = JSON(Cul::Scv::Fedora.repository.find_by_itql(project_constituent_query, {
73
- :type => 'tuples',
74
- :format => format,
75
- :limit => '',
76
- :stream => 'on'
77
- }))
78
-
79
- return search_response['results']
80
- end
81
-
82
- def self.get_project_constituent_pids(pid, verbose_output=false)
83
- unique_pids = get_project_constituent_results(pid,verbose_output,'json')
84
- unique_pids.map{|result| result['pid'].gsub('info:fedora/', '') }.uniq
85
- end
86
-
87
- def self.get_project_constituent_count(pid, verbose_output=false)
88
- count = get_project_constituent_results(pid,verbose_output,'count/json')
89
- return count.blank? ? 0 : count[0]['count'].to_i
90
- end
91
-
92
7
  end
@@ -2,12 +2,11 @@ module Cul
2
2
  module Scv
3
3
  module Hydra
4
4
  module Solrizer
5
- autoload :Extractor, "cul_scv_hydra/solrizer/extractor"
6
- autoload :TerminologyBasedSolrizer, "cul_scv_hydra/solrizer/terminology_based_solrizer"
7
- autoload :ValueMapper, "cul_scv_hydra/solrizer/value_mapper"
8
- autoload :ScvModsFieldable, "cul_scv_hydra/solrizer/scv_mods_fieldable"
5
+ autoload :Extractor, "cul_scv_hydra/solrizer/extractor"
6
+ autoload :TerminologyBasedSolrizer, "cul_scv_hydra/solrizer/terminology_based_solrizer"
7
+ autoload :ValueMapper, "cul_scv_hydra/solrizer/value_mapper"
8
+ autoload :ScvModsFieldable, "cul_scv_hydra/solrizer/scv_mods_fieldable"
9
9
  end
10
10
  end
11
11
  end
12
- end
13
- require "cul_scv_hydra/solrizer/field_mapper"
12
+ end
@@ -1,27 +1,3 @@
1
1
  module Cul::Scv::Hydra::Solrizer
2
- class Extractor < ::Solrizer::Extractor
3
- # Insert +field_value+ for +field_name+ into +solr_doc+
4
- # Handles inserting new values into a Hash while ensuring that you don't destroy or overwrite any existing values in the hash.
5
- # Ensures that field values are always appended to arrays within the values hash.
6
- # Ensures that values are run through format_node_value
7
- # Also ensures that values are unique if specified
8
- # @param [Hash] solr_doc
9
- # @param [String] field_name
10
- # @param [String] field_value
11
- # @param [boolean] unique
12
- def self.insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
13
- formatted_value = self.format_node_value(field_value)
14
- if solr_doc.has_key?(field_name)
15
- solr_doc[field_name] << formatted_value unless (unique and solr_doc[field_name].include? formatted_value)
16
- else
17
- solr_doc.merge!( {field_name => [formatted_value]} )
18
- end
19
- return solr_doc
20
- end
21
-
22
- # Instance Methods
23
- def insert_solr_field_value(solr_doc, field_name, field_value, unique=false)
24
- Cul::Scv::Hydra::Solrizer::Extractor.insert_solr_field_value(solr_doc, field_name, field_value, unique)
25
- end
26
- end
2
+ class Extractor < Cul::Hydra::Solrizer:Extractor;end
27
3
  end
@@ -1,434 +1,9 @@
1
1
  module Cul::Scv::Hydra::Solrizer
2
- module ScvModsFieldable
2
+ module ScvModsFieldable
3
3
  extend ActiveSupport::Concern
4
- include Solrizer::DefaultDescriptors::Normal
5
-
6
- MODS_NS = {'mods'=>'http://www.loc.gov/mods/v3'}
7
-
8
- module ClassMethods
9
- def value_mapper(maps=nil)
10
- @value_mapper ||= ValueMapper.new(maps)
11
- end
12
-
13
- def map_field(field_key, map_key)
14
- value_mapper.map_field(field_key, map_key)
15
- end
16
-
17
- def map_value(field_key, value_key)
18
- value_mapper.map_value(field_key, value_key)
19
- end
20
-
21
- def maps_field?(field_key)
22
- value_mapper.maps_field? field_key
23
- end
24
- end
25
-
26
- def mods
27
- ng_xml.xpath('/mods:mods', MODS_NS).first
28
- end
29
-
30
- def projects
31
- mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']", MODS_NS).collect do |p_node|
32
- ScvModsFieldable.normalize(main_title(p_node), true)
33
- end
34
- end
35
-
36
- def collections
37
- mods.xpath("./mods:relatedItem[@type='host' and @displayLabel='Collection']", MODS_NS).collect do |p_node|
38
- ScvModsFieldable.normalize(main_title(p_node), true)
39
- end
40
- end
41
-
42
- def sort_title(node=mods)
43
- # include only the untyped [!@type] titleInfo, exclude noSort
44
- base_text = ''
45
- t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
46
- if t
47
- t.children.each do |child|
48
- base_text << child.text unless child.name == 'nonSort'
49
- end
50
- end
51
- base_text = ScvModsFieldable.normalize(base_text, true)
52
- base_text = nil if base_text.empty?
53
- base_text
54
- end
55
-
56
- def main_title(node=mods)
57
- # include only the untyped [!@type] titleInfo
58
- t = node.xpath('./mods:titleInfo[not(@type)]', MODS_NS).first
59
- if t
60
- ScvModsFieldable.normalize(t.text)
61
- else
62
- nil
63
- end
64
- end
65
-
66
- def titles(node=mods)
67
- # all titles without descending into relatedItems
68
- # For now, this only includes the main title and selected alternate_titles
69
- all_titles = []
70
- all_titles << main_title unless main_title.nil?
71
- all_titles += alternative_titles unless alternative_titles.nil?
72
- end
73
-
74
- def alternative_titles(node=mods)
75
- node.xpath('./mods:titleInfo[@type and (@type="alternative" or @type="abbreviated" or @type="translated" or @type="uniform")]', MODS_NS).collect do |t|
76
- ScvModsFieldable.normalize(t.text)
77
- end
78
- end
79
-
80
- def names(role_authority=nil, role=nil)
81
- # get all the name nodes
82
- # keep all child text except the role terms
83
- xpath = "./mods:name"
84
- unless role_authority.nil?
85
- xpath << "/mods:role/mods:roleTerm[@authority='#{role_authority.to_s}'"
86
- unless role.nil?
87
- xpath << " and normalize-space(text()) = '#{role.to_s.strip}'"
88
- end
89
- xpath << "]/ancestor::mods:name"
90
- end
91
- names = mods.xpath(xpath, MODS_NS).collect do |node|
92
- base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
93
- ScvModsFieldable.normalize(base_text, true)
94
- end
95
-
96
- # Note: Removing subject names from name field extraction.
97
- # See: https://issues.cul.columbia.edu/browse/DCV-231 and https://issues.cul.columbia.edu/browse/SCV-102
98
- #xpath = "./mods:subject" + xpath[1,xpath.length]
99
- #mods.xpath(xpath, MODS_NS).each do |node|
100
- # base_text = node.xpath('./mods:namePart', MODS_NS).collect { |c| c.text }.join(' ')
101
- # names << ScvModsFieldable.normalize(base_text, true)
102
- #end
103
-
104
- names
105
- end
106
-
107
- def dates(node=mods)
108
- # get all the dateIssued with keyDate = 'yes', but not point = 'end'
109
- end
110
-
111
- def formats(node=mods)
112
- # get all the form values with authority != 'marcform'
113
- node.xpath("./mods:physicalDescription/mods:form[@authority != 'marcform']", MODS_NS).collect do |n|
114
- ScvModsFieldable.normalize(n.text)
115
- end
116
- end
117
-
118
- def repository_code(node=mods)
119
- # get the location/physicalLocation[@authority = 'marcorg']
120
- repo_code_node = node.xpath("./mods:location/mods:physicalLocation[@authority = 'marcorg']", MODS_NS).first
121
-
122
- if repo_code_node
123
- ScvModsFieldable.normalize(repo_code_node.text)
124
- else
125
- return nil
126
- end
127
- end
128
-
129
- def repository_text(node=mods)
130
- # get the location/physicalLocation[not(@authority)]
131
- repo_text_node = node.xpath("./mods:location/mods:physicalLocation[not(@authority)]", MODS_NS).first
132
-
133
- if repo_text_node
134
- ScvModsFieldable.normalize(repo_text_node.text)
135
- else
136
- return nil
137
- end
138
- end
139
-
140
- def translate_repo_marc_code(code, type)
141
- #code = ScvModsFieldable.normalize(code)
142
-
143
- if type == 'short'
144
- return translate_with_default(SHORT_REPO, code, 'Non-Columbia Location')
145
- elsif type == 'long'
146
- return translate_with_default(LONG_REPO, code, 'Non-Columbia Location')
147
- elsif type == 'full'
148
- return translate_with_default(FULL_REPO, code, 'Non-Columbia Location')
149
- end
150
-
151
- return nil
152
- end
153
-
154
- def translate_project_title(project_title, type)
155
- normalized_project_title = ScvModsFieldable.normalize(project_title)
156
-
157
- if type == 'short'
158
- return translate_with_default(SHORT_PROJ, normalized_project_title, normalized_project_title)
159
- elsif type == 'full'
160
- return translate_with_default(FULL_PROJ, normalized_project_title, normalized_project_title)
161
- end
162
-
163
- return nil
164
- end
165
-
166
- def shelf_locators(node=mods)
167
- node.xpath("./mods:location/mods:shelfLocator", MODS_NS).collect do |n|
168
- ScvModsFieldable.normalize(n.text, true)
169
- end
170
- end
171
-
172
- def textual_dates(node=mods)
173
- dates = []
174
- node.xpath("./mods:originInfo/mods:dateCreated[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
175
- dates << ScvModsFieldable.normalize(n.text, true)
176
- end
177
- node.xpath("./mods:originInfo/mods:dateIssued[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
178
- dates << ScvModsFieldable.normalize(n.text, true)
179
- end
180
- node.xpath("./mods:originInfo/mods:dateOther[not(@keyDate) and not(@point) and not(@w3cdtf)]", MODS_NS).collect do |n|
181
- dates << ScvModsFieldable.normalize(n.text, true)
182
- end
183
- return dates
184
- end
185
-
186
- def date_range_to_textual_date(start_year, end_year)
187
- start_year = start_year.to_i.to_s # Remove zero-padding if present
188
- end_year = end_year.to_i.to_s # Remove zero-padding if present
189
-
190
- if start_year == end_year
191
- return [start_year]
192
- else
193
- return [('Between ' +
194
- (start_year.to_i > 0 ? start_year : start_year[1,start_year.length] + ' BCE') +
195
- ' and ' +
196
- (end_year.to_i > 0 ? (start_year.to_i > 0 ? end_year : end_year + ' CE') : end_year[1,end_year.length] + ' BCE')
197
- )]
198
- end
199
- end
200
-
201
- def date_notes(node=mods)
202
- date_notes = []
203
- node.xpath("./mods:note[@type = 'date' or @type = 'date source']", MODS_NS).collect do |n|
204
- date_notes << ScvModsFieldable.normalize(n.text, true)
205
- end
206
- return date_notes
207
- end
208
-
209
- def non_date_notes(node=mods)
210
- non_date_notes = []
211
- node.xpath("./mods:note[not(@type) or (@type != 'date' and @type != 'date source')]", MODS_NS).collect do |n|
212
- non_date_notes << ScvModsFieldable.normalize(n.text, true)
213
- end
214
- return non_date_notes
215
- end
216
-
217
- def item_in_context_url(node=mods)
218
- item_in_context_url_val = []
219
- node.xpath("./mods:location/mods:url[@access='object in context' and @usage='primary display']", MODS_NS).collect do |n|
220
- item_in_context_url_val << ScvModsFieldable.normalize(n.text, true)
221
- end
222
- item_in_context_url_val
223
- end
224
-
225
- def project_url(node=mods)
226
- project_url_val = []
227
- node.xpath("./mods:relatedItem[@type='host' and @displayLabel='Project']/mods:location/mods:url", MODS_NS).collect do |n|
228
- project_url_val << ScvModsFieldable.normalize(n.text, true)
229
- end
230
- project_url_val
231
- end
232
-
233
- def all_subjects(node=mods)
234
- list_of_subjects = []
235
-
236
- node.xpath("./mods:subject/mods:topic", MODS_NS).collect do |n|
237
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
238
- end
239
- node.xpath("./mods:subject/mods:geographic", MODS_NS).collect do |n|
240
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
241
- end
242
- node.xpath("./mods:subject/mods:name", MODS_NS).collect do |n|
243
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
244
- end
245
- node.xpath("./mods:subject/mods:temporal", MODS_NS).collect do |n|
246
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
247
- end
248
- node.xpath("./mods:subject/mods:titleInfo", MODS_NS).collect do |n|
249
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
250
- end
251
- node.xpath("./mods:subject/mods:genre", MODS_NS).collect do |n|
252
- list_of_subjects << ScvModsFieldable.normalize(n.text, true)
253
- end
254
-
255
- return list_of_subjects
256
- end
257
-
258
- def origin_info_place(node=mods)
259
- places = []
260
- node.xpath("./mods:originInfo/mods:place/mods:placeTerm", MODS_NS).collect do |n|
261
- places << ScvModsFieldable.normalize(n.text, true)
262
- end
263
- return places
264
- end
265
-
266
- def origin_info_place_for_display(node=mods)
267
- # If there are multiple origin_info place elements, choose only the ones without valueURI attributes. Otherwise show the others.
268
- places_with_uri = []
269
- places_without_uri = []
270
- node.xpath("./mods:originInfo/mods:place/mods:placeTerm[@valueURI]", MODS_NS).collect do |n|
271
- places_with_uri << ScvModsFieldable.normalize(n.text, true)
272
- end
273
- node.xpath("./mods:originInfo/mods:place/mods:placeTerm[not(@valueURI)]", MODS_NS).collect do |n|
274
- places_without_uri << ScvModsFieldable.normalize(n.text, true)
275
- end
276
-
277
- return (places_without_uri.length > 0 ? places_without_uri : places_with_uri)
278
- end
279
-
280
- def coordinates(node=mods)
281
- coordinate_values = []
282
- node.xpath("./mods:subject/mods:cartographics/mods:coordinates", MODS_NS).collect do |n|
283
- n = ScvModsFieldable.normalize(n.text, true)
284
- if n.match(/-*\d+\.\d+\s*,\s*-*\d+\.\d+\s*/) # Expected coordinate format: 40.123456,-73.5678
285
- coordinate_values << n
286
- end
287
- end
288
- coordinate_values
289
- end
290
-
291
# Adds the fields derived from the MODS record to a Solr document hash.
#
# Starts from the result of +super+ when a superclass implementation exists,
# otherwise from the hash passed in. Returns that hash untouched when +mods+
# is nil. Otherwise it sets title, name, subject, collection, format,
# repository, project, place, date and geo fields, and finally passes every
# field for which the class reports +maps_field?+ through +map_value+.
#
# Start/end years are taken from the first origin_info_date_* field already
# present in the document. A missing OR unparseable end date falls back to
# the start year, so the year range can always be built once a start year
# has been parsed.
#
# @param solr_doc [Hash] the document to populate (mutated in place)
# @return [Hash] the populated Solr document
def to_solr(solr_doc={})
  solr_doc = (defined? super) ? super : solr_doc

  return solr_doc if mods.nil? # There is no mods. Return because there is nothing to process.

  solr_doc["all_text_teim"] ||= []

  solr_doc["title_si"] = sort_title
  solr_doc["title_ssm"] = titles
  solr_doc["alternative_title_ssm"] = alternative_titles
  solr_doc["all_text_teim"] += solr_doc["alternative_title_ssm"]
  solr_doc["lib_collection_sim"] = collections
  solr_doc["lib_name_sim"] = names
  solr_doc["lib_name_teim"] = solr_doc["lib_name_sim"]
  solr_doc["all_text_teim"] += solr_doc["lib_name_teim"]
  solr_doc["lib_all_subjects_ssm"] = all_subjects
  solr_doc["lib_all_subjects_teim"] = solr_doc["lib_all_subjects_ssm"]
  solr_doc["all_text_teim"] += solr_doc["lib_all_subjects_teim"]
  solr_doc["lib_name_ssm"] = solr_doc["lib_name_sim"]
  solr_doc["lib_author_sim"] = names(:marcrelator, 'aut')
  solr_doc["lib_recipient_sim"] = names(:marcrelator, 'rcp')
  solr_doc["lib_format_sim"] = formats
  solr_doc["lib_shelf_sim"] = shelf_locators
  solr_doc["lib_date_textual_ssm"] = textual_dates
  solr_doc["lib_date_notes_ssm"] = date_notes
  solr_doc["lib_non_date_notes_ssm"] = non_date_notes
  solr_doc["lib_item_in_context_url_ssm"] = item_in_context_url
  solr_doc["lib_project_url_ssm"] = project_url
  solr_doc["origin_info_place_ssm"] = origin_info_place
  solr_doc["origin_info_place_for_display_ssm"] = origin_info_place_for_display

  repo_marc_code = repository_code
  unless repo_marc_code.nil?
    solr_doc["lib_repo_short_ssim"] = [translate_repo_marc_code(repo_marc_code, 'short')]
    solr_doc["lib_repo_long_sim"] = [translate_repo_marc_code(repo_marc_code, 'long')]
    solr_doc["lib_repo_full_ssim"] = [translate_repo_marc_code(repo_marc_code, 'full')]
  end
  solr_doc["lib_repo_text_ssm"] = repository_text

  project_titles = projects
  unless project_titles.nil?
    solr_doc["lib_project_short_ssim"] = []
    solr_doc["lib_project_full_ssim"] = []
    project_titles.each do |project_title|
      solr_doc["lib_project_short_ssim"] << translate_project_title(project_title, 'short')
      solr_doc["lib_project_full_ssim"] << translate_project_title(project_title, 'full')
    end
    solr_doc["lib_project_short_ssim"].uniq!
    solr_doc["lib_project_full_ssim"].uniq!
  end

  # Create convenient start and end date values based on one of the many possible originInfo/dateX elements.
  # The first listed field that exists in the document wins.
  possible_start_date_fields = ['origin_info_date_issued_ssm', 'origin_info_date_issued_start_ssm', 'origin_info_date_created_ssm', 'origin_info_date_created_start_ssm', 'origin_info_date_other_ssm', 'origin_info_date_other_start_ssm']
  possible_end_date_fields = ['origin_info_date_issued_end_ssm', 'origin_info_date_created_end_ssm', 'origin_info_date_other_end_ssm']
  start_date_key = possible_start_date_fields.find { |key| solr_doc.has_key?(key) }
  end_date_key = possible_end_date_fields.find { |key| solr_doc.has_key?(key) }
  start_date = start_date_key ? solr_doc[start_date_key][0] : nil
  end_date = end_date_key ? solr_doc[end_date_key][0] : nil

  if start_date.present?
    end_date = start_date if end_date.blank?

    # Leading (optionally negative) run of up to four digits is taken as the year.
    year_regex = /^(-?\d{1,4}).*/
    extract_year = lambda do |date|
      year_match = date.match(year_regex)
      year_match ? zero_pad_year(year_match.captures[0]) : nil
    end

    start_year = extract_year.call(start_date)
    # An end date that is present but has no leading year is treated like a
    # missing end date; without this, end_year stayed nil and building the
    # range string below raised TypeError.
    end_year = extract_year.call(end_date) || start_year

    solr_doc["lib_start_date_year_itsi"] = start_year.to_i if start_year # TrieInt version for searches
    solr_doc["lib_end_date_year_itsi"] = end_year.to_i if end_year # TrieInt version for searches

    solr_doc["lib_date_year_range_si"] = start_year + '-' + end_year if start_year

    # When no textual date is available, fall back to other date data (if available)
    if solr_doc["lib_date_textual_ssm"].blank?
      solr_doc["lib_date_textual_ssm"] = date_range_to_textual_date(start_year.to_i, end_year.to_i)
    end
  end

  # Geo data
  solr_doc["geo"] = coordinates

  # Only values of existing keys are replaced here, so iterating while
  # assigning is safe.
  solr_doc.each do |k, v|
    if self.class.maps_field? k
      solr_doc[k] = self.class.map_value(k, v)
    end
  end
  solr_doc
end
401
-
402
# Left-pads a year with zeros to at least four digits, keeping a leading
# minus sign in front of the padded digits.
#
# @param year [#to_s] the year, e.g. 85, "85" or "-85"
# @return [String] the padded year, e.g. "0085" or "-0085"; values that
#   already have four or more digits come back unchanged
def zero_pad_year(year)
  digits = year.to_s
  sign = ''
  if digits.start_with?('-')
    sign = '-'
    digits = digits[1..-1]
  end
  # rjust leaves strings of width >= 4 as they are, so no length check is needed.
  sign + digits.rjust(4, '0')
end
412
-
413
- def self.normalize(t, strip_punctuation=false)
414
- # strip whitespace
415
- n_t = t.dup.strip
416
- # collapse intermediate whitespace
417
- n_t.gsub!(/\s+/, ' ')
418
- # pull off paired punctuation, and any leading punctuation
419
- if strip_punctuation
420
- n_t = n_t.sub(/^\((.*)\)$/, "\\1")
421
- n_t = n_t.sub(/^\{(.*)\}$/, "\\1")
422
- n_t = n_t.sub(/^\[(.*)\]$/, "\\1")
423
- n_t = n_t.sub(/^"(.*)"$/, "\\1")
424
- n_t = n_t.sub(/^'(.*)'$/, "\\1")
425
- n_t = n_t.sub(/^<(.*)>$/, "\\1")
426
- #n_t = n_t.sub(/^\p{Ps}(.*)\p{Pe}/u, "\\1")
427
- n_t = n_t.sub(/^[[:punct:]]+/, '')
428
- # this may have 'created' leading/trailing space, so strip
429
- n_t.strip!
430
- end
431
- n_t
4
+ extend Cul::Hydra::Solrizer::ModsFieldable::ClassMethods
5
+ included do
6
+ include Cul::Hydra::Solrizer::ModsFieldable
432
7
  end
433
8
  end
434
9
  end