bplmodels 0.0.91 → 0.0.93

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,56 @@
1
+ module Bplmodels
2
+ class PageMetadata < ActiveFedora::OmDatastream
3
+ include OM::XML::Document
4
+
5
+ OAI_NS = 'http://www.bpl.org/repository/xml/ns/page'
6
+ OAI_SCHEMA = 'http://www.bpl.org/repository/xml/xsd/page.xsd'
7
+ OAI_PARAMS = {
8
+ "version" => "0.0.1",
9
+ "xmlns:xlink" => "http://www.w3.org/1999/xlink",
10
+ "xmlns:xsi" => "http://www.w3.org/2001/XMLSchema-instance",
11
+ "xmlns" => OAI_NS,
12
+ "xsi:schemaLocation" => "#{OAI_NS} #{OAI_SCHEMA}",
13
+ }
14
+
15
+ set_terminology do |t|
16
+ t.root :path => 'pageData', :xmlns => OAI_NS
17
+
18
+ t.page(:path=>"page") {
19
+ t.page_type(:path=>'pageType')
20
+ t.hand_side(:path=>'handSide')
21
+ t.page_number(:path=>'pageNumber') {
22
+ t.sequence(:path=>{:attribute=>"sequence"})
23
+ }
24
+ t.has_djvu(:path=>'hasDJVU')
25
+ t.has_ocrMaster(:path=>'hasOCRMaster')
26
+
27
+
28
+ }
29
+
30
+ end
31
+
32
# Builds the XML skeleton for a datastream that has no content yet.
#
# The document consists of a single, empty <pageData> root element carrying
# the attributes listed in OAI_PARAMS (schema version, the xlink and xsi
# namespace declarations, the default namespace and the schema location).
#
# @return [Nokogiri::XML::Document] the template document
def self.xml_template
  Nokogiri::XML::Builder.new do |xml|
    xml.pageData(OAI_PARAMS) {}
  end.doc
end
49
+
50
+ #Required for Active Fedora 9
51
+ def prefix(path=nil)
52
+ return ''
53
+ end
54
+
55
+ end
56
+ end
@@ -0,0 +1,14 @@
1
+ module Bplmodels
2
+ class TranscriptionFile < Bplmodels::File
3
+
4
+ # Use a callback method to declare which derivatives you want
5
+ makes_derivatives :generate_derivatives
6
+
7
# Short type label identifying this model.
#
# NOTE(review): the label was previously spelled 'transciption_file'; confirm
# that nothing outside this class matches on the old spelling.
#
# @return [String] the type label for transcription files
def fedora_name
  'transcription_file'
end
10
+
11
+
12
+
13
+ end
14
+ end
@@ -0,0 +1,37 @@
1
+ module Bplmodels
2
+ class Volume < Bplmodels::ComplexObjectBase
3
+ #has_file_datastream :name => 'productionMaster', :type => ActiveFedora::Datastream
4
+
5
+ has_and_belongs_to_many :collection, :class_name => 'Bplmodels::Book', :property => :is_volume_of
6
+
7
+
8
+ #A collection can have another collection as a member, or an image
9
# Registers +fedora_object+ as a member of this volume and saves both objects.
#
# Only exact instances of Bplmodels::ImageFile are linked: they are recorded
# in the members datastream and the relationship is added on both sides.
# Any other object is left unlinked, but both objects are still saved.
#
# @param fedora_object [Object] candidate member; must respond to +save!+
def insert_member(fedora_object)
  linkable = fedora_object.instance_of?(Bplmodels::ImageFile)

  if linkable
    # Record the member in the members datastream.
    members.insert_member(
      :member_id    => fedora_object.pid,
      :member_title => fedora_object.titleSet_display,
      :member_type  => fedora_object.fedora_name
    )

    # Record the relationship on both objects (rels-ext).
    fedora_object.object << self
    image_files << fedora_object
  end

  fedora_object.save!
  save!
end
25
+
26
+ def fedora_name
27
+ 'volume'
28
+ end
29
+
30
# Extends the inherited solr document with the concrete model class name.
#
# @param doc [Hash] solr document to start from
# @return [Hash] the document returned by +super+, with
#   'active_fedora_model_ssi' set to this object's class name
def to_solr(doc = {})
  super(doc).tap do |solr_doc|
    solr_doc['active_fedora_model_ssi'] = self.class.name
  end
end
35
+
36
+ end
37
+ end
@@ -49,6 +49,11 @@ module Bplmodels
49
49
  t.reason(:path=>'reason')
50
50
  }
51
51
 
52
+ t.volume_match_md5s(:path=>'volumeMatchMD5s') {
53
+ t.marc(:path=>'marc')
54
+ t.iaMeta(:path=>'iaMeta')
55
+ }
56
+
52
57
  end
53
58
 
54
59
  def self.xml_template
@@ -65,6 +70,7 @@ module Bplmodels
65
70
  end
66
71
 
67
72
 
73
+
68
74
  def insert_file_path(value=nil)
69
75
  ingest_filepath_index = self.item_source.ingest_filepath.count
70
76
 
@@ -0,0 +1,88 @@
1
+ module ActiveFedora::RDF
2
+ # Responsible for generating the solr document (via #generate_solr_document) of the
3
+ # given object.
4
+ #
5
+ # @see ActiveFedora::Indexing
6
+ # @see ActiveFedora::IndexingService
7
class IndexingService
  include Solrizer::Common
  attr_reader :object

  # @param obj [#resource] the object to build a solr document for; its class
  #   must respond to +index_config+ (read when the field map is built)
  def initialize(obj)
    @object = obj
  end

  # Builds the solr document hash for the indexed fields of {#object}.
  #
  # @param prefix_method [#call, nil] optional callable that maps a field key
  #   to the solr field name; when nil the key's string form is used
  # @yield [Hash] the solr document, so the caller can amend it
  # @return [Hash] the solr document
  def generate_solr_document(prefix_method = nil)
    add_assertions(prefix_method).tap do |solr_doc|
      yield(solr_doc) if block_given?
    end
  end

  protected

  # Walks every field in the field map and appends each of its values to
  # +solr_doc+.
  #
  # @return [Hash] the same +solr_doc+ that was passed in (or a new hash)
  def add_assertions(prefix_method, solr_doc = {})
    fields.each do |field_key, field_info|
      solr_field_key = solr_document_field_name(field_key, prefix_method)
      field_info.values.each { |val| append_to_solr_doc(solr_doc, solr_field_key, field_info, val) }
    end
    solr_doc
  end

  # Override this in order to allow one field to be expanded into more than one:
  #   example:
  #     def append_to_solr_doc(solr_doc, field_key, field_info, val)
  #       Solrizer.set_field(solr_doc, 'lcsh_subject_uri', val.to_uri, :symbol)
  #       Solrizer.set_field(solr_doc, 'lcsh_subject_label', val.to_label, :searchable)
  #     end
  def append_to_solr_doc(solr_doc, solr_field_key, field_info, val)
    field_value = solr_document_field_value(val)
    self.class.create_and_insert_terms(solr_field_key, field_value, field_info.behaviors, solr_doc)
  end

  # Resolves the solr field name for a field key.
  #
  # @return the result of calling +prefix_method+ with the key when a
  #   callable is given, otherwise the key converted to a String
  def solr_document_field_name(field_key, prefix_method)
    prefix_method ? prefix_method.call(field_key) : field_key.to_s
  end

  # Converts a value into the form stored in solr: URIs become strings,
  # resources become their label (when they are nodes) or their subject as a
  # string, and everything else is passed through untouched.
  def solr_document_field_value(val)
    case val
    when ::RDF::URI then val.to_s
    when ActiveTriples::Resource then (val.node? ? val.rdf_label : val.rdf_subject.to_s)
    else val
    end
  end

  # @return the resource of the wrapped object
  def resource
    object.resource
  end

  # @return the index configuration declared on the wrapped object's class
  def index_config
    object.class.index_config
  end

  # Builds the field map instance, inserting one entry per index_config item.
  def fields
    field_map_class.new do |field_map|
      index_config.each do |name, index_field_config|
        field_map.insert(name, index_field_config, object)
      end
    end
  end

  # Override this method to use your own FieldMap class for custom indexing of objects and properties
  def field_map_class
    ActiveFedora::RDF::FieldMap
  end
end
88
+ end
@@ -0,0 +1,189 @@
1
+
2
+ module ActiveFedora::Rdf
3
+ class Term
4
+ attr_accessor :parent, :value_arguments, :node_cache
5
+ delegate *(Array.public_instance_methods - [:__send__, :__id__, :class, :object_id] + [:as_json]), :to => :result
6
+ def initialize(parent, value_arguments)
7
+ self.parent = parent
8
+ self.value_arguments = value_arguments
9
+ end
10
+
11
+ def clear
12
+ set(nil)
13
+ end
14
+
15
# Reads the current value(s) of this property from the parent graph.
#
# Every matching statement's object is converted with +convert_object+ and
# nil conversions are dropped.
#
# @return [Array, Object, nil] the full list when the property has no
#   configuration or is configured as multivalue; otherwise only the first
#   converted value (nil when there is none)
def result
  statements = parent.query(:subject => rdf_subject, :predicate => predicate)
  values = statements.map { |statement| convert_object(statement.object) }.reject(&:nil?)
  if property_config && !property_config[:multivalue]
    values.first
  else
    values
  end
end
22
+
23
# Replaces the property's current values with +values+.
#
# A non-Array argument is wrapped in a one-element list (nil becomes an empty
# list). Existing statements are cleared first, then each value is inserted.
# The parent is persisted only when its class declares +:parent+ as its
# repository and the parent itself has a repository.
#
# @param values [Array, Object, nil] the new value or values
def set(values)
  new_values = values.kind_of?(Array) ? values : [values].compact
  empty_property
  new_values.each { |value| set_value(value) }
  return unless parent.class.repository == :parent && parent.send(:repository)
  parent.persist!
end
31
+
32
+ def empty_property
33
+ parent.query([rdf_subject, predicate, nil]).each_statement do |statement|
34
+ if !uri_class(statement.object) || uri_class(statement.object) == class_for_property
35
+ parent.delete(statement)
36
+ end
37
+ end
38
+ end
39
+
40
# Creates a child node for this property and attaches it.
#
# The node's subject is taken from (and removed from) the 'id' entry of
# +attributes+ when present; otherwise a new blank node is used. The node is
# appended to the parent's list when the parent is a list resource, pushed by
# subject when the node itself is an RDF::List, and pushed directly otherwise.
#
# @param attributes [Hash] attributes assigned to the new node
# @return the newly built node
def build(attributes={})
  subject = attributes.key?('id') ? attributes.delete('id') : RDF::Node.new
  node = make_node(subject)
  node.attributes = attributes

  if parent.kind_of?(List::ListResource)
    parent.list << node
  elsif node.kind_of?(RDF::List)
    push(node.rdf_subject)
  else
    push(node)
  end

  node
end
54
+
55
+ def first_or_create(attributes={})
56
+ result.first || build(attributes)
57
+ end
58
+
59
+ def delete(*values)
60
+ values.each do |value|
61
+ parent.delete([rdf_subject, predicate, value])
62
+ end
63
+ end
64
+
65
+ def << (values)
66
+ values = Array.wrap(result) | Array.wrap(values)
67
+ self.set(values)
68
+ end
69
+
70
+ alias_method :push, :<<
71
+
72
# Looks up the configuration for this term's property on the parent.
#
# When the property is the RDF type (either RDF.type itself or anything whose
# string form is "type") and the parent has no explicit configuration for it,
# the built-in +type_property+ configuration is used instead.
#
# @return the property's configuration, or whatever the parent's lookup
#   returns when nothing is configured
def property_config
  type_requested = property == RDF.type || property.to_s == "type"
  configured = parent.send(:properties)[property]
  type_requested && !configured ? type_property : configured
end
76
+
77
+ def type_property
78
+ {:multivalue => true, :predicate => RDF.type}
79
+ end
80
+
81
+ def reset!
82
+ end
83
+
84
+ def property
85
+ value_arguments.last
86
+ end
87
+
88
# Determines the subject used for this term's statements.
#
# +value_arguments+ holds either [property] or [subject, property]. With two
# entries the first one is the subject; with a single entry the parent's own
# subject is used.
#
# @return the explicit subject, or the parent's +rdf_subject+
# @raise [ArgumentError] when +value_arguments+ has fewer than 1 or more
#   than 2 entries
def rdf_subject
  unless (1..2).cover?(value_arguments.length)
    # `raise ArgumentError, msg` -- calling ArgumentError(...) like a method
    # would itself fail with NoMethodError and hide the real problem.
    raise ArgumentError, "wrong number of arguments (#{value_arguments.length} for 1-2)"
  end

  value_arguments.length > 1 ? value_arguments.first : parent.rdf_subject
end
96
+
97
+ protected
98
+
99
+ def node_cache
100
+ @node_cache ||= {}
101
+ end
102
+
103
# Inserts a single value for this property into the parent graph.
#
# Values that expose +resource+ are unwrapped, and plain datatypes are turned
# into literals. A Resource is attached as a child node (its cache entry is
# cleared first); anything else is converted with +to_uri+ when available and
# inserted as a statement.
#
# @param val the value to store
# @raise [RuntimeError] when the value is neither an RDF::Value nor an
#   RDF::Literal after conversion
def set_value(val)
  original = val
  candidate = val.respond_to?(:resource) ? val.resource : val
  candidate = value_to_node(candidate)

  if candidate.kind_of?(Resource)
    node_cache[candidate.rdf_subject] = nil
    add_child_node(candidate, original)
    return
  end

  candidate = candidate.to_uri if candidate.respond_to?(:to_uri)
  unless candidate.kind_of?(RDF::Value) || candidate.kind_of?(RDF::Literal)
    raise 'value must be an RDF URI, Node, Literal, or a valid datatype. See RDF::Literal'
  end
  parent.insert [rdf_subject, predicate, candidate]
end
117
+
118
+ def value_to_node(val)
119
+ valid_datatype?(val) ? RDF::Literal(val) : val
120
+ end
121
+
122
+ def add_child_node(resource,object=nil)
123
+ parent.insert [rdf_subject, predicate, resource.rdf_subject]
124
+ resource.parent = parent unless resource.frozen?
125
+ self.node_cache[resource.rdf_subject] = (object ? object : resource)
126
+ resource.persist! if resource.class.repository == :parent
127
+ end
128
+
129
# Resolves the predicate for this term.
#
# @return the property itself when it already is an RDF::URI, otherwise the
#   :predicate entry of the property's configuration
def predicate
  property.kind_of?(RDF::URI) ? property : property_config[:predicate]
end
133
+
134
# Tells whether +val+ is a plain datatype that can be wrapped in a literal.
#
# @param val the value to inspect
# @return [Boolean] true for String, Date, Time, Numeric and Symbol
#   instances and for the booleans true/false; false otherwise
def valid_datatype?(val)
  return true if [String, Date, Time, Numeric, Symbol].any? { |type| val.is_a?(type) }

  # Only true and false are equal to their own double negation.
  val == !!val
end
137
+
138
+ # Converts an object to the appropriate class.
139
+ def convert_object(value)
140
+ value = value.object if value.kind_of? RDF::Literal
141
+ value = make_node(value) if value.kind_of? RDF::Resource
142
+ value
143
+ end
144
+
145
+ ##
146
+ # Build a child resource or return it from this object's cache
147
+ #
148
+ # Builds the resource from the class_name specified for the
149
+ # property.
150
# Returns the child resource for +value+, reusing the cached node if any.
#
# The class is chosen from the value before a nil value is swapped for a
# fresh blank node. When the property is configured with a :class_name and
# the value's class differs from the property's class, nil is returned and
# nothing is cached.
#
# @param value the subject of the child resource, or nil for a new blank node
# @return the cached or newly built node, or nil on a class mismatch
def make_node(value)
  klass = class_for_value(value)
  value = RDF::Node.new if value.nil?
  node = node_cache[value] || klass.from_uri(value, parent)
  return nil if property_config[:class_name] && class_for_value(value) != class_for_property
  node_cache[value] ||= node
  node
end
159
+
160
# Finds the top of this term's parent chain and memoizes it.
#
# Walks up through +parent+ links until a node has no parent or is its own
# parent. If that top node responds to +datastream+ and has one, the
# datastream is the result; otherwise the top node itself is.
#
# The result is computed as the value of the memoized expression rather than
# via an early +return+, because returning from inside `||= begin ... end`
# leaves the method before the assignment and the datastream would be
# recomputed on every call.
#
# @return the top-most parent's datastream, or the top-most parent
def final_parent
  @final_parent ||= begin
    ancestor = parent
    while ancestor != ancestor.parent && ancestor.parent
      ancestor = ancestor.parent
    end

    if ancestor.respond_to?(:datastream) && ancestor.datastream
      ancestor.datastream
    else
      ancestor
    end
  end
end
170
+
171
+ def class_for_value(v)
172
+ uri_class(v) || class_for_property
173
+ end
174
+
175
+ def uri_class(v)
176
+ v = RDF::URI.new(v) if v.kind_of? String
177
+ type_uri = parent.query([v, RDF.type, nil]).to_a.first.try(:object)
178
+ ActiveFedora::Rdf::Resource.type_registry[type_uri]
179
+ end
180
+
181
# Resolves the class used to build child resources for this property.
#
# Uses the configured :class_name, falling back to
# ActiveFedora::Rdf::Resource. A String class name is resolved to a class
# relative to the class of +final_parent+.
#
# @return the resolved class
def class_for_property
  configured = property_config[:class_name] || ActiveFedora::Rdf::Resource
  return configured unless configured.kind_of?(String)
  ActiveFedora.class_from_string(configured, final_parent.class)
end
187
+
188
+ end
189
+ end
@@ -20,6 +20,7 @@ module Bplmodels
20
20
  GENRE_LOOKUP['Books'] = {:id=>'tgm001221', :authority=>'gmgpc'}
21
21
  GENRE_LOOKUP['Albums'] = {:id=>'tgm000229', :authority=>'gmgpc'}
22
22
  GENRE_LOOKUP['Musical notation'] = {:id=>'tgm006926', :authority=>'lctgm'}
23
+ GENRE_LOOKUP['Music'] = {:id=>'tgm006906', :authority=>'lctgm'}
23
24
 
24
25
  COUNTRY_TGN_LOOKUP = {}
25
26
  COUNTRY_TGN_LOOKUP['United States'] = {:tgn_id=>7012149, :tgn_country_name=>'United States'}
@@ -1,5 +1,6 @@
1
1
  #!/bin/env ruby
2
2
  # encoding: utf-8
3
+ require 'htmlentities'
3
4
 
4
5
  module Bplmodels
5
6
  class DatastreamInputFuncs
@@ -795,7 +796,9 @@ module Bplmodels
795
796
  end
796
797
 
797
798
# Normalizes a raw metadata value into clean, single-line UTF-8 text.
#
# Steps, in order:
# 1. Convert the input to a String and work on a copy, so nil and other
#    non-String input is accepted and the caller's (possibly frozen) string
#    is never modified.
# 2. Tag the copy as UTF-8; if the bytes are not valid UTF-8, re-encode them
#    from binary, dropping every byte that cannot be converted.
# 3. Replace tabs, line breaks and <br/> tags with single spaces.
# 4. Strip remaining markup with the full sanitizer, decode HTML entities and
#    trim surrounding whitespace.
#
# @param value [#to_s] the raw value
# @return [String] the cleaned text
def self.utf8Encode(value)
  text = value.to_s.dup.force_encoding('UTF-8')
  text.encode!("UTF-8", 'binary', invalid: :replace, undef: :replace, replace: '') unless text.valid_encoding?
  flattened = text.gsub(/\r?\n?\t/, ' ').gsub(/\r?\n/, ' ').gsub(/<br[\s]*\/>/, ' ')
  ::HTMLEntities.new.decode(ActionView::Base.full_sanitizer.sanitize(flattened)).strip
end
800
803
 
801
804
  def self.split_with_nils(value)
@@ -1,11 +1,47 @@
1
1
  module Bplmodels
2
2
  class Finder
3
3
 
4
# Collects the solr documents of all files attached to +pid+, grouped by
# file model.
#
# Each document goes into the first bucket whose model URI appears in its
# 'has_model_ssim' value, checked in this order: audio, images, documents,
# ereader. Documents matching none of them go into :generic. Every bucket is
# passed through +sort_files+ before being returned.
#
# @param pid [String] the pid whose files are looked up via 'is_file_of_ssim'
# @return [Hash{Symbol=>Array}] buckets keyed by :images, :documents, :audio,
#   :ereader and :generic
def self.getCollectionObjects(pid)
  # Insertion order is the matching precedence.
  bucket_for_model = {
    'info:fedora/afmodel:Bplmodels_AudioFile'    => :audio,
    'info:fedora/afmodel:Bplmodels_ImageFile'    => :images,
    'info:fedora/afmodel:Bplmodels_DocumentFile' => :documents,
    'info:fedora/afmodel:Bplmodels_EreaderFile'  => :ereader
  }
  return_hash = { :images => [], :documents => [], :audio => [], :ereader => [], :generic => [] }

  Bplmodels::File.find_in_batches('is_file_of_ssim'=>"info:fedora/#{pid}") do |group|
    group.each do |solr_object|
      models = solr_object['has_model_ssim']
      match = bucket_for_model.find { |model_uri, _bucket| models.include?(model_uri) }
      return_hash[match ? match.last : :generic] << solr_object
    end
  end

  return_hash.each_key { |bucket| return_hash[bucket] = sort_files(return_hash[bucket]) }
  return_hash
end
38
+
4
39
  def self.getFiles(pid)
5
40
  return_hash = {}
6
41
  return_hash[:images] = []
7
42
  return_hash[:documents] = []
8
43
  return_hash[:audio] = []
44
+ return_hash[:ereader] = []
9
45
  return_hash[:generic] = []
10
46
 
11
47
  preceding_pid_lookup = []
@@ -18,6 +54,8 @@ module Bplmodels
18
54
  return_hash[:images] << solr_object
19
55
  elsif solr_object['has_model_ssim'].include?('info:fedora/afmodel:Bplmodels_DocumentFile')
20
56
  return_hash[:documents] << solr_object
57
+ elsif solr_object['has_model_ssim'].include?('info:fedora/afmodel:Bplmodels_EreaderFile')
58
+ return_hash[:ereader] << solr_object
21
59
  else
22
60
  return_hash[:generic] << solr_object
23
61
  end
@@ -27,6 +65,7 @@ module Bplmodels
27
65
  return_hash[:images] = sort_files(return_hash[:images])
28
66
  return_hash[:documents] = sort_files(return_hash[:documents])
29
67
  return_hash[:audio] = sort_files(return_hash[:audio])
68
+ return_hash[:ereader] = sort_files(return_hash[:ereader])
30
69
  return_hash[:generic] = sort_files(return_hash[:generic])
31
70
 
32
71
  return return_hash
@@ -97,6 +136,26 @@ module Bplmodels
97
136
  return sort_files(return_list)
98
137
  end
99
138
 
139
# Collects every e-reader file document attached to +pid+.
#
# @param pid [String] the pid matched against 'is_ereader_of_ssim'
# @return the collected solr documents after passing through +sort_files+
def self.getEreaderFiles(pid)
  ereader_docs = []
  Bplmodels::EreaderFile.find_in_batches('is_ereader_of_ssim'=>"info:fedora/#{pid}") do |batch|
    batch.each do |doc|
      ereader_docs.push(doc)
    end
  end
  sort_files(ereader_docs)
end
148
+
149
# Collects every volume document attached to +pid+.
#
# @param pid [String] the pid matched against 'is_volume_of_ssim'
# @return the collected solr documents after passing through +sort_files+
def self.getVolumeObjects(pid)
  volume_docs = []
  Bplmodels::Volume.find_in_batches('is_volume_of_ssim'=>"info:fedora/#{pid}") do |batch|
    batch.each do |doc|
      volume_docs.push(doc)
    end
  end
  sort_files(volume_docs)
end
158
+
100
159
  def self.getFirstImageFile(pid)
101
160
  Bplmodels::ImageFile.find_in_batches('is_image_of_ssim'=>"info:fedora/#{pid}", 'is_following_image_of_ssim'=>'') do |group|
102
161
  group.each { |solr_object|
@@ -124,6 +183,24 @@ module Bplmodels
124
183
  return nil
125
184
  end
126
185
 
186
# Finds the first e-reader file of +pid+: one attached to the pid whose
# 'is_following_ereader_of_ssim' field is queried with an empty string.
#
# @param pid [String] the pid matched against 'is_ereader_of_ssim'
# @return the first solr document yielded by the search, or nil when the
#   search yields none
def self.getFirstEreaderFile(pid)
  Bplmodels::EreaderFile.find_in_batches('is_ereader_of_ssim'=>"info:fedora/#{pid}", 'is_following_ereader_of_ssim'=>'') do |batch|
    # Leave the method as soon as any document is seen.
    batch.each do |doc|
      return doc
    end
  end
  nil
end
194
+
195
# Finds the first volume of +pid+: one attached to the pid whose
# 'is_following_volume_of_ssim' field is queried with an empty string.
#
# @param pid [String] the pid matched against 'is_volume_of_ssim'
# @return the first solr document yielded by the search, or nil when the
#   search yields none
def self.getFirstVolumeObject(pid)
  Bplmodels::Volume.find_in_batches('is_volume_of_ssim'=>"info:fedora/#{pid}", 'is_following_volume_of_ssim'=>'') do |batch|
    # Leave the method as soon as any document is seen.
    batch.each do |doc|
      return doc
    end
  end
  nil
end
203
+
127
204
  def self.getNextImageFile(pid)
128
205
  Bplmodels::ImageFile.find_in_batches('is_following_image_of_ssim'=>"info:fedora/#{pid}") do |group|
129
206
  group.each { |solr_object|
@@ -151,6 +228,15 @@ module Bplmodels
151
228
  return nil
152
229
  end
153
230
 
231
# Finds the e-reader file whose 'is_following_ereader_of_ssim' field points
# at +pid+.
#
# NOTE(review): this method is spelled "EReader" while the sibling finders
# use "Ereader"; the name is kept as-is for existing callers.
#
# @param pid [String] the pid of the current e-reader file
# @return the first solr document yielded by the search, or nil when the
#   search yields none
def self.getNextEReaderFile(pid)
  Bplmodels::EreaderFile.find_in_batches('is_following_ereader_of_ssim'=>"info:fedora/#{pid}") do |batch|
    # Leave the method as soon as any document is seen.
    batch.each do |doc|
      return doc
    end
  end
  nil
end
239
+
154
240
  def self.getPrevImageFile(pid)
155
241
  Bplmodels::ImageFile.find_in_batches('is_preceding_image_of_ssim'=>"info:fedora/#{pid}") do |group|
156
242
  group.each { |solr_object|
@@ -178,6 +264,15 @@ module Bplmodels
178
264
  return nil
179
265
  end
180
266
 
267
# Finds the e-reader file whose 'is_preceding_ereader_of_ssim' field points
# at +pid+.
#
# Uses Bplmodels::EreaderFile, the same constant spelling as the other
# e-reader finders in this class.
#
# @param pid [String] the pid of the current e-reader file
# @return the first solr document yielded by the search, or nil when the
#   search yields none
def self.getPrevEreaderFile(pid)
  Bplmodels::EreaderFile.find_in_batches('is_preceding_ereader_of_ssim'=>"info:fedora/#{pid}") do |group|
    # Leave the method as soon as any document is seen.
    group.each { |solr_object| return solr_object }
  end
  nil
end
275
+
181
276
  def self.getFileParentObject(file_pid)
182
277
  Bplmodels::File.find_in_batches('id'=>"#{file_pid}") do |group|
183
278
  group.each { |solr_object|
@@ -188,5 +283,14 @@ module Bplmodels
188
283
  return nil
189
284
  end
190
285
 
286
+
287
# Finds a file of +pid+ whose 'derivative_processsed_ssi' field is 'false'.
#
# NOTE(review): the field name has three consecutive "s" characters; it is
# kept exactly as written -- confirm it matches the name used at index time.
#
# @param pid [String] the pid matched against 'is_file_of_ssim'
# @return the first solr document yielded by the search, or nil when the
#   search yields none
def self.getUnprocessedDerivatives(pid)
  Bplmodels::File.find_in_batches('is_file_of_ssim'=>"info:fedora/#{pid}", 'derivative_processsed_ssi'=>'false') do |batch|
    # Leave the method as soon as any document is seen.
    batch.each do |doc|
      return doc
    end
  end
  nil
end
191
295
  end
192
296
  end