RubyGems - lbp - Versions diffs - 0.0.2 → 0.1.0 - Mend

lbp 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (34) hide show

checksums.yaml +4 -4
data/.gitignore +4 -0
data/.ruby-gemset +1 -1
data/.ruby-version +1 -1
data/bin/lbp +33 -2
data/lbp.gemspec +3 -4
data/lib/lbp.rb +15 -5
data/lib/lbp/expression.rb +163 -0
data/lib/lbp/file.rb +173 -0
data/lib/lbp/file_part.rb +120 -0
data/lib/lbp/functions.rb +11 -2
data/lib/lbp/manifestation.rb +36 -0
data/lib/lbp/paragraph_image.rb +39 -0
data/lib/lbp/query.rb +181 -0
data/lib/lbp/resource.rb +72 -0
data/lib/lbp/transcription.rb +27 -243
data/lib/lbp/version.rb +1 -1
data/spec/config_globals.rb +28 -13
data/spec/expression_spec.rb +96 -0
data/spec/file_part_spec.rb +55 -0
data/spec/file_spec.rb +130 -0
data/spec/paragraph_image_spec.rb +46 -0
data/spec/query_spec.rb +27 -0
data/spec/resource_spec.rb +78 -0
data/spec/transcription_spec.rb +21 -111
metadata +40 -19
data/lib/lbp/collection.rb +0 -131
data/lib/lbp/item.rb +0 -153
data/lib/lbp/item_group.rb +0 -52
data/lib/lbp/paragraph.rb +0 -87
data/spec/collection_spec.rb +0 -60
data/spec/item_group_spec.rb +0 -39
data/spec/item_spec.rb +0 -74
data/spec/paragraph_spec.rb +0 -37

data/lib/lbp/file_part.rb ADDED

@@ -0,0 +1,120 @@
+require 'nokogiri'
+#require 'rugged'
+#require 'lbp/functions'
+require 'lbp'
+module Lbp
+	class FilePart
+		attr_reader :partid, :element
+		def initialize(filepath, transcription_type, confighash, partid)
+			@confighash = confighash
+			@partid = partid
+			@filepath = filepath
+			@transcription_type = transcription_type
+			@element = self.element_name
+	  end
+	  def element_name
+	  	transcr = File.new(@filepath, @transcription_type, @confighash)
+	  	xmlobject = transcr.nokogiri
+	  	element_name = xmlobject.xpath("name(//node()[@xml:id='#{@partid}'])", 'tei' => 'http://www.tei-c.org/ns/1.0')
+	  end
+	  # def number
+	  # 	transcr = File.new(@filepath, @transcription_type, @confighash)
+	  # 	totalparts = transcr.number_of_body_paragraphs
+	  # 	xmlobject = transcr.nokogiri
+	  # 	parts_following = xmlobject.xpath("//tei:body//tei:#{@element}[preceding::tei:#{@element}[@xml:id='#{@partid}']]", 'tei' => 'http://www.tei-c.org/ns/1.0').count
+	  # 	part_number = totalparts - parts_following
+			# return part_number
+	  # end
+	  def next
+	  	xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
+	  	nextpartid = xmlobject.xpath("//tei:#{@element}[@xml:id='#{@partid}']/following::tei:#{@element}[1]/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
+			if nextpartid.text == nil
+        return nil
+      else
+				return FilePart.new(@filepath, @transcription_type, @confighash, nextpartid.text)
+      end
+	  end
+	  def previous
+	  	xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
+	  	previouspartid = xmlobject.xpath("//tei:#{@element}[@xml:id='#{@partid}']/preceding::tei:#{@element}[1]/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
+	  	if previouspartid.empty?
+        return nil
+      else
+				return FilePart.new(@filepath, @transcription_type, @confighash, previouspartid.text)
+      end
+	  end
+		def number_of_zones
+			xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
+			partid_with_hash = "#" + @partid
+			result = xmlobject.xpath("/tei:TEI/tei:facsimile//tei:surface/tei:zone[@start='#{partid_with_hash}']", 'tei' => 'http://www.tei-c.org/ns/1.0')
+			return result.count
+		end
+	  def xml
+	  	result = File.new(@filepath, @transcription_type, @confighash).nokogiri
+	  	p = result.xpath("//tei:#{@element}[@xml:id='#{@partid}']", 'tei' => 'http://www.tei-c.org/ns/1.0')
+	  end
+	  def transform(xsltfile, xslt_param_array=[])
+	  	result = File.new(@filepath, @transcription_type, @confighash).transform(xsltfile, xslt_param_array)
+			p = result.xpath("//#{@element}[@id='#{@partid}']")
+			return p
+		end
+		# TODO
+		# not working because result of transformation is no longer valid xml document
+		# might be easier to pass the part if to the xslt_param_array and then return the result
+		#def transform_main_view(xslt_param_array=[])
+		#	result = File.new(@filepath, @transcription_type, @confighash).transform_main_view(xslt_param_array)
+		#	#p = result.xpath("//#{@element}[@id='#{@partid}']")
+		#	#hard coding for the moment so it will only really work for paragraphs
+		#	p = result.xpath("//div[@id='pwrap_#{@partid}']")
+		#	return p
+		#end
+		def transform_plain_text(xslt_param_array=[])
+			# not that it could be slightly confusing that paragraph plain text uses the transform clean,
+			# because we still the basic paragraph elements in order to select the desired paragraph
+			result = File.new(@filepath, @transcription_type, @confighash).transform_clean_nokogiri(xslt_param_array)
+			p = result.xpath("//#{@element}[@id='#{@partid}']")
+			return p
+		end
+		def word_count
+    	plaintext = self.transform_plain_text
+    	size = plaintext.text.split.size
+    end
+    def word_array
+    	plaintext = self.transform_plain_text
+    	word_array = plaintext.text.split
+    	word_array.map!{ |word| word.downcase}
+    end
+    def word_frequency(sort='frequency', order='descending')
+    	word_array = self.word_array
+    	wf = Hash.new(0)
+			word_array.each { |word| wf[word] += 1 }
+			if sort == "frequency"
+				if order == "descending" # high to low
+					wf = wf.sort_by{|k,v| v}.reverse
+				elsif order == "ascending" # low to high
+					wf = wf.sort_by{|k,v| v}
+				end
+			elsif sort == "word"
+				if order == "descending" # z - a
+						wf = wf.sort_by{|k,v| k}.reverse
+				elsif order == "ascending" #a - z
+						wf = wf.sort_by{|k,v| k}
+				end
+			end
+			return wf.to_h
+		end
+	end
+end

data/lib/lbp/functions.rb CHANGED

@@ -2,11 +2,20 @@
 require 'nokogiri'
 require 'open-uri'
-def xslt_transform(xmlfile, xsltfile, xslt_param_array)
-	xml  = Nokogiri::XML(open(xmlfile))
+def xslt_transform(xml_open_uri_file, xsltfile, xslt_param_array)
+	xml  = xml_open_uri_file
 	xslt = Nokogiri::XSLT(open(xsltfile))
 	result_doc = xslt.transform(xml, xslt_param_array)
 	return result_doc
 end
+def xslt_apply_to(xml_open_uri_file, xsltfile, xslt_param_array)
+	xml  = xml_open_uri_file
+	xslt = Nokogiri::XSLT(open(xsltfile))
+	result_doc = xslt.apply_to(xml, xslt_param_array)
+	return result_doc
+end

data/lib/lbp/manifestation.rb ADDED

@@ -0,0 +1,36 @@
+require 'openssl'
+require 'rdf'
+require 'rdf/rdfxml'
+require 'rdf/ntriples'
+require 'rdf/vocab'
+require 'lbp'
+module Lbp
+	class Manifestation < Resource
+		#inherits initialization from Resource
+		def transcriptionUrls
+			results = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasTranscription"))
+			transcriptions = results.map {|m| m[:o].to_s}
+			return transcriptions
+		end
+		def canonicalTranscriptionUrl
+			# TODO this check against an empty array should
+			# occur everywhere the filter is used
+			# maybe we need a helper function that does this once
+			unless self.results.count == 0
+				transcriptionUrl = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalTranscription")).first[:o].to_s
+				return transcriptionUrl
+			end
+		end
+		def canonicalTranscription
+			url = self.canonicalTranscriptionUrl
+			transcriptionObj = Transcription.new(url)
+			return transcriptionObj
+		end
+	end
+end

data/lib/lbp/paragraph_image.rb ADDED

@@ -0,0 +1,39 @@
+require 'nokogiri'
+require 'lbp/functions'
+module Lbp
+	class ParagraphImage
+		attr_reader :pid
+		#def initialize(confighash, filehash, pid, position=1)
+		def initialize(paragraphurl, position=1)
+			@query = Query.new();
+			@results = @query.zone_info("<" + paragraphurl + ">")
+			@zone_index = position - 1
+		end
+		def ulx
+			return @results[@zone_index][:ulx].to_s.to_i
+		end
+		def uly
+			return @results[@zone_index][:uly].to_s.to_i
+		end
+		def lrx
+			return @results[@zone_index][:lrx].to_s.to_i
+		end
+		def lry
+			return @results[@zone_index][:lry].to_s.to_i
+		end
+		def width
+			return @results[@zone_index][:width].to_s.to_i
+		end
+		def height
+			return @results[@zone_index][:height].to_s.to_i
+		end
+		def url
+			return @results[@zone_index][:canvasurl].to_s.split("/").last + ".jpg"
+		end
+		def canvas
+			return @results[@zone_index][:canvasurl].to_s.split("/").last
+		end
+	end
+end

data/lib/lbp/query.rb ADDED

@@ -0,0 +1,181 @@
+require 'sparql'
+module Lbp
+	class Query
+		def initialize
+			@prefixes = "
+	      PREFIX owl: <http://www.w3.org/2002/07/owl#>
+	      PREFIX dbpedia: <http://dbpedia.org/ontology/>
+	      PREFIX dcterms: <http://purl.org/dc/terms/>
+	      PREFIX dc: <http://purl.org/dc/elements/1.1/>
+	      PREFIX sctap: <http://scta.info/property/>
+	      PREFIX sctar: <http://scta.info/resource/>
+	      PREFIX sctat: <http://scta.info/text/>
+	      PREFIX role: <http://www.loc.gov/loc.terms/relators/>
+	      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
+	      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
+	      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
+	      "
+    end
+		def query(query)
+			if ENV['SPARQL'] == "local"
+		  	sparqlendpoint = "http://localhost:3030/ds/query"
+		  elsif ENV['SPARQL'] == "staging"
+		  	sparqlendpoint = "http://sparql-staging.scta.info/ds/query"
+		  elsif ENV['RACK_ENV'] == "production" || ENV['SPARQL'] == "production"
+		    sparqlendpoint = "http://sparql.scta.info/ds/query"
+		  else
+		    sparqlendpoint = "http://sparql.scta.info/ds/query"
+		  end
+			sparql = SPARQL::Client.new(sparqlendpoint)
+		  result = sparql.query(query)
+		  return result
+		end
+		def subject(url)
+			query = "#{@prefixes}
+          SELECT ?p ?o ?ptype
+          {
+          #{url} ?p ?o .
+          OPTIONAL {
+              ?p rdfs:subPropertyOf ?ptype .
+              }
+          }
+          ORDER BY ?p
+          "
+      result = self.query(query)
+		end
+		def subject_with_short_id(shortid)
+			query = "#{@prefixes}
+          SELECT ?p ?o ?ptype
+          {
+          ?resource <http://scta.info/property/shortId> '#{shortid}' .
+          ?resource ?p ?o .
+          OPTIONAL {
+              ?p rdfs:subPropertyOf ?ptype .
+              }
+          }
+          ORDER BY ?p
+          "
+      result = self.query(query)
+		end
+		def zone_info(paragraphurl)
+			query = "#{@prefixes}
+				SELECT DISTINCT ?zone ?ulx ?uly ?lry ?lrx ?position ?height ?width ?canvasurl
+	      {
+	        #{paragraphurl} <http://scta.info/property/hasZone> ?zone .
+	         ?zone <http://scta.info/property/ulx> ?ulx .
+	         ?zone <http://scta.info/property/uly> ?uly .
+	         ?zone <http://scta.info/property/lry> ?lry .
+	         ?zone <http://scta.info/property/lrx> ?lrx .
+	         ?zone <http://scta.info/property/position> ?position .
+	         ?zone <http://scta.info/property/height> ?height .
+	         ?zone <http://scta.info/property/width> ?width .
+	         ?zone <http://scta.info/property/isZoneOn> ?canvasurl .
+	      }
+	      ORDER BY ?position"
+			result = self.query(query)
+		end
+		def collection_query(collection_url)
+			query = "#{@prefixes}
+				SELECT ?collectiontitle ?title ?item ?questiontitle ?order ?status ?gitRepository
+	      {
+	        #{collection_url} <http://scta.info/property/hasStructureItem> ?item .
+	        #{collection_url} <http://purl.org/dc/elements/1.1/title> ?collectiontitle .
+	        ?item <http://purl.org/dc/elements/1.1/title> ?title  .
+	        ?item <http://scta.info/property/totalOrderNumber> ?order .
+	        ?item <http://scta.info/property/status> ?status .
+	        ?item <http://scta.info/property/gitRepository> ?gitRepository .
+	        OPTIONAL
+	      	{
+	      	?item <http://scta.info/property/questionTitle> ?questiontitle  .
+	      	}
+	      }
+	      ORDER BY ?order"
+		  result = self.query(query)
+		end
+		def item_query(expression_url)
+			query = "#{@prefixes}
+	      SELECT ?item_title ?transcript ?transcript_title ?transcript_status ?transcript_type ?manifestation
+	      {
+	      	#{expression_url} <http://purl.org/dc/elements/1.1/title> ?item_title .
+	      	?manifestation <http://scta.info/property/isManifestationOf> #{expression_url} .
+	      	?transcript <http://scta.info/property/isTranscriptionOf> ?manifestation .
+					?transcript <http://purl.org/dc/elements/1.1/title> ?transcript_title  .
+	        ?transcript <http://scta.info/property/status> ?transcript_status .
+	        ?transcript <http://scta.info/property/transcriptionType> ?transcript_type .
+	      }"
+			result = self.query(query)
+		end
+		def expressionElementQuery(expression_url, type)
+		# currently assumes expression_url is for a structureType="structureCollection"
+		expression_url = "<#{expression_url}>"
+		elementTypeUrl = "<#{type}>"
+			query = "#{@prefixes}
+				SELECT ?expression ?structureBlock ?resource ?resourceTitle
+	      {
+	        #{expression_url} <http://scta.info/property/hasStructureItem> ?structureItem .
+	        ?structureItem <http://scta.info/property/hasStructureBlock> ?structureBlock .
+	        ?structureBlock <http://scta.info/property/hasStructureElement> ?element .
+	        ?element <http://scta.info/property/structureElementType> #{elementTypeUrl} .
+	        ?element <http://scta.info/property/isPartOfStructureBlock> ?structureBlock .
+	        ?element <http://scta.info/property/isInstanceOf> ?resource .
+	        ?resource <http://purl.org/dc/elements/1.1/title> ?resourceTitle  .
+	      }
+	       	ORDER BY ?resourceTitle
+	       "
+	    result = self.query(query)
+		end
+		def names(item_url)
+		item_url = "<#{item_url}>"
+			query = "#{@prefixes}
+				SELECT ?item ?name ?nameTitle ?mentioningItem
+	      {
+	        #{item_url} <http://scta.info/property/mentions> ?name .
+	        ?name <http://purl.org/dc/elements/1.1/title> ?nameTitle  .
+	      }
+	       	ORDER BY ?nameTitle
+	       "
+	    result = self.query(query)
+		end
+		def quotes(item_url)
+			item_url = "<#{item_url}>"
+				query = "#{@prefixes}
+					SELECT ?item ?quote ?quoteText ?quoteCitation
+		      {
+		        #{item_url} <http://scta.info/property/quotes> ?quote .
+		        ?quote <http://scta.info/property/quotation> ?quoteText .
+		        ?quote <http://scta.info/property/citation> ?quoteCitation .
+		       }
+		       ORDER BY ?quoteText
+		       "
+		    result = self.query(query)
+		end
+	end
+end

data/lib/lbp/resource.rb ADDED

@@ -0,0 +1,72 @@
+require 'openssl'
+require 'rdf'
+require 'rdf/rdfxml'
+require 'rdf/ntriples'
+require 'rdf/vocab'
+require 'lbp'
+module Lbp
+	class Resource
+		attr_reader :resource_shortId, :resource_url, :results
+		def initialize(resource_id)
+			# fist conditions check to see if search results
+			# are being passed
+			if resource_id.class != String
+				@results = resource_id
+				# resource should should be returned instead of "unsure"
+				@resource_shortId = @results.first[:s].to_s.split("resource/").last
+				@resource_url = @results.first[:s].to_s
+				# if resource id is a string rather than results
+			# it looks ot see if this is a URL to query for results
+			elsif resource_id.include? "http"
+				@query = Query.new();
+				@results = @query.subject("<" + resource_id + ">")
+				@resource_url = resource_id
+				@resource_shortId = resource_id.split("resource/").last
+			# finally, it looks for results using the shortId
+			else
+				@query = Query.new();
+				@results = @query.subject_with_short_id(resource_id)
+				@resource_url = "http://scta.info/resource/" + resource_id
+				@resource_shortId = resource_id
+			end
+		end
+		def convert
+			#this conditional should be replaced
+			# by a function that converts the string
+			# into a class name
+			if self.type_shortId == 'workGroup'
+				return WorkGroup.new(@results)
+			elsif self.type_shortId == 'work'
+				return Work.new(@results)
+			elsif self.type_shortId == 'expression'
+				return Expression.new(@results)
+			elsif self.type_shortId == "manifestation"
+				return Manifestation.new(@results)
+			elsif self.type_shortId == "transcription"
+				return Transcription.new(@results)
+			else
+				puts "no subclass to conver to"
+				return self
+			end
+		end
+		def type_shortId
+			type = @results.dup.filter(:p => RDF::URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")).first[:o].to_s.split("/").last
+		end
+		def type
+			type = @results.dup.filter(:p => RDF::URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")).first[:o].to_s
+		end
+		def title
+			type = @results.dup.filter(:p => RDF::URI(RDF::Vocab::DC11.title)).first[:o].to_s
+		end
+		## structure type should be moved to expression and other classes because it's not generic enough
+		## some resources like quotes or name will not have structure type
+		def structureType_shortId
+			type = @results.dup.filter(:p => RDF::URI("http://scta.info/property/structureType")).first[:o].to_s.split("/").last
+		end
+		def structureType
+			type = @results.dup.filter(:p => RDF::URI("http://scta.info/property/structureType")).first[:o].to_s
+		end
+	end
+end