lbp 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,120 @@
1
+ require 'nokogiri'
2
+ #require 'rugged'
3
+ #require 'lbp/functions'
4
+ require 'lbp'
5
+
6
+ module Lbp
7
# A single identified part (selected by its xml:id) of a transcription file.
#
# Every query re-opens the transcription through `File.new(filepath,
# transcription_type, confighash)`.
# NOTE(review): `File` is presumably the project's Lbp::File (it must respond
# to #nokogiri, #transform and #transform_clean_nokogiri) -- confirm.
class FilePart
  # Namespace binding handed to every namespaced XPath query below.
  TEI_NS = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze

  attr_reader :partid, :element

  # filepath, transcription_type and confighash are forwarded untouched to
  # File.new; partid is the xml:id of the part this object represents.
  def initialize(filepath, transcription_type, confighash, partid)
    @confighash = confighash
    @partid = partid
    @filepath = filepath
    @transcription_type = transcription_type
    # Resolved once at construction; the other queries interpolate it.
    @element = self.element_name
  end

  # Returns the result of the XPath name() call for the node carrying
  # this part's xml:id.
  def element_name
    transcr = File.new(@filepath, @transcription_type, @confighash)
    xmlobject = transcr.nokogiri
    xmlobject.xpath("name(//node()[@xml:id='#{@partid}'])", TEI_NS)
  end

  # def number
  #   transcr = File.new(@filepath, @transcription_type, @confighash)
  #   totalparts = transcr.number_of_body_paragraphs
  #   xmlobject = transcr.nokogiri
  #   parts_following = xmlobject.xpath("//tei:body//tei:#{@element}[preceding::tei:#{@element}[@xml:id='#{@partid}']]", 'tei' => 'http://www.tei-c.org/ns/1.0').count
  #   part_number = totalparts - parts_following
  #
  #   return part_number
  # end

  # Returns a FilePart for the first following element of the same name,
  # or nil when no such element exists.
  def next
    xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
    nextpartid = xmlobject.xpath("//tei:#{@element}[@xml:id='#{@partid}']/following::tei:#{@element}[1]/@xml:id", TEI_NS)
    # An empty result's text is "" rather than nil, so emptiness is the only
    # reliable "no next part" signal (same check as #previous).
    return nil if nextpartid.empty?

    FilePart.new(@filepath, @transcription_type, @confighash, nextpartid.text)
  end

  # Returns a FilePart for the nearest preceding element of the same name,
  # or nil when no such element exists.
  def previous
    xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
    previouspartid = xmlobject.xpath("//tei:#{@element}[@xml:id='#{@partid}']/preceding::tei:#{@element}[1]/@xml:id", TEI_NS)
    return nil if previouspartid.empty?

    FilePart.new(@filepath, @transcription_type, @confighash, previouspartid.text)
  end

  # Counts the facsimile zones whose @start attribute is "#<partid>".
  def number_of_zones
    xmlobject = File.new(@filepath, @transcription_type, @confighash).nokogiri
    partid_with_hash = "#" + @partid
    result = xmlobject.xpath("/tei:TEI/tei:facsimile//tei:surface/tei:zone[@start='#{partid_with_hash}']", TEI_NS)
    result.count
  end

  # Returns the XPath result selecting this part in the source document.
  def xml
    result = File.new(@filepath, @transcription_type, @confighash).nokogiri
    result.xpath("//tei:#{@element}[@xml:id='#{@partid}']", TEI_NS)
  end

  # Transforms the whole file with xsltfile, then selects this part from the
  # output by its (un-namespaced) element name and @id.
  def transform(xsltfile, xslt_param_array=[])
    result = File.new(@filepath, @transcription_type, @confighash).transform(xsltfile, xslt_param_array)
    result.xpath("//#{@element}[@id='#{@partid}']")
  end

  # TODO
  # not working because result of transformation is no longer valid xml document
  # might be easier to pass the part id to the xslt_param_array and then return the result
  #def transform_main_view(xslt_param_array=[])
  #  result = File.new(@filepath, @transcription_type, @confighash).transform_main_view(xslt_param_array)
  #  #p = result.xpath("//#{@element}[@id='#{@partid}']")
  #  #hard coding for the moment so it will only really work for paragraphs
  #  p = result.xpath("//div[@id='pwrap_#{@partid}']")
  #  return p
  #end

  # Selects this part from the file's "clean" transformation.
  def transform_plain_text(xslt_param_array=[])
    # Note: it may look odd that the plain text of a part comes from the clean
    # transform, but the basic part elements are still needed in the output
    # in order to select the desired part.
    result = File.new(@filepath, @transcription_type, @confighash).transform_clean_nokogiri(xslt_param_array)
    result.xpath("//#{@element}[@id='#{@partid}']")
  end

  # Number of whitespace-separated words in the plain-text rendering.
  def word_count
    plaintext = self.transform_plain_text
    plaintext.text.split.size
  end

  # Lower-cased, whitespace-separated words of the plain-text rendering.
  def word_array
    plaintext = self.transform_plain_text
    word_array = plaintext.text.split
    word_array.map!{ |word| word.downcase}
  end

  # Returns a Hash of word => occurrence count.
  #
  # sort:  'frequency' (by count) or 'word' (by the word itself)
  # order: 'descending' or 'ascending'
  # Any other combination leaves the counts in first-seen order.
  def word_frequency(sort='frequency', order='descending')
    word_array = self.word_array
    wf = Hash.new(0)
    word_array.each { |word| wf[word] += 1 }

    if sort == "frequency"
      if order == "descending" # high to low
        wf = wf.sort_by{|k,v| v}.reverse
      elsif order == "ascending" # low to high
        wf = wf.sort_by{|k,v| v}
      end
    elsif sort == "word"
      if order == "descending" # z - a
        wf = wf.sort_by{|k,v| k}.reverse
      elsif order == "ascending" # a - z
        wf = wf.sort_by{|k,v| k}
      end
    end
    return wf.to_h
  end
end
120
+ end
@@ -2,11 +2,20 @@
2
2
  require 'nokogiri'
3
3
  require 'open-uri'
4
4
 
5
- def xslt_transform(xmlfile, xsltfile, xslt_param_array)
6
- xml = Nokogiri::XML(open(xmlfile))
5
# Applies the stylesheet at xsltfile to an XML document via
# Nokogiri's Stylesheet#transform and returns its result.
#
# xml_open_uri_file - passed to the stylesheet unchanged (presumably an
#                     already-parsed Nokogiri document -- confirm with callers).
# xsltfile          - path or URL of the XSLT stylesheet.
# xslt_param_array  - stylesheet parameters, forwarded as-is.
def xslt_transform(xml_open_uri_file, xsltfile, xslt_param_array)
  # Block form closes the stylesheet handle once it has been parsed; URI.open
  # (open-uri) accepts both local paths and URLs. The IO itself, not its
  # contents, is handed to Nokogiri exactly as before.
  xslt = URI.open(xsltfile) { |stylesheet| Nokogiri::XSLT(stylesheet) }
  xslt.transform(xml_open_uri_file, xslt_param_array)
end
11
11
 
12
# Applies the stylesheet at xsltfile to an XML document via
# Nokogiri's Stylesheet#apply_to and returns its result.
#
# xml_open_uri_file - passed to the stylesheet unchanged (presumably an
#                     already-parsed Nokogiri document -- confirm with callers).
# xsltfile          - path or URL of the XSLT stylesheet.
# xslt_param_array  - stylesheet parameters, forwarded as-is.
def xslt_apply_to(xml_open_uri_file, xsltfile, xslt_param_array)
  # Block form closes the stylesheet handle once it has been parsed; URI.open
  # (open-uri) accepts both local paths and URLs.
  xslt = URI.open(xsltfile) { |stylesheet| Nokogiri::XSLT(stylesheet) }
  xslt.apply_to(xml_open_uri_file, xslt_param_array)
end
18
+
19
+
20
+
12
21
 
@@ -0,0 +1,36 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
+ module Lbp
10
# A Resource whose statements are read for transcription links.
class Manifestation < Resource

  #inherits initialization from Resource

  # Returns the objects of every hasTranscription statement as strings
  # (an empty list when there are none).
  def transcriptionUrls
    # filter is applied to a copy so the shared results are left untouched
    results = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasTranscription"))
    results.map {|m| m[:o].to_s}
  end

  # Returns the object of the hasCanonicalTranscription statement as a
  # string, or nil when the results are empty or carry no such statement.
  def canonicalTranscriptionUrl
    # TODO this check against an empty array should
    # occur everywhere the filter is used
    # maybe we need a helper function that does this once
    return nil if self.results.count == 0

    match = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalTranscription")).first
    # No canonical transcription recorded: report nil instead of calling [] on nil.
    return nil if match.nil?

    match[:o].to_s
  end

  # Returns a Transcription built from the canonical transcription URL,
  # or nil when no canonical transcription URL is available.
  def canonicalTranscription
    url = self.canonicalTranscriptionUrl
    return nil if url.nil?

    Transcription.new(url)
  end
end
34
+ end
35
+
36
+
@@ -0,0 +1,39 @@
1
+ require 'nokogiri'
2
+ require 'lbp/functions'
3
+
4
+ module Lbp
5
# Coordinates and canvas of one zone attached to a paragraph, read from the
# rows returned by Query#zone_info.
class ParagraphImage
  attr_reader :pid

  #def initialize(confighash, filehash, pid, position=1)
  # paragraphurl is wrapped in angle brackets before being queried;
  # position is 1-based and selects which returned zone row is used.
  def initialize(paragraphurl, position=1)
    @query = Query.new
    @results = @query.zone_info("<" + paragraphurl + ">")
    @zone_index = position - 1
  end

  def ulx
    zone_integer(:ulx)
  end

  def uly
    zone_integer(:uly)
  end

  def lrx
    zone_integer(:lrx)
  end

  def lry
    zone_integer(:lry)
  end

  def width
    zone_integer(:width)
  end

  def height
    zone_integer(:height)
  end

  # Last path segment of the canvas URL with ".jpg" appended.
  def url
    canvas_segment + ".jpg"
  end

  # Last path segment of the canvas URL.
  def canvas
    canvas_segment
  end

  private

  # The selected zone row.
  def zone
    @results[@zone_index]
  end

  # Reads one field of the selected zone as an integer.
  def zone_integer(field)
    zone[field].to_s.to_i
  end

  def canvas_segment
    zone[:canvasurl].to_s.split("/").last
  end
end
39
+ end
@@ -0,0 +1,181 @@
1
+ require 'sparql'
2
+
3
+ module Lbp
4
# Builds SPARQL query strings and runs them through SPARQL::Client.
class Query

  def initialize
    # Prefix declarations prepended to every query built below.
    @prefixes = "
      PREFIX owl: <http://www.w3.org/2002/07/owl#>
      PREFIX dbpedia: <http://dbpedia.org/ontology/>
      PREFIX dcterms: <http://purl.org/dc/terms/>
      PREFIX dc: <http://purl.org/dc/elements/1.1/>
      PREFIX sctap: <http://scta.info/property/>
      PREFIX sctar: <http://scta.info/resource/>
      PREFIX sctat: <http://scta.info/text/>
      PREFIX role: <http://www.loc.gov/loc.terms/relators/>
      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      "
  end

  # Sends the query text to the endpoint selected by ENV['SPARQL'] and
  # returns the client's result.
  def query(query)
    sparqlendpoint =
      case ENV['SPARQL']
      when "local"   then "http://localhost:3030/ds/query"
      when "staging" then "http://sparql-staging.scta.info/ds/query"
      else
        # production (by RACK_ENV or SPARQL) and the fallback share this endpoint
        "http://sparql.scta.info/ds/query"
      end

    SPARQL::Client.new(sparqlendpoint).query(query)
  end

  # All ?p ?o pairs for a subject; url is interpolated as given, so the
  # caller supplies the angle brackets.
  def subject(url)
    sparql = "#{@prefixes}

      SELECT ?p ?o ?ptype
      {
      #{url} ?p ?o .
      OPTIONAL {
      ?p rdfs:subPropertyOf ?ptype .
      }

      }
      ORDER BY ?p
      "
    self.query(sparql)
  end

  # Same as #subject, but the resource is located by its shortId literal.
  def subject_with_short_id(shortid)
    sparql = "#{@prefixes}

      SELECT ?p ?o ?ptype
      {
      ?resource <http://scta.info/property/shortId> '#{shortid}' .
      ?resource ?p ?o .
      OPTIONAL {
      ?p rdfs:subPropertyOf ?ptype .
      }

      }
      ORDER BY ?p
      "
    self.query(sparql)
  end

  # Zone rows for a paragraph, ordered by position; paragraphurl is
  # interpolated as given (caller supplies the angle brackets).
  def zone_info(paragraphurl)
    sparql = "#{@prefixes}
      SELECT DISTINCT ?zone ?ulx ?uly ?lry ?lrx ?position ?height ?width ?canvasurl
      {
      #{paragraphurl} <http://scta.info/property/hasZone> ?zone .
      ?zone <http://scta.info/property/ulx> ?ulx .
      ?zone <http://scta.info/property/uly> ?uly .
      ?zone <http://scta.info/property/lry> ?lry .
      ?zone <http://scta.info/property/lrx> ?lrx .
      ?zone <http://scta.info/property/position> ?position .
      ?zone <http://scta.info/property/height> ?height .
      ?zone <http://scta.info/property/width> ?width .
      ?zone <http://scta.info/property/isZoneOn> ?canvasurl .
      }
      ORDER BY ?position"
    self.query(sparql)
  end


  # Structure items of a collection, ordered by ?order; collection_url is
  # interpolated as given (caller supplies the angle brackets).
  def collection_query(collection_url)
    sparql = "#{@prefixes}

      SELECT ?collectiontitle ?title ?item ?questiontitle ?order ?status ?gitRepository
      {
      #{collection_url} <http://scta.info/property/hasStructureItem> ?item .
      #{collection_url} <http://purl.org/dc/elements/1.1/title> ?collectiontitle .
      ?item <http://purl.org/dc/elements/1.1/title> ?title .
      ?item <http://scta.info/property/totalOrderNumber> ?order .
      ?item <http://scta.info/property/status> ?status .
      ?item <http://scta.info/property/gitRepository> ?gitRepository .


      OPTIONAL
      {
      ?item <http://scta.info/property/questionTitle> ?questiontitle .
      }
      }
      ORDER BY ?order"

    self.query(sparql)
  end

  # Manifestations and transcriptions of an expression; expression_url is
  # interpolated as given (caller supplies the angle brackets).
  def item_query(expression_url)
    sparql = "#{@prefixes}

      SELECT ?item_title ?transcript ?transcript_title ?transcript_status ?transcript_type ?manifestation
      {
      #{expression_url} <http://purl.org/dc/elements/1.1/title> ?item_title .
      ?manifestation <http://scta.info/property/isManifestationOf> #{expression_url} .

      ?transcript <http://scta.info/property/isTranscriptionOf> ?manifestation .
      ?transcript <http://purl.org/dc/elements/1.1/title> ?transcript_title .
      ?transcript <http://scta.info/property/status> ?transcript_status .
      ?transcript <http://scta.info/property/transcriptionType> ?transcript_type .
      }"

    self.query(sparql)
  end

  # Resources instanced by structure elements of the given type within an
  # expression. Unlike the methods above, both arguments are bare URLs:
  # the angle brackets are added here.
  def expressionElementQuery(expression_url, type)
    # currently assumes expression_url is for a structureType="structureCollection"
    expression = "<#{expression_url}>"
    elementTypeUrl = "<#{type}>"
    sparql = "#{@prefixes}

      SELECT ?expression ?structureBlock ?resource ?resourceTitle
      {

      #{expression} <http://scta.info/property/hasStructureItem> ?structureItem .
      ?structureItem <http://scta.info/property/hasStructureBlock> ?structureBlock .
      ?structureBlock <http://scta.info/property/hasStructureElement> ?element .
      ?element <http://scta.info/property/structureElementType> #{elementTypeUrl} .
      ?element <http://scta.info/property/isPartOfStructureBlock> ?structureBlock .
      ?element <http://scta.info/property/isInstanceOf> ?resource .
      ?resource <http://purl.org/dc/elements/1.1/title> ?resourceTitle .
      }
      ORDER BY ?resourceTitle

      "
    self.query(sparql)
  end

  # Names mentioned by an item; item_url is a bare URL (brackets added here).
  def names(item_url)
    item = "<#{item_url}>"
    sparql = "#{@prefixes}

      SELECT ?item ?name ?nameTitle ?mentioningItem
      {
      #{item} <http://scta.info/property/mentions> ?name .
      ?name <http://purl.org/dc/elements/1.1/title> ?nameTitle .
      }
      ORDER BY ?nameTitle

      "
    self.query(sparql)
  end

  # Quotes made by an item; item_url is a bare URL (brackets added here).
  def quotes(item_url)
    item = "<#{item_url}>"
    sparql = "#{@prefixes}

      SELECT ?item ?quote ?quoteText ?quoteCitation
      {
      #{item} <http://scta.info/property/quotes> ?quote .
      ?quote <http://scta.info/property/quotation> ?quoteText .
      ?quote <http://scta.info/property/citation> ?quoteCitation .
      }
      ORDER BY ?quoteText
      "
    self.query(sparql)
  end
end
181
+ end
@@ -0,0 +1,72 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
+ module Lbp
10
# A resource together with the query results describing it.
class Resource
  attr_reader :resource_shortId, :resource_url, :results

  # resource_id may be:
  # * a non-String object -- treated as already-fetched results; the url and
  #   short id are taken from the :s value of the first row;
  # * a String containing "http" -- treated as the resource URL and queried;
  # * any other String -- treated as a short id and queried by shortId.
  def initialize(resource_id)
    # first condition checks to see if search results
    # are being passed
    if resource_id.class != String
      @results = resource_id
      # resource should be returned instead of "unsure"
      @resource_shortId = @results.first[:s].to_s.split("resource/").last
      @resource_url = @results.first[:s].to_s
    # if resource id is a string rather than results
    # it looks to see if this is a URL to query for results
    elsif resource_id.include? "http"
      @query = Query.new
      @results = @query.subject("<" + resource_id + ">")
      @resource_url = resource_id
      @resource_shortId = resource_id.split("resource/").last
    # finally, it looks for results using the shortId
    else
      @query = Query.new
      @results = @query.subject_with_short_id(resource_id)
      @resource_url = "http://scta.info/resource/" + resource_id
      @resource_shortId = resource_id
    end
  end

  # Returns an instance of the subclass matching type_shortId, built from the
  # same results, or self (after printing a notice) when no subclass matches
  # or no type is recorded.
  def convert
    #this conditional should be replaced
    # by a function that converts the string
    # into a class name
    case self.type_shortId
    when 'workGroup'     then WorkGroup.new(@results)
    when 'work'          then Work.new(@results)
    when 'expression'    then Expression.new(@results)
    when 'manifestation' then Manifestation.new(@results)
    when 'transcription' then Transcription.new(@results)
    else
      puts "no subclass to conver to"
      self
    end
  end

  # Last path segment of the rdf:type object, or nil when none is recorded.
  def type_shortId
    last_segment(self.type)
  end

  # rdf:type object as a string, or nil when none is recorded.
  def type
    object_of(RDF::URI("http://www.w3.org/1999/02/22-rdf-syntax-ns#type"))
  end

  # dc:title object as a string, or nil when none is recorded.
  def title
    object_of(RDF::URI(RDF::Vocab::DC11.title))
  end

  ## structure type should be moved to expression and other classes because it's not generic enough
  ## some resources like quotes or name will not have structure type
  # Last path segment of the structureType object, or nil when none is recorded.
  def structureType_shortId
    last_segment(self.structureType)
  end

  # structureType object as a string, or nil when none is recorded.
  def structureType
    object_of(RDF::URI("http://scta.info/property/structureType"))
  end

  private

  # Object (as a string) of the first row whose :p equals predicate, or nil
  # when no row matches. The filter runs on a copy so @results stays intact.
  def object_of(predicate)
    match = @results.dup.filter(:p => predicate).first
    match.nil? ? nil : match[:o].to_s
  end

  # Text after the final "/" of value; nil when value is nil.
  def last_segment(value)
    value.nil? ? nil : value.split("/").last
  end
end
72
+ end