lbp 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,120 @@
1
+ require 'nokogiri'
2
+ #require 'rugged'
3
+ #require 'lbp/functions'
4
+ require 'lbp'
5
+
6
module Lbp
  # One identified part of a TEI document (for example a paragraph), located
  # by its xml:id.
  #
  # Every query builds a fresh File.new(filepath, transcription_type,
  # confighash); inside this module that constant presumably resolves to
  # Lbp::File rather than ::File -- confirm against the rest of the gem.
  class FilePart
    # Prefix binding handed to every XPath query that uses the tei: prefix.
    TEI_NAMESPACE = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze

    attr_reader :partid, :element

    # filepath, transcription_type and confighash are stored and passed
    # unchanged to File.new; partid is the xml:id of the part.
    # The element name of the part is looked up immediately.
    def initialize(filepath, transcription_type, confighash, partid)
      @confighash = confighash
      @partid = partid
      @filepath = filepath
      @transcription_type = transcription_type
      @element = element_name
    end

    # Name of the node carrying this part's xml:id, as reported by the
    # XPath name() function.
    def element_name
      document.xpath("name(//node()[@xml:id='#{@partid}'])", TEI_NAMESPACE)
    end

    # def number
    #   transcr = File.new(@filepath, @transcription_type, @confighash)
    #   totalparts = transcr.number_of_body_paragraphs
    #   xmlobject = transcr.nokogiri
    #   parts_following = xmlobject.xpath("//tei:body//tei:#{@element}[preceding::tei:#{@element}[@xml:id='#{@partid}']]", 'tei' => 'http://www.tei-c.org/ns/1.0').count
    #   part_number = totalparts - parts_following
    #
    #   return part_number
    # end

    # FilePart for the nearest following element of the same name, or nil
    # when the query finds no following xml:id.
    def next
      adjacent_part('following')
    end

    # FilePart for the nearest preceding element of the same name, or nil
    # when the query finds no preceding xml:id.
    def previous
      adjacent_part('preceding')
    end

    # Number of facsimile zones whose start attribute is "#<partid>".
    def number_of_zones
      zone_start = "#" + @partid
      document.xpath("/tei:TEI/tei:facsimile//tei:surface/tei:zone[@start='#{zone_start}']", TEI_NAMESPACE).count
    end

    # XPath result for the element carrying this part's xml:id.
    def xml
      document.xpath("//tei:#{@element}[@xml:id='#{@partid}']", TEI_NAMESPACE)
    end

    # Runs File#transform with the given stylesheet and parameters, then
    # selects this part from the result by element name and id attribute.
    def transform(xsltfile, xslt_param_array=[])
      transcription.transform(xsltfile, xslt_param_array).xpath(transformed_part_path)
    end

    # TODO
    # not working because result of transformation is no longer valid xml document
    # might be easier to pass the part id to the xslt_param_array and then return the result
    #def transform_main_view(xslt_param_array=[])
    #  result = File.new(@filepath, @transcription_type, @confighash).transform_main_view(xslt_param_array)
    #  #p = result.xpath("//#{@element}[@id='#{@partid}']")
    #  #hard coding for the moment so it will only really work for paragraphs
    #  p = result.xpath("//div[@id='pwrap_#{@partid}']")
    #  return p
    #end

    # Note that it could be slightly confusing that the plain text of a part
    # uses the "clean" transform: the basic elements are still needed in
    # order to select the desired part from the result.
    def transform_plain_text(xslt_param_array=[])
      transcription.transform_clean_nokogiri(xslt_param_array).xpath(transformed_part_path)
    end

    # Number of whitespace-separated words in the plain-text rendering.
    def word_count
      transform_plain_text.text.split.size
    end

    # Downcased, whitespace-separated words of the plain-text rendering.
    def word_array
      transform_plain_text.text.split.map { |word| word.downcase }
    end

    # Hash of word => occurrence count.
    #
    # sort::  'frequency' or 'word'
    # order:: 'descending' or 'ascending'
    #
    # Any other value for either argument leaves the counts in first-seen
    # order.
    def word_frequency(sort='frequency', order='descending')
      counts = Hash.new(0)
      word_array.each { |word| counts[word] += 1 }

      ranked =
        case sort
        when "frequency" then counts.sort_by { |_word, count| count }
        when "word" then counts.sort_by { |word, _count| word }
        end
      return counts unless ranked

      case order
      when "descending" then ranked.reverse.to_h # high to low / z - a
      when "ascending" then ranked.to_h          # low to high / a - z
      else counts
      end
    end

    private

    # Fresh File object for this part's source.
    def transcription
      File.new(@filepath, @transcription_type, @confighash)
    end

    # Parsed document as returned by File#nokogiri.
    def document
      transcription.nokogiri
    end

    # XPath selecting this part in a transformed (non-namespaced) result.
    def transformed_part_path
      "//#{@element}[@id='#{@partid}']"
    end

    # Shared lookup behind #next and #previous. axis is the XPath axis name
    # ('following' or 'preceding').
    def adjacent_part(axis)
      ids = document.xpath("//tei:#{@element}[@xml:id='#{@partid}']/#{axis}::tei:#{@element}[1]/@xml:id", TEI_NAMESPACE)
      # An empty result has "" as its text, never nil, so emptiness is the
      # only reliable "no neighbour" signal.
      return nil if ids.empty?

      FilePart.new(@filepath, @transcription_type, @confighash, ids.text)
    end
  end
end
@@ -2,11 +2,20 @@
2
2
  require 'nokogiri'
3
3
  require 'open-uri'
4
4
 
5
- def xslt_transform(xmlfile, xsltfile, xslt_param_array)
6
- xml = Nokogiri::XML(open(xmlfile))
5
# Applies the stylesheet at +xsltfile+ to an XML document the caller has
# already loaded.
#
# xml_open_uri_file:: the document handed straight to the stylesheet's
#                     #transform (this function no longer opens or parses it)
# xsltfile::          stylesheet location, opened with Kernel#open
# xslt_param_array::  stylesheet parameters, passed through to #transform
#
# Returns whatever the stylesheet's #transform returns.
def xslt_transform(xml_open_uri_file, xsltfile, xslt_param_array)
  # Block form of open: the stylesheet handle is closed as soon as it has
  # been parsed instead of being left for the garbage collector.
  stylesheet = open(xsltfile) { |io| Nokogiri::XSLT(io) }
  stylesheet.transform(xml_open_uri_file, xslt_param_array)
end
11
11
 
12
# Applies the stylesheet at +xsltfile+ to an XML document the caller has
# already loaded, using the stylesheet's #apply_to instead of #transform.
#
# xml_open_uri_file:: the document handed straight to #apply_to
# xsltfile::          stylesheet location, opened with Kernel#open
# xslt_param_array::  stylesheet parameters, passed through to #apply_to
#
# Returns whatever the stylesheet's #apply_to returns.
def xslt_apply_to(xml_open_uri_file, xsltfile, xslt_param_array)
  # Block form of open: the stylesheet handle is closed as soon as it has
  # been parsed instead of being left for the garbage collector.
  stylesheet = open(xsltfile) { |io| Nokogiri::XSLT(io) }
  stylesheet.apply_to(xml_open_uri_file, xslt_param_array)
end
18
+
19
+
20
+
12
21
 
@@ -0,0 +1,36 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
module Lbp
  # Resource subclass exposing the transcriptions linked to a manifestation.
  class Manifestation < Resource

    # inherits initialization from Resource

    # Object of every hasTranscription statement in the results, as Strings.
    def transcriptionUrls
      statements_about("http://scta.info/property/hasTranscription").map { |solution| solution[:o].to_s }
    end

    # Object of the first hasCanonicalTranscription statement as a String.
    # Returns nil when the results are empty or contain no such statement;
    # previously the latter case raised NoMethodError on nil.
    def canonicalTranscriptionUrl
      canonical = statements_about("http://scta.info/property/hasCanonicalTranscription").first
      canonical[:o].to_s if canonical
    end

    # Transcription built from the canonical transcription URL. The URL is
    # passed through unchanged, including nil when there is none.
    def canonicalTranscription
      Transcription.new(canonicalTranscriptionUrl)
    end

    private

    # Solutions whose predicate is the given URL. Works on a copy of the
    # results, as the original code did -- presumably because filter can
    # modify its receiver.
    def statements_about(predicate_url)
      results.dup.filter(:p => RDF::URI(predicate_url))
    end
  end
end
35
+
36
+
@@ -0,0 +1,39 @@
1
+ require 'nokogiri'
2
+ require 'lbp/functions'
3
+
4
module Lbp
  # Coordinates and canvas of one zone attached to a paragraph, read from
  # the rows returned by Query#zone_info.
  class ParagraphImage
    attr_reader :pid

    #def initialize(confighash, filehash, pid, position=1)

    # paragraphurl:: paragraph URL without angle brackets; they are added here
    # position::     1-based index of the zone among the returned rows
    def initialize(paragraphurl, position=1)
      @query = Query.new
      @results = @query.zone_info("<" + paragraphurl + ">")
      @zone_index = position - 1
    end

    # Upper-left x of the zone.
    def ulx
      integer_field(:ulx)
    end

    # Upper-left y of the zone.
    def uly
      integer_field(:uly)
    end

    # Lower-right x of the zone.
    def lrx
      integer_field(:lrx)
    end

    # Lower-right y of the zone.
    def lry
      integer_field(:lry)
    end

    def width
      integer_field(:width)
    end

    def height
      integer_field(:height)
    end

    # Last path segment of the canvas URL with ".jpg" appended.
    def url
      canvas_segment + ".jpg"
    end

    # Last path segment of the canvas URL.
    def canvas
      canvas_segment
    end

    private

    # Row selected by the position given to the constructor.
    def zone
      @results[@zone_index]
    end

    # Value bound to key in the selected row, stringified then read as an
    # integer.
    def integer_field(key)
      zone[key].to_s.to_i
    end

    def canvas_segment
      zone[:canvasurl].to_s.split("/").last
    end
  end
end
@@ -0,0 +1,181 @@
1
+ require 'sparql'
2
+
3
module Lbp
  # Builds SPARQL query strings and runs them through SPARQL::Client.
  #
  # Argument conventions differ between methods and are kept as they were:
  # subject, zone_info, collection_query and item_query interpolate their
  # argument verbatim, so callers must pass it already wrapped in angle
  # brackets; expressionElementQuery, names and quotes add the brackets
  # themselves.
  class Query
    # Backslash escapes for the characters that may not appear raw inside a
    # single-quoted SPARQL string literal.
    LITERAL_ESCAPES = {
      "\\" => "\\\\",
      "'" => "\\'",
      "\n" => "\\n",
      "\r" => "\\r"
    }.freeze

    def initialize
      @prefixes = "
      PREFIX owl: <http://www.w3.org/2002/07/owl#>
      PREFIX dbpedia: <http://dbpedia.org/ontology/>
      PREFIX dcterms: <http://purl.org/dc/terms/>
      PREFIX dc: <http://purl.org/dc/elements/1.1/>
      PREFIX sctap: <http://scta.info/property/>
      PREFIX sctar: <http://scta.info/resource/>
      PREFIX sctat: <http://scta.info/text/>
      PREFIX role: <http://www.loc.gov/loc.terms/relators/>
      PREFIX xsd: <http://www.w3.org/2001/XMLSchema#>
      PREFIX rdfs: <http://www.w3.org/2000/01/rdf-schema#>
      PREFIX rdf: <http://www.w3.org/1999/02/22-rdf-syntax-ns#>
      "
    end

    # Sends the query text to the endpoint chosen by #sparql_endpoint and
    # returns the client's result.
    def query(query)
      SPARQL::Client.new(sparql_endpoint).query(query)
    end

    # Every predicate/object pair of a subject, with the super-property of
    # the predicate when one is declared. url must include angle brackets.
    def subject(url)
      query = "#{@prefixes}

      SELECT ?p ?o ?ptype
      {
        #{url} ?p ?o .
        OPTIONAL {
          ?p rdfs:subPropertyOf ?ptype .
        }

      }
      ORDER BY ?p
      "
      self.query(query)
    end

    # Same as #subject, but the subject is found through its shortId
    # literal. The id is escaped so a quote or backslash in it cannot end
    # the literal early.
    def subject_with_short_id(shortid)
      query = "#{@prefixes}

      SELECT ?p ?o ?ptype
      {
        ?resource <http://scta.info/property/shortId> '#{escape_literal(shortid)}' .
        ?resource ?p ?o .
        OPTIONAL {
          ?p rdfs:subPropertyOf ?ptype .
        }

      }
      ORDER BY ?p
      "
      self.query(query)
    end

    # Zones of a paragraph with their coordinates, size and canvas, ordered
    # by position. paragraphurl must include angle brackets.
    def zone_info(paragraphurl)
      query = "#{@prefixes}
      SELECT DISTINCT ?zone ?ulx ?uly ?lry ?lrx ?position ?height ?width ?canvasurl
      {
        #{paragraphurl} <http://scta.info/property/hasZone> ?zone .
        ?zone <http://scta.info/property/ulx> ?ulx .
        ?zone <http://scta.info/property/uly> ?uly .
        ?zone <http://scta.info/property/lry> ?lry .
        ?zone <http://scta.info/property/lrx> ?lrx .
        ?zone <http://scta.info/property/position> ?position .
        ?zone <http://scta.info/property/height> ?height .
        ?zone <http://scta.info/property/width> ?width .
        ?zone <http://scta.info/property/isZoneOn> ?canvasurl .
      }
      ORDER BY ?position"
      self.query(query)
    end

    # Structure items of a collection with title, order, status and git
    # repository (question title optional), ordered by ?order.
    # collection_url must include angle brackets.
    def collection_query(collection_url)
      query = "#{@prefixes}

      SELECT ?collectiontitle ?title ?item ?questiontitle ?order ?status ?gitRepository
      {
        #{collection_url} <http://scta.info/property/hasStructureItem> ?item .
        #{collection_url} <http://purl.org/dc/elements/1.1/title> ?collectiontitle .
        ?item <http://purl.org/dc/elements/1.1/title> ?title .
        ?item <http://scta.info/property/totalOrderNumber> ?order .
        ?item <http://scta.info/property/status> ?status .
        ?item <http://scta.info/property/gitRepository> ?gitRepository .


        OPTIONAL
        {
          ?item <http://scta.info/property/questionTitle> ?questiontitle .
        }
      }
      ORDER BY ?order"

      self.query(query)
    end

    # Manifestations of an expression and the transcriptions of each, with
    # transcription title, status and type. expression_url must include
    # angle brackets.
    def item_query(expression_url)
      query = "#{@prefixes}

      SELECT ?item_title ?transcript ?transcript_title ?transcript_status ?transcript_type ?manifestation
      {
        #{expression_url} <http://purl.org/dc/elements/1.1/title> ?item_title .
        ?manifestation <http://scta.info/property/isManifestationOf> #{expression_url} .

        ?transcript <http://scta.info/property/isTranscriptionOf> ?manifestation .
        ?transcript <http://purl.org/dc/elements/1.1/title> ?transcript_title .
        ?transcript <http://scta.info/property/status> ?transcript_status .
        ?transcript <http://scta.info/property/transcriptionType> ?transcript_type .
      }"

      self.query(query)
    end

    # Resources instantiated by structure elements of the given type within
    # an expression, ordered by resource title. Both arguments are given
    # without angle brackets.
    def expressionElementQuery(expression_url, type)
      # currently assumes expression_url is for a structureType="structureCollection"
      expression_url = "<#{expression_url}>"
      elementTypeUrl = "<#{type}>"
      query = "#{@prefixes}

      SELECT ?expression ?structureBlock ?resource ?resourceTitle
      {

        #{expression_url} <http://scta.info/property/hasStructureItem> ?structureItem .
        ?structureItem <http://scta.info/property/hasStructureBlock> ?structureBlock .
        ?structureBlock <http://scta.info/property/hasStructureElement> ?element .
        ?element <http://scta.info/property/structureElementType> #{elementTypeUrl} .
        ?element <http://scta.info/property/isPartOfStructureBlock> ?structureBlock .
        ?element <http://scta.info/property/isInstanceOf> ?resource .
        ?resource <http://purl.org/dc/elements/1.1/title> ?resourceTitle .
      }
      ORDER BY ?resourceTitle

      "
      self.query(query)
    end

    # Names mentioned by an item, ordered by name title. item_url is given
    # without angle brackets.
    def names(item_url)
      item_url = "<#{item_url}>"
      query = "#{@prefixes}

      SELECT ?item ?name ?nameTitle ?mentioningItem
      {
        #{item_url} <http://scta.info/property/mentions> ?name .
        ?name <http://purl.org/dc/elements/1.1/title> ?nameTitle .
      }
      ORDER BY ?nameTitle

      "
      self.query(query)
    end

    # Quotes used by an item with their text and citation, ordered by quote
    # text. item_url is given without angle brackets.
    def quotes(item_url)
      item_url = "<#{item_url}>"
      query = "#{@prefixes}

      SELECT ?item ?quote ?quoteText ?quoteCitation
      {
        #{item_url} <http://scta.info/property/quotes> ?quote .
        ?quote <http://scta.info/property/quotation> ?quoteText .
        ?quote <http://scta.info/property/citation> ?quoteCitation .
      }
      ORDER BY ?quoteText
      "
      self.query(query)
    end

    private

    # Endpoint selected by ENV['SPARQL']: "local" and "staging" have their
    # own servers; every other setting, set or not, uses the production
    # endpoint.
    def sparql_endpoint
      case ENV['SPARQL']
      when "local" then "http://localhost:3030/ds/query"
      when "staging" then "http://sparql-staging.scta.info/ds/query"
      else "http://sparql.scta.info/ds/query"
      end
    end

    # Escapes a value for use between single quotes in a SPARQL query.
    def escape_literal(value)
      value.to_s.gsub(/[\\'\n\r]/, LITERAL_ESCAPES)
    end
  end
end
@@ -0,0 +1,72 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
module Lbp
  # Generic resource backed by a set of query solutions.
  class Resource
    attr_reader :resource_shortId, :resource_url, :results

    # resource_id may be one of three things:
    # * an already-fetched result set (anything that is not a String); the
    #   URL and short id are read from the :s binding of its first solution
    # * a String containing "http", treated as the resource URL to query
    # * any other String, treated as a short id to query
    def initialize(resource_id)
      if !resource_id.is_a?(String)
        # search results are being passed in directly
        @results = resource_id
        @resource_url = @results.first[:s].to_s
        @resource_shortId = @resource_url.split("resource/").last
      elsif resource_id.include? "http"
        # a URL to query for results
        @query = Query.new
        @results = @query.subject("<" + resource_id + ">")
        @resource_url = resource_id
        @resource_shortId = resource_id.split("resource/").last
      else
        # finally, look for results using the short id
        @query = Query.new
        @results = @query.subject_with_short_id(resource_id)
        @resource_url = "http://scta.info/resource/" + resource_id
        @resource_shortId = resource_id
      end
    end

    # Re-wraps the results in the subclass matching the resource type.
    # Returns self (after printing a notice) when the type is missing or has
    # no matching subclass.
    def convert
      # this conditional should be replaced by a function that converts the
      # string into a class name
      case type_shortId # evaluated once instead of once per branch
      when 'workGroup' then WorkGroup.new(@results)
      when 'work' then Work.new(@results)
      when 'expression' then Expression.new(@results)
      when 'manifestation' then Manifestation.new(@results)
      when 'transcription' then Transcription.new(@results)
      else
        puts "no subclass to conver to"
        self
      end
    end

    # Last path segment of the rdf:type object, or nil without a type.
    def type_shortId
      last_segment(type)
    end

    # Object of the first rdf:type statement as a String, or nil if absent.
    def type
      first_object("http://www.w3.org/1999/02/22-rdf-syntax-ns#type")
    end

    # Object of the first dc:title statement as a String, or nil if absent.
    def title
      first_object(RDF::Vocab::DC11.title)
    end

    ## structure type should be moved to expression and other classes because it's not generic enough
    ## some resources like quotes or names will not have a structure type

    # Last path segment of the structureType object, or nil if absent.
    def structureType_shortId
      last_segment(structureType)
    end

    # Object of the first structureType statement as a String, or nil if
    # absent.
    def structureType
      first_object("http://scta.info/property/structureType")
    end

    private

    # Object of the first solution whose predicate is the given URI, as a
    # String; nil when nothing matches (previously NoMethodError on nil).
    # Filters a copy of the results, as the original code did -- presumably
    # because filter can modify its receiver.
    def first_object(predicate)
      match = @results.dup.filter(:p => RDF::URI(predicate)).first
      match[:o].to_s if match
    end

    # Text after the last "/" of value; nil for nil.
    def last_segment(value)
      value.split("/").last if value
    end
  end
end
+ end