lbp 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,249 +1,33 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
- require 'lbp/item'
5
- require 'open-uri'
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
6
7
 
7
8
  module Lbp
8
- class Transcription
9
- attr_reader :fs, :type, :ed, :xslt_dir
10
-
11
- def initialize(projectfile, filehash)
12
-
13
- @filehash = filehash
14
- @projectfile = projectfile
15
-
16
- @fs = filehash[:fs]
17
- @type = filehash[:type] # critical or documentary
18
- @ed = filehash[:ed]
19
-
20
- @confighash = Collection.new(@projectfile).confighash
21
- @xslthash = @confighash[:xslt_dirs]
22
-
23
- #xslt version needs to gathered from a method
24
- xslt_version = nil
25
- #for now its being set to nil because no documents currently declare it
26
-
27
- if xslt_version == nil
28
- @schema = @xslthash["default"]
29
- else
30
- @schema = @xslthash[xslt_version]
31
- end
32
-
33
- if @type == 'critical'
34
- @xslt_dir = @schema[:critical]
35
- elsif @type == 'documentary'
36
- @xslt_dir = @schema[:documentary]
37
- end
38
-
39
-
40
- if @filehash[:source] == 'local'
41
- item = Item.new(@projectfile, @fs)
42
- @current_branch = item.git_current_branch
43
- # the effort here is to only set instance variable when absolutely necessary
44
- if @current_branch != @ed
45
- @item = item
46
- end
47
- end
9
+ class Transcription < Resource
10
+ # initialization is handled by the Resource class
11
+ def file_path
12
+ file_path = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasXML")).first[:o].to_s
13
+ end
14
+ def transcription_type
15
+ type = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/transcriptionType")).first[:o].to_s
16
+ type.downcase
48
17
  end
49
- ## Begin file path methods
50
- # Returns the absolute path of the file requested
51
- def file_path
52
- @filehash[:path]
53
- end
54
- def file
55
-
56
- file = open(self.file_path)
57
- end
58
- def nokogiri
59
- xmldoc = Nokogiri::XML(self.file)
60
18
 
61
- end
62
- ## End File Path Methods
63
- ### Item Header Extraction and Metadata Methods
64
- def title
65
- xmldoc = self.nokogiri
66
-
67
- title = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]", 'tei' => 'http://www.tei-c.org/ns/1.0')
68
- return title.text
69
- end
70
- def author
71
- xmldoc = self.nokogiri
72
- author = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author", 'tei' => 'http://www.tei-c.org/ns/1.0')
73
- return author.text
74
- end
75
- def editor
76
- xmldoc = self.nokogiri
77
- editor = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor", 'tei' => 'http://www.tei-c.org/ns/1.0')
78
- return editor.text
79
- end
80
- def ed_no
81
- xmldoc = self.nokogiri
82
- ed_no = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
83
- return ed_no.value
84
- end
85
- def ed_date
86
- xmldoc = self.nokogiri
87
- ed_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
88
- return ed_date.value
89
- end
90
- def pub_date
91
- xmldoc = self.nokogiri
92
- pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
93
- return pub_date.value
94
- end
95
- def encoding_method
96
- xmldoc = self.nokogiri
97
- encoding_method = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method", 'tei' => 'http://www.tei-c.org/ns/1.0')
98
- return encoding_method.value
99
- end
100
- def encoding_location
101
- xmldoc = self.nokogiri
102
- encoding_location = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location", 'tei' => 'http://www.tei-c.org/ns/1.0')
103
- return encoding_location.value
104
- end
105
- def number_of_columns
106
- xmldoc = self.nokogiri
107
- test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
108
- if @type == "critical"
109
- number_of_columns = nil
110
- elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
111
- number_of_columns = 1
112
- elsif xmldoc.xpath("//tei:cb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
113
- number_of_columns = 2
114
- end
115
- return number_of_columns
116
- end
117
-
118
- =begin - I think these methods belong with the Item or ItemRepo Object
119
-
120
- ### End Header and Metadata Information Extraction Methods ###
121
- ### Begin GIT functions ###
122
- def is_git_dir
123
- gitpath = @file_dir + ".git"
124
-
125
- if File.directory?(gitpath)
126
- true
127
- else
128
- false
129
- end
130
- end
131
- def git_branches
132
- repo = Rugged::Repository.new(@file_dir)
133
- branches = repo.branches.map { |branch| branch.name }
134
- return branches
135
- end
136
- def git_current_branch
137
- repo = Rugged::Repository.new(@file_dir)
138
- current_branch = repo.head.name.gsub(%r!\Arefs/heads/(.*)\z!) { $1 }
139
- return current_branch
140
- end
141
- def git_tags
142
- repo = Rugged::Repository.new(@file_dir)
143
- tags = repo.tags.map { |tag| tag.name }
144
- return tags
145
- end
146
- #need test for this
147
- def git_checkout(branch)
148
- repo = Rugged::Repository.new(@file_dir)
149
- repo.checkout(branch)
150
- end
151
- ### End Git Methods ###
152
- =end
153
- ### Begin transform (XSLT) methocs ###
154
- def transform(xsltfile, xslt_param_array=[])
155
-
156
- xmlfile = self.file_path
157
- if @current_branch != @ed && @filehash[:source] == 'local'
158
- @item.git_checkout(@ed)
159
- doc = xslt_transform(xmlfile, xsltfile, xslt_param_array)
160
- @item.git_checkout(@current_branch);
161
- else
162
- doc = xslt_transform(xmlfile, xsltfile, xslt_param_array)
163
- end
164
- end
165
-
166
- def transform_main_view(xslt_param_array=[])
167
- xsltfile=@xslt_dir + @schema[:main_view] # "text_display.xsl"
168
- doc = self.transform(xsltfile, xslt_param_array=[])
169
- end
170
- def transform_index_view(xslt_param_array=[])
171
- xsltfile=@xslt_dir + @schema[:index_view] # "text_display_index.xsl"
172
- doc = self.transform( xsltfile, xslt_param_array=[])
173
- end
174
- def transform_clean(xslt_param_array=[])
175
- xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
176
- doc = self.transform(xsltfile, xslt_param_array=[])
177
- end
178
- def transform_plain_text(xslt_param_array=[])
179
- xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
180
- doc = self.transform(xsltfile, xslt_param_array=[])
181
- end
182
- def transform_toc(xslt_param_array=[])
183
- xsltfile=@xslt_dir + @schema[:toc] # "lectio_outline.xsl"
184
- doc = self.transform(xsltfile, xslt_param_array=[])
185
- end
186
- ### End of Transformation Methods ###
187
- ### Begin Statistics Methods ###
188
- def word_count
189
- plaintext = self.transform_plain_text
190
- size = plaintext.text.split.size
191
- end
192
- def word_array
193
- plaintext = self.transform_plain_text
194
- word_array = plaintext.text.split
195
- word_array.map!{ |word| word.downcase}
196
- end
197
- def word_frequency(sort, order)
198
- word_array = self.word_array
199
- wf = Hash.new(0)
200
- word_array.each { |word| wf[word] += 1 }
201
-
202
- if sort == "frequency"
203
- if order == "descending" # high to low
204
- wf = wf.sort_by{|k,v| v}.reverse
205
- elsif order == "ascending" # low to high
206
- wf = wf.sort_by{|k,v| v}
207
- end
208
- elsif sort == "word"
209
- if order == "descending" # z - a
210
- wf = wf.sort_by{|k,v| k}.reverse
211
- elsif order == "ascending" #a - z
212
- wf = wf.sort_by{|k,v| k}
213
- end
214
- end
215
- return wf.to_h
216
- end
217
- def number_of_body_paragraphs
218
- if @current_branch != @ed && @filehash[:source] == 'local'
219
- @item.git_checkout(@ed)
220
- xmldoc = self.nokogiri
221
- p = xmldoc.xpath("//tei:body//tei:p", 'tei' => 'http://www.tei-c.org/ns/1.0')
222
- @item.git_checkout(@current_branch);
223
- else
224
- xmldoc = self.nokogiri
225
- p = xmldoc.xpath("//tei:body//tei:p", 'tei' => 'http://www.tei-c.org/ns/1.0')
226
- end
227
- return p.count
228
- end
229
- def paragraphs
230
- ## it's not good to keep reusing this, git check out condition. Need a better solution
231
- if @current_branch != @ed && @filehash[:source] == 'local'
232
- @item.git_checkout(@ed)
233
- xmldoc = self.nokogiri
234
- paragraphs = xmldoc.xpath("//tei:body//tei:p/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
235
- @item.git_checkout(@current_branch);
236
- else
237
- xmldoc = self.nokogiri
238
- paragraphs = xmldoc.xpath("//tei:body//tei:p/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
239
- end
240
-
241
- paragraph_objects = paragraphs.map do |p| Paragraph.new(@projectfile, @filehash, p.value) end
242
-
243
- return paragraph_objects
244
- end
245
- def paragraph(pid)
246
- Paragraph.new(@projectfile, @filehash, pid)
247
- end
19
+ def file(confighash)
20
+ file = File.new(self.file_path, self.transcription_type, confighash)
21
+ return file
22
+ end
23
+ #NOTE: this is really a temporary method, since the database
24
+ #should point to a file corresponding to each transcription,
25
+ #dynamically generated by the exist-db database.
26
+ # But this could remain in case it is useful to grab the part
27
+ # from a file that would include a TEI header etc.
28
+ def file_part(confighash, partid)
29
+ file = FilePart.new(self.file_path, self.transcription_type, confighash, partid)
30
+ return file
31
+ end
248
32
  end
249
33
  end
@@ -1,3 +1,3 @@
1
1
  module Lbp
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,18 +1,33 @@
1
- $confighash = { texts_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projecfiles/GitTextfiles/",
2
- projectdatafile_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/Conf/",
3
- xslt_critical_dir: "/Users/JCWitt/WebPages/lbpwrapper/lombardpress/public/pl_xslt_stylesheets/",
4
- xslt_documentary_dir: "/Users/JCWitt/WebPages/lbpwrapper/lombardpress/public/pl_xslt_stylesheets/",
5
- xslt_main_view: "text_display.xsl",
6
- xslt_index_view: "text_display_index.xsl",
7
- xslt_clean: "clean_forStatistics.xsl",
8
- xslt_plain_text: "plaintext.xsl",
9
- xslt_toc: "lectio_outline.xsl",
10
- git_repo: "bitbucket.org/jeffreycwitt/"}
1
+ $confighash = {local_texts_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/",
2
+ citation_lists_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/citationlists/",
3
+ xslt_dirs: { "default" => {
4
+ critical: "/Users/jcwitt/Projects/lombardpress/lombardpress2/xslt/default/critical/",
5
+ documentary: "/Users/jcwitt/Projects/lombardpress/lombardpress2/xslt/default/documentary/",
6
+ main_view: "main_view.xsl",
7
+ clean_view: "clean_view.xsl",
8
+ plain_text: "plaintext.xsl",
9
+ toc: "lectio_outline.xsl"
10
+ }
11
+ },
12
+ git_repo: "bitbucket.org/jeffreycwitt/",
13
+ git_username: ENV["GUN"],
14
+ git_password: ENV["GPW"]
15
+ }
11
16
 
17
+
12
18
 
13
- #filehash = {path: "https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "origin"}
14
- $filehash = {path: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/lectio1/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "local"}
19
+
20
+ #$filehash = {path: "https://bitbucket.org/jeffreycwitt/lectio1/raw/master/reims_lectio1.xml", fs: "lectio1", ed: "master", type: "documentary", source: "origin"}
21
+ $filehash = {path: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/lectio1/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "local", commentary_id: "plaoulcommentary"}
15
22
 
16
23
  $projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/Conf/projectdata.xml"
17
24
 
18
- $pg_projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pg-projectfiles/Conf/projectdata.xml"
25
+ $pg_projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pg-projectfiles/Conf/projectdata.xml"
26
+
27
+ $auto_pp_projectfile = "/Users/JCWitt/WebPages/lbp.rb/pp-projectfiles/Conf/projectfile.xml"
28
+
29
+ $scta_url = "http://scta.info/text/plaoulcommentary/item/lectio1"
30
+ #$scta_url = "http://localhost:4567/text/plaoulcommentary/item/lectio1"
31
+
32
+ $commentary_url = "http://scta.info/text/plaoulcommentary/commentary"
33
+ #$commentary_url = "http://scta.info/text/wodehamordinatio/commentary"
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'lbp'
3
+ require 'pry'
4
+
5
+
6
+ describe 'expression object' do
7
+ #TODO: database needs to be changed so that shortID is "sententia"
8
+ $resource_obj1 = Lbp::Expression.new("sentences")
9
+ $resource_obj2 = Lbp::Expression.new("http://scta.info/resource/sententia")
10
+ $resource_item = Lbp::Expression.new("lectio1")
11
+ $resource_toplevelexpression = Lbp::Expression.new("plaoulcommentary")
12
+ $resource_itemFirstInSequence = Lbp::Expression.new("principiumI")
13
+ $resource_itemLastInSequence = Lbp::Expression.new("lectio134")
14
+ $resource_item2 = Lbp::Expression.new("pl-l1d1c1") #structureItem id
15
+ $resource_item3 = Lbp::Expression.new("http://scta.info/resource/l1-acfefv") #paragraph url
16
+ $resource_para = Lbp::Expression.new("l1-acfefv") #paragraph id
17
+ $resource_div1 = Lbp::Expression.new("wdr-l1d1q1") #div short id
18
+ $resource_div2 = Lbp::Expression.new("http://scta.info/resource/wdr-l1d1q1") #div url
19
+
20
+ it 'returns array of manifestations for given expression at the structureItem level' do
21
+ result = $resource_item.manifestationUrls
22
+ expect(result).to be_kind_of(Array)
23
+ end
24
+ it 'returns array of manifestations for given expression structureBlock level' do
25
+ result = $resource_para.manifestationUrls
26
+ expect(result).to be_kind_of(Array)
27
+ end
28
+ it 'returns type of resource id from url to check inheritance from Resource Class' do
29
+ result = $resource_item.type_shortId
30
+ expect(result).to be == "expression"
31
+ end
32
+ it 'returns canonical manifestation' do
33
+ result = $resource_item.canonicalManifestationUrl
34
+ expect(result).to be == "http://scta.info/resource/lectio1/critical"
35
+ end
36
+ it 'returns canonical transcription' do
37
+ result = $resource_item.canonicalTranscriptionUrl
38
+ expect(result).to be == "http://scta.info/resource/lectio1/critical/transcription"
39
+ end
40
+ it 'returns true or false for presence of canonical Transcription' do
41
+ result = $resource_item.canonicalTranscription?
42
+ expect(result).to be == true
43
+ end
44
+ it 'returns false for presence of canonical Transcription' do
45
+ $resource_without_transcript_started = Lbp::Expression.new("b3-q2")
46
+ result = $resource_without_transcript_started.canonicalTranscription?
47
+ expect(result).to be == false
48
+ end
49
+
50
+ it 'returns status of expression' do
51
+ result = $resource_item.status
52
+
53
+ expect(result).to be_kind_of(String)
54
+ end
55
+ it 'returns next expression at the same (structureItem) level' do
56
+ result = $resource_item.next
57
+ expect(result).to be_kind_of(String)
58
+ end
59
+ it 'returns null for expression next request when expression is last in the series' do
60
+ result = $resource_itemLastInSequence.next
61
+ expect(result).to be == nil
62
+ end
63
+ it 'returns previous expression at the same (structureItem) level' do
64
+ result = $resource_item.previous
65
+ expect(result).to be_kind_of(String)
66
+ end
67
+ it 'returns null for expression previous request when expression is first in the series' do
68
+ result = $resource_itemLastInSequence.next
69
+ expect(result).to be == nil
70
+ end
71
+ it 'returns next expression at the same (structureBlock) level' do
72
+ result = $resource_para.next
73
+ expect(result).to be_kind_of(String)
74
+ end
75
+ it 'returns previous expression at the same (structureBlock) level' do
76
+ result = $resource_para.previous
77
+ expect(result).to be_kind_of(String)
78
+ end
79
+ it 'returns top level expression for expression resource' do
80
+ result = $resource_para.top_level_expression_url
81
+ expect(result).to be_kind_of(String)
82
+ end
83
+ it 'returns top level expression for expression resource' do
84
+ result = $resource_item.top_level_expression_url
85
+ expect(result).to be_kind_of(String)
86
+ end
87
+ it 'returns top level expression for expression resource' do
88
+ result = $resource_item.top_level_expression_shortId
89
+ expect(result).to be_kind_of(String)
90
+ end
91
+ it 'returns the level integer from the expression' do
92
+ result = $resource_toplevelexpression.level
93
+ expect(result).to be_kind_of(Integer)
94
+ end
95
+
96
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+ require 'lbp'
3
+ require 'pry'
4
+ require 'nokogiri'
5
+
6
+ describe 'file_part object' do
7
+
8
+ require_relative "config_globals"
9
+ paragraph1 = "l1-cpspfs"
10
+ paragraph3 = "l1-shoatd"
11
+
12
+ $paragraph = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, paragraph1)
13
+ $div = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, paragraph1)
14
+ $topdiv = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, "lectio1")
15
+
16
+ it 'should return the pid for the Paragraph object' do
17
+ result = $paragraph.partid
18
+
19
+ expect(result).to be_kind_of(String)
20
+ end
21
+ # it 'should return the number of the paragraph number' do
22
+ # result = $paragraph.number
23
+ # expect(result).to be_kind_of(Integer)
24
+ # end
25
+ it 'should return the next paragraph object or nil if there are no more paragraphs' do
26
+ result = $paragraph.next
27
+ expect(result).to be_kind_of(Lbp::FilePart)
28
+ end
29
+ it 'should return the previous paragraph object or nil if there are no more paragraphs' do
30
+ result = $paragraph.previous
31
+ #this test works but I don't know how to write a test matching object or nil
32
+ #expect(result).to be(Lbp::Paragraph || nil)
33
+ end
34
+ it 'should return the plain text of the paragraph as a nokogiri object' do
35
+ result = $paragraph.transform_plain_text
36
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
37
+ end
38
+ #it 'should return the plain text of the paragraph as a nokogiri object' do
39
+ # result = $paragraph.transform_main_view
40
+ # expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
41
+ #end
42
+ it 'should return the plain text of the topdiv as a nokogiri object' do
43
+ result = $topdiv.transform_plain_text
44
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
45
+ end
46
+ it 'it should return the paragragraph as TEI XML an nokogiri node set' do
47
+ result = $paragraph.xml
48
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
49
+ end
50
+ it 'it should return element name of element with xmlid' do
51
+ result = $paragraph.element_name
52
+ expect(result).to be_kind_of(String)
53
+ end
54
+
55
+ end