lbp 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,249 +1,33 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
- require 'lbp/item'
5
- require 'open-uri'
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
6
7
 
7
8
  module Lbp
8
- class Transcription
9
- attr_reader :fs, :type, :ed, :xslt_dir
10
-
11
- def initialize(projectfile, filehash)
12
-
13
- @filehash = filehash
14
- @projectfile = projectfile
15
-
16
- @fs = filehash[:fs]
17
- @type = filehash[:type] # critical or documentary
18
- @ed = filehash[:ed]
19
-
20
- @confighash = Collection.new(@projectfile).confighash
21
- @xslthash = @confighash[:xslt_dirs]
22
-
23
- #xslt version needs to gathered from a method
24
- xslt_version = nil
25
- #for now its being set to nil because no documents currently declare it
26
-
27
- if xslt_version == nil
28
- @schema = @xslthash["default"]
29
- else
30
- @schema = @xslthash[xslt_version]
31
- end
32
-
33
- if @type == 'critical'
34
- @xslt_dir = @schema[:critical]
35
- elsif @type == 'documentary'
36
- @xslt_dir = @schema[:documentary]
37
- end
38
-
39
-
40
- if @filehash[:source] == 'local'
41
- item = Item.new(@projectfile, @fs)
42
- @current_branch = item.git_current_branch
43
- # the effort here is to only set instance variable when absolutely necessary
44
- if @current_branch != @ed
45
- @item = item
46
- end
47
- end
9
+ class Transcription < Resource
10
+ # initialization handled by the Resource class
11
+ def file_path # location of this transcription's XML, read from the hasXML property
12
+ file_path = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasXML")).first[:o].to_s # object (:o) of the first matching result, as a String
13
+ end
14
+ def transcription_type # lower-cased value of the transcriptionType property
15
+ type = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/transcriptionType")).first[:o].to_s # object (:o) of the first matching result, as a String
16
+ type.downcase
48
17
  end
49
- ## Begin file path methods
50
- # Returns the absolute path of the file requested
51
- def file_path
52
- @filehash[:path]
53
- end
54
- def file
55
-
56
- file = open(self.file_path)
57
- end
58
- def nokogiri
59
- xmldoc = Nokogiri::XML(self.file)
60
18
 
61
- end
62
- ## End File Path Methods
63
- ### Item Header Extraction and Metadata Methods
64
- def title
65
- xmldoc = self.nokogiri
66
-
67
- title = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]", 'tei' => 'http://www.tei-c.org/ns/1.0')
68
- return title.text
69
- end
70
- def author
71
- xmldoc = self.nokogiri
72
- author = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author", 'tei' => 'http://www.tei-c.org/ns/1.0')
73
- return author.text
74
- end
75
- def editor
76
- xmldoc = self.nokogiri
77
- editor = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor", 'tei' => 'http://www.tei-c.org/ns/1.0')
78
- return editor.text
79
- end
80
- def ed_no
81
- xmldoc = self.nokogiri
82
- ed_no = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
83
- return ed_no.value
84
- end
85
- def ed_date
86
- xmldoc = self.nokogiri
87
- ed_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
88
- return ed_date.value
89
- end
90
- def pub_date
91
- xmldoc = self.nokogiri
92
- pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
93
- return pub_date.value
94
- end
95
- def encoding_method
96
- xmldoc = self.nokogiri
97
- encoding_method = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method", 'tei' => 'http://www.tei-c.org/ns/1.0')
98
- return encoding_method.value
99
- end
100
- def encoding_location
101
- xmldoc = self.nokogiri
102
- encoding_location = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location", 'tei' => 'http://www.tei-c.org/ns/1.0')
103
- return encoding_location.value
104
- end
105
- def number_of_columns
106
- xmldoc = self.nokogiri
107
- test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
108
- if @type == "critical"
109
- number_of_columns = nil
110
- elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
111
- number_of_columns = 1
112
- elsif xmldoc.xpath("//tei:cb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
113
- number_of_columns = 2
114
- end
115
- return number_of_columns
116
- end
117
-
118
- =begin - I think these methods belong with the Item or ItemRepo Object
119
-
120
- ### End Header and Metadata Information Extraction Methods ###
121
- ### Begin GIT functions ###
122
- def is_git_dir
123
- gitpath = @file_dir + ".git"
124
-
125
- if File.directory?(gitpath)
126
- true
127
- else
128
- false
129
- end
130
- end
131
- def git_branches
132
- repo = Rugged::Repository.new(@file_dir)
133
- branches = repo.branches.map { |branch| branch.name }
134
- return branches
135
- end
136
- def git_current_branch
137
- repo = Rugged::Repository.new(@file_dir)
138
- current_branch = repo.head.name.gsub(%r!\Arefs/heads/(.*)\z!) { $1 }
139
- return current_branch
140
- end
141
- def git_tags
142
- repo = Rugged::Repository.new(@file_dir)
143
- tags = repo.tags.map { |tag| tag.name }
144
- return tags
145
- end
146
- #need test for this
147
- def git_checkout(branch)
148
- repo = Rugged::Repository.new(@file_dir)
149
- repo.checkout(branch)
150
- end
151
- ### End Git Methods ###
152
- =end
153
- ### Begin transform (XSLT) methocs ###
154
- def transform(xsltfile, xslt_param_array=[])
155
-
156
- xmlfile = self.file_path
157
- if @current_branch != @ed && @filehash[:source] == 'local'
158
- @item.git_checkout(@ed)
159
- doc = xslt_transform(xmlfile, xsltfile, xslt_param_array)
160
- @item.git_checkout(@current_branch);
161
- else
162
- doc = xslt_transform(xmlfile, xsltfile, xslt_param_array)
163
- end
164
- end
165
-
166
- def transform_main_view(xslt_param_array=[])
167
- xsltfile=@xslt_dir + @schema[:main_view] # "text_display.xsl"
168
- doc = self.transform(xsltfile, xslt_param_array=[])
169
- end
170
- def transform_index_view(xslt_param_array=[])
171
- xsltfile=@xslt_dir + @schema[:index_view] # "text_display_index.xsl"
172
- doc = self.transform( xsltfile, xslt_param_array=[])
173
- end
174
- def transform_clean(xslt_param_array=[])
175
- xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
176
- doc = self.transform(xsltfile, xslt_param_array=[])
177
- end
178
- def transform_plain_text(xslt_param_array=[])
179
- xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
180
- doc = self.transform(xsltfile, xslt_param_array=[])
181
- end
182
- def transform_toc(xslt_param_array=[])
183
- xsltfile=@xslt_dir + @schema[:toc] # "lectio_outline.xsl"
184
- doc = self.transform(xsltfile, xslt_param_array=[])
185
- end
186
- ### End of Transformation Methods ###
187
- ### Begin Statistics Methods ###
188
- def word_count
189
- plaintext = self.transform_plain_text
190
- size = plaintext.text.split.size
191
- end
192
- def word_array
193
- plaintext = self.transform_plain_text
194
- word_array = plaintext.text.split
195
- word_array.map!{ |word| word.downcase}
196
- end
197
- def word_frequency(sort, order)
198
- word_array = self.word_array
199
- wf = Hash.new(0)
200
- word_array.each { |word| wf[word] += 1 }
201
-
202
- if sort == "frequency"
203
- if order == "descending" # high to low
204
- wf = wf.sort_by{|k,v| v}.reverse
205
- elsif order == "ascending" # low to high
206
- wf = wf.sort_by{|k,v| v}
207
- end
208
- elsif sort == "word"
209
- if order == "descending" # z - a
210
- wf = wf.sort_by{|k,v| k}.reverse
211
- elsif order == "ascending" #a - z
212
- wf = wf.sort_by{|k,v| k}
213
- end
214
- end
215
- return wf.to_h
216
- end
217
- def number_of_body_paragraphs
218
- if @current_branch != @ed && @filehash[:source] == 'local'
219
- @item.git_checkout(@ed)
220
- xmldoc = self.nokogiri
221
- p = xmldoc.xpath("//tei:body//tei:p", 'tei' => 'http://www.tei-c.org/ns/1.0')
222
- @item.git_checkout(@current_branch);
223
- else
224
- xmldoc = self.nokogiri
225
- p = xmldoc.xpath("//tei:body//tei:p", 'tei' => 'http://www.tei-c.org/ns/1.0')
226
- end
227
- return p.count
228
- end
229
- def paragraphs
230
- ## it's not good to keep reusing this, git check out condition. Need a better solution
231
- if @current_branch != @ed && @filehash[:source] == 'local'
232
- @item.git_checkout(@ed)
233
- xmldoc = self.nokogiri
234
- paragraphs = xmldoc.xpath("//tei:body//tei:p/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
235
- @item.git_checkout(@current_branch);
236
- else
237
- xmldoc = self.nokogiri
238
- paragraphs = xmldoc.xpath("//tei:body//tei:p/@xml:id", 'tei' => 'http://www.tei-c.org/ns/1.0')
239
- end
240
-
241
- paragraph_objects = paragraphs.map do |p| Paragraph.new(@projectfile, @filehash, p.value) end
242
-
243
- return paragraph_objects
244
- end
245
- def paragraph(pid)
246
- Paragraph.new(@projectfile, @filehash, pid)
247
- end
19
+ def file(confighash) # builds a File from this transcription's file path and type plus the given confighash
20
+ file = File.new(self.file_path, self.transcription_type, confighash) # NOTE(review): inside module Lbp this presumably resolves to Lbp::File, not Ruby's core File — confirm
21
+ return file
22
+ end
23
+ # NOTE: this is really a temporary method, since the database
24
+ # should point to a file corresponding to each transcription,
25
+ # dynamically generated by the exist-db database.
26
+ # It could remain, though, in case it is useful to grab the part
27
+ # from a file that includes a TEI header etc.
28
+ def file_part(confighash, partid)
29
+ file = FilePart.new(self.file_path, self.transcription_type, confighash, partid)
30
+ return file
31
+ end
248
32
  end
249
33
  end
@@ -1,3 +1,3 @@
1
1
  module Lbp
2
- VERSION = "0.0.2"
2
+ VERSION = "0.1.0"
3
3
  end
@@ -1,18 +1,33 @@
1
- $confighash = { texts_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projecfiles/GitTextfiles/",
2
- projectdatafile_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/Conf/",
3
- xslt_critical_dir: "/Users/JCWitt/WebPages/lbpwrapper/lombardpress/public/pl_xslt_stylesheets/",
4
- xslt_documentary_dir: "/Users/JCWitt/WebPages/lbpwrapper/lombardpress/public/pl_xslt_stylesheets/",
5
- xslt_main_view: "text_display.xsl",
6
- xslt_index_view: "text_display_index.xsl",
7
- xslt_clean: "clean_forStatistics.xsl",
8
- xslt_plain_text: "plaintext.xsl",
9
- xslt_toc: "lectio_outline.xsl",
10
- git_repo: "bitbucket.org/jeffreycwitt/"}
1
+ $confighash = {local_texts_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/",
2
+ citation_lists_dir: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/citationlists/",
3
+ xslt_dirs: { "default" => {
4
+ critical: "/Users/jcwitt/Projects/lombardpress/lombardpress2/xslt/default/critical/",
5
+ documentary: "/Users/jcwitt/Projects/lombardpress/lombardpress2/xslt/default/documentary/",
6
+ main_view: "main_view.xsl",
7
+ clean_view: "clean_view.xsl",
8
+ plain_text: "plaintext.xsl",
9
+ toc: "lectio_outline.xsl"
10
+ }
11
+ },
12
+ git_repo: "bitbucket.org/jeffreycwitt/",
13
+ git_username: ENV["GUN"],
14
+ git_password: ENV["GPW"]
15
+ }
11
16
 
17
+
12
18
 
13
- #filehash = {path: "https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "origin"}
14
- $filehash = {path: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/lectio1/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "local"}
19
+
20
+ #$filehash = {path: "https://bitbucket.org/jeffreycwitt/lectio1/raw/master/reims_lectio1.xml", fs: "lectio1", ed: "master", type: "documentary", source: "origin"}
21
+ $filehash = {path: "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/GitTextfiles/lectio1/lectio1.xml", fs: "lectio1", ed: "master", type: "critical", source: "local", commentary_id: "plaoulcommentary"}
15
22
 
16
23
  $projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pp-projectfiles/Conf/projectdata.xml"
17
24
 
18
- $pg_projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pg-projectfiles/Conf/projectdata.xml"
25
+ $pg_projectfile = "/Users/JCWitt/WebPages/lbplib-testfiles/pg-projectfiles/Conf/projectdata.xml"
26
+
27
+ $auto_pp_projectfile = "/Users/JCWitt/WebPages/lbp.rb/pp-projectfiles/Conf/projectfile.xml"
28
+
29
+ $scta_url = "http://scta.info/text/plaoulcommentary/item/lectio1"
30
+ #$scta_url = "http://localhost:4567/text/plaoulcommentary/item/lectio1"
31
+
32
+ $commentary_url = "http://scta.info/text/plaoulcommentary/commentary"
33
+ #$commentary_url = "http://scta.info/text/wodehamordinatio/commentary"
@@ -0,0 +1,96 @@
1
+ require 'spec_helper'
2
+ require 'lbp'
3
+ require 'pry'
4
+
5
+
6
+ describe 'expression object' do
7
+ # TODO: the database needs to be changed so that the shortID is "sententia"
8
+ $resource_obj1 = Lbp::Expression.new("sentences")
9
+ $resource_obj2 = Lbp::Expression.new("http://scta.info/resource/sententia")
10
+ $resource_item = Lbp::Expression.new("lectio1")
11
+ $resource_toplevelexpression = Lbp::Expression.new("plaoulcommentary")
12
+ $resource_itemFirstInSequence = Lbp::Expression.new("principiumI")
13
+ $resource_itemLastInSequence = Lbp::Expression.new("lectio134")
14
+ $resource_item2 = Lbp::Expression.new("pl-l1d1c1") #structureItem id
15
+ $resource_item3 = Lbp::Expression.new("http://scta.info/resource/l1-acfefv") #paragraph url
16
+ $resource_para = Lbp::Expression.new("l1-acfefv") #paragraph id
17
+ $resource_div1 = Lbp::Expression.new("wdr-l1d1q1") #div short id
18
+ $resource_div2 = Lbp::Expression.new("http://scta.info/resource/wdr-l1d1q1") #div url
19
+
20
+ it 'returns array of manifestations for given expression at the structureItem level' do
21
+ result = $resource_item.manifestationUrls
22
+ expect(result).to be_kind_of(Array)
23
+ end
24
+ it 'returns array of manifestations for given expression structureBlock level' do
25
+ result = $resource_para.manifestationUrls
26
+ expect(result).to be_kind_of(Array)
27
+ end
28
+ it 'returns type of resource id from url to check inheritance from Resource Class' do
29
+ result = $resource_item.type_shortId
30
+ expect(result).to be == "expression"
31
+ end
32
+ it 'returns canonical manifestation' do
33
+ result = $resource_item.canonicalManifestationUrl
34
+ expect(result).to be == "http://scta.info/resource/lectio1/critical"
35
+ end
36
+ it 'returns canonical transcription' do
37
+ result = $resource_item.canonicalTranscriptionUrl
38
+ expect(result).to be == "http://scta.info/resource/lectio1/critical/transcription"
39
+ end
40
+ it 'returns true or false for presence of canonical Transcription' do
41
+ result = $resource_item.canonicalTranscription?
42
+ expect(result).to be == true
43
+ end
44
+ it 'returns false for presence of canonical Transcription' do
45
+ $resource_without_transcript_started = Lbp::Expression.new("b3-q2")
46
+ result = $resource_without_transcript_started.canonicalTranscription?
47
+ expect(result).to be == false
48
+ end
49
+
50
+ it 'returns status of expression' do
51
+ result = $resource_item.status
52
+
53
+ expect(result).to be_kind_of(String)
54
+ end
55
+ it 'returns next expression at the same (structureItem) level' do
56
+ result = $resource_item.next
57
+ expect(result).to be_kind_of(String)
58
+ end
59
+ it 'returns null for expression next request when expression is last in the series' do
60
+ result = $resource_itemLastInSequence.next
61
+ expect(result).to be == nil
62
+ end
63
+ it 'returns previous expression at the same (structureItem) level' do
64
+ result = $resource_item.previous
65
+ expect(result).to be_kind_of(String)
66
+ end
67
+ it 'returns null for expression previous request when expression is first in the series' do
68
+ result = $resource_itemLastInSequence.next
69
+ expect(result).to be == nil
70
+ end
71
+ it 'returns next expression at the same (structureBlock) level' do
72
+ result = $resource_para.next
73
+ expect(result).to be_kind_of(String)
74
+ end
75
+ it 'returns previous expression at the same (structureBlock) level' do
76
+ result = $resource_para.previous
77
+ expect(result).to be_kind_of(String)
78
+ end
79
+ it 'returns top level expression for expression resource' do
80
+ result = $resource_para.top_level_expression_url
81
+ expect(result).to be_kind_of(String)
82
+ end
83
+ it 'returns top level expression for expression resource' do
84
+ result = $resource_item.top_level_expression_url
85
+ expect(result).to be_kind_of(String)
86
+ end
87
+ it 'returns top level expression for expression resource' do
88
+ result = $resource_item.top_level_expression_shortId
89
+ expect(result).to be_kind_of(String)
90
+ end
91
+ it 'returns the level integer from the expression' do
92
+ result = $resource_toplevelexpression.level
93
+ expect(result).to be_kind_of(Integer)
94
+ end
95
+
96
+ end
@@ -0,0 +1,55 @@
1
+ require 'spec_helper'
2
+ require 'lbp'
3
+ require 'pry'
4
+ require 'nokogiri'
5
+
6
+ describe 'file_part object' do
7
+
8
+ require_relative "config_globals"
9
+ paragraph1 = "l1-cpspfs"
10
+ paragraph3 = "l1-shoatd"
11
+
12
+ $paragraph = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, paragraph1)
13
+ $div = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, paragraph1)
14
+ $topdiv = Lbp::FilePart.new("https://bitbucket.org/jeffreycwitt/lectio1/raw/master/lectio1.xml", "critical", $confighash, "lectio1")
15
+
16
+ it 'should return the pid for the Paragraph object' do
17
+ result = $paragraph.partid
18
+
19
+ expect(result).to be_kind_of(String)
20
+ end
21
+ # it 'should return the number of the paragraph number' do
22
+ # result = $paragraph.number
23
+ # expect(result).to be_kind_of(Integer)
24
+ # end
25
+ it 'should return the next paragraph object or nil if there are no more paragraphs' do
26
+ result = $paragraph.next
27
+ expect(result).to be_kind_of(Lbp::FilePart)
28
+ end
29
+ it 'should return the previous paragraph object or nil if there are no more paragraphs' do
30
+ result = $paragraph.previous
31
+ #this test works but I don't know how to write a test matching object or nil
32
+ #expect(result).to be(Lbp::Paragraph || nil)
33
+ end
34
+ it 'should return the plain text of the paragraph as a nokogiri object' do
35
+ result = $paragraph.transform_plain_text
36
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
37
+ end
38
+ #it 'should return the plain text of the paragraph as a nokogiri object' do
39
+ # result = $paragraph.transform_main_view
40
+ # expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
41
+ #end
42
+ it 'should return the plain text of the topdiv as a nokogiri object' do
43
+ result = $topdiv.transform_plain_text
44
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
45
+ end
46
+ it 'it should return the paragragraph as TEI XML an nokogiri node set' do
47
+ result = $paragraph.xml
48
+ expect(result).to be_instance_of(Nokogiri::XML::NodeSet)
49
+ end
50
+ it 'it should return element name of element with xmlid' do
51
+ result = $paragraph.element_name
52
+ expect(result).to be_kind_of(String)
53
+ end
54
+
55
+ end