lbp 0.0.2 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.ruby-gemset +1 -1
- data/.ruby-version +1 -1
- data/bin/lbp +33 -2
- data/lbp.gemspec +3 -4
- data/lib/lbp.rb +15 -5
- data/lib/lbp/expression.rb +163 -0
- data/lib/lbp/file.rb +173 -0
- data/lib/lbp/file_part.rb +120 -0
- data/lib/lbp/functions.rb +11 -2
- data/lib/lbp/manifestation.rb +36 -0
- data/lib/lbp/paragraph_image.rb +39 -0
- data/lib/lbp/query.rb +181 -0
- data/lib/lbp/resource.rb +72 -0
- data/lib/lbp/transcription.rb +27 -243
- data/lib/lbp/version.rb +1 -1
- data/spec/config_globals.rb +28 -13
- data/spec/expression_spec.rb +96 -0
- data/spec/file_part_spec.rb +55 -0
- data/spec/file_spec.rb +130 -0
- data/spec/paragraph_image_spec.rb +46 -0
- data/spec/query_spec.rb +27 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/transcription_spec.rb +21 -111
- metadata +40 -19
- data/lib/lbp/collection.rb +0 -131
- data/lib/lbp/item.rb +0 -153
- data/lib/lbp/item_group.rb +0 -52
- data/lib/lbp/paragraph.rb +0 -87
- data/spec/collection_spec.rb +0 -60
- data/spec/item_group_spec.rb +0 -39
- data/spec/item_spec.rb +0 -74
- data/spec/paragraph_spec.rb +0 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be83290049ce0d3c203adf4e3ff2139a709ab72c
|
4
|
+
data.tar.gz: cfdfe9478a8f86ddd3e381c391cfc97142241f45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc21ee397365f89a2194178c533d36fe42d91bebf472561234002a8a9e9e84ca3b61553daa12baf4a5aad9f55f78edbaadf5950e1ffd58cae52e8b1a253d6fe9
|
7
|
+
data.tar.gz: 0eb62da1320ecb9377ff83a5ce4fa3369117fa510af19c5160580823dcf0e00940b8f39750c5d2eddbddf28c9b9dd377143fad7e6ed2b31a9946f9b507b3ad31
|
data/.gitignore
CHANGED
data/.ruby-gemset
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
default
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.2.
|
1
|
+
ruby-2.2.1
|
data/bin/lbp
CHANGED
@@ -65,7 +65,7 @@ class LbpCli < Thor
|
|
65
65
|
|
66
66
|
itemarray << headerdata
|
67
67
|
|
68
|
-
data.query(:predicate => RDF::
|
68
|
+
data.query(:predicate => RDF::URI.new("http://scta.info/property/hasItem")).each do |part|
|
69
69
|
|
70
70
|
newresource = RDF::Resource.new(part.object)
|
71
71
|
newgraph = RDF::Graph.load(newresource)
|
@@ -78,13 +78,44 @@ class LbpCli < Thor
|
|
78
78
|
|
79
79
|
unless status == 'Not Started'
|
80
80
|
title = newdata.query(:predicate => RDF::DC11.title).first.object
|
81
|
+
if newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).count == 0
|
82
|
+
question_title = "unknown"
|
83
|
+
else
|
84
|
+
question_title = newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).first.object
|
85
|
+
end
|
81
86
|
id = URI(part.object.to_s).path.split('/').last
|
82
87
|
itemunit = "
|
83
88
|
<item live='#{status}'>
|
84
89
|
<fileName filestem='#{id}'>#{id}.xml</fileName>
|
85
90
|
<title>#{title}</title>
|
86
|
-
|
91
|
+
<questionTitle>#{question_title}</questionTitle>"
|
87
92
|
itemarray << itemunit
|
93
|
+
#eventually hasPart property should be sctap:hasTranscription
|
94
|
+
hastranscriptions = newdata.query(:predicate => RDF::DC.hasPart)
|
95
|
+
if hastranscriptions.count > 0
|
96
|
+
partunitopen = "\n<hasParts>"
|
97
|
+
itemarray << partunitopen
|
98
|
+
hastranscriptions.each do |transcription|
|
99
|
+
transcription_resource = RDF::Resource.new(transcription.object)
|
100
|
+
transcription_graph = RDF::Graph.load(transcription_resource)
|
101
|
+
transcription_data = transcription_graph.data
|
102
|
+
transcription_title = transcription_data.query(:predicate => RDF::DC11.title).first.object
|
103
|
+
transcription_id = URI(transcription.object.to_s).path.split('/').last
|
104
|
+
transcription_slug = transcription_id.split("_").first
|
105
|
+
transcription_initial = transcription_slug.each_char.first.upcase # not ideal, some initials will be two letters
|
106
|
+
partunit = "\n<part>
|
107
|
+
<slug>#{transcription_slug}</slug>
|
108
|
+
<title>#{transcription_title}</title>
|
109
|
+
<initial>#{transcription_initial}</initial>
|
110
|
+
</part>"
|
111
|
+
itemarray << partunit
|
112
|
+
end
|
113
|
+
partunitclose = "\n</hasParts>"
|
114
|
+
itemarray << partunitclose
|
115
|
+
end
|
116
|
+
#end
|
117
|
+
close_item_unit = "\n</item>\n"
|
118
|
+
itemarray << close_item_unit
|
88
119
|
end
|
89
120
|
|
90
121
|
end
|
data/lbp.gemspec
CHANGED
@@ -23,14 +23,13 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "rspec"
|
24
24
|
spec.add_development_dependency "pry"
|
25
25
|
spec.add_runtime_dependency "nokogiri"
|
26
|
-
spec.add_runtime_dependency "rugged"
|
26
|
+
#spec.add_runtime_dependency "rugged"
|
27
27
|
spec.add_runtime_dependency "thor"
|
28
28
|
spec.add_runtime_dependency "rdf"
|
29
29
|
spec.add_runtime_dependency "rdf-rdfxml"
|
30
|
+
spec.add_runtime_dependency "rdf-vocab"
|
30
31
|
spec.add_runtime_dependency "rest-client"
|
32
|
+
spec.add_runtime_dependency "sparql"
|
31
33
|
|
32
34
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
35
|
end
|
data/lib/lbp.rb
CHANGED
@@ -1,11 +1,21 @@
|
|
1
1
|
require "lbp/version"
|
2
2
|
|
3
|
+
require 'lbp/functions'
|
4
|
+
|
5
|
+
#still need review
|
6
|
+
require 'lbp/query'
|
7
|
+
require 'lbp/paragraph_image'
|
3
8
|
|
4
|
-
|
5
|
-
|
6
|
-
require 'lbp/
|
9
|
+
|
10
|
+
#new files
|
11
|
+
require 'lbp/resource'
|
12
|
+
require 'lbp/expression'
|
13
|
+
require 'lbp/manifestation'
|
7
14
|
require 'lbp/transcription'
|
8
|
-
|
9
|
-
require 'lbp/
|
15
|
+
require 'lbp/file'
|
16
|
+
require 'lbp/file_part'
|
17
|
+
|
18
|
+
|
19
|
+
|
10
20
|
|
11
21
|
|
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'openssl'
|
2
|
+
require 'rdf'
|
3
|
+
require 'rdf/rdfxml'
|
4
|
+
require 'rdf/ntriples'
|
5
|
+
require 'rdf/vocab'
|
6
|
+
require 'lbp'
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
module Lbp
  # An SCTA "expression" resource.
  #
  # Every accessor below reads from +results+ (inherited from Resource), which
  # is treated as a sequence of RDF solutions exposing the predicate as +:p+
  # and the object as +:o+. Accessors that return a single value return +nil+
  # when the resource carries no statement for the property, so the predicate
  # methods (+canonicalManifestation?+, +canonicalTranscription?+) can answer
  # +false+ instead of raising.
  class Expression < Resource
    # Base URI shared by every SCTA property queried in this class.
    SCTA_PROPERTY_BASE = "http://scta.info/property/".freeze

    # Initialization is inherited from Resource.

    # @return [Array<String>] URLs of all manifestations of this expression
    #   (empty when there are none).
    def manifestationUrls
      scta_property_solutions("hasManifestation").map { |solution| solution[:o].to_s }
    end

    # @return [String, nil] URL of the canonical manifestation, or nil when
    #   none is declared.
    def canonicalManifestationUrl
      first_scta_property_value("hasCanonicalManifestation")
    end

    # @return [Manifestation, nil] the canonical manifestation, or nil when
    #   none is declared.
    def canonicalManifestation
      url = canonicalManifestationUrl
      Manifestation.new(url) unless url.nil?
    end

    # @return [Boolean] whether a canonical manifestation is declared.
    def canonicalManifestation?
      !canonicalManifestationUrl.nil?
    end

    # The canonical transcription is the canonical transcription of the
    # canonical manifestation.
    #
    # @return [String, nil] its URL, or nil when there is no canonical
    #   manifestation.
    def canonicalTranscriptionUrl
      manifestation = canonicalManifestation
      manifestation.canonicalTranscriptionUrl unless manifestation.nil?
    end

    # @return [Transcription, nil] the canonical transcription, or nil when
    #   no URL for it can be determined.
    def canonicalTranscription
      url = canonicalTranscriptionUrl
      Transcription.new(url) unless url.nil?
    end

    # @return [Boolean] whether a canonical transcription can be reached
    #   through the canonical manifestation.
    def canonicalTranscription?
      canonicalManifestation? && !canonicalTranscriptionUrl.nil?
    end

    # @param manifestationUrl [String] URL of one manifestation
    # @return the canonical transcription URL reported by that manifestation
    def transcriptionUrl(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscriptionUrl
    end

    # @param manifestationUrl [String] URL of one manifestation
    # @return the canonical transcription object reported by that manifestation
    def transcription(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscription
    end

    # @return [String, nil] URL of the following expression, or nil at the end.
    def next
      first_scta_property_value("next")
    end

    # @return [String, nil] URL of the preceding expression, or nil at the start.
    def previous
      first_scta_property_value("previous")
    end

    # TODO: consider changing the property so that there is more symmetry here.
    #
    # Structure blocks are ordered by paragraphNumber; every other structure
    # type is ordered by totalOrderNumber.
    #
    # @return [Integer, nil] the order number, or nil when the property is absent.
    def order_number
      property = structureType_shortId == "structureBlock" ? "paragraphNumber" : "totalOrderNumber"
      value = first_scta_property_value(property)
      value.to_i unless value.nil?
    end

    # @return [String, nil] value of the status property.
    def status
      first_scta_property_value("status")
    end

    # TODO: make sure this can handle different structure types.
    #
    # @return [String, nil] URL of the top-level expression this one belongs to.
    def top_level_expression_url
      first_scta_property_value("isPartOfTopLevelExpression")
    end

    # @return [String, nil] last path segment of the top-level expression URL.
    def top_level_expression_shortId
      url = top_level_expression_url
      url.split("/").last unless url.nil?
    end

    # @return [Expression, nil] the top-level expression as an object.
    def top_level_expression
      url = top_level_expression_url
      Expression.new(url) unless url.nil?
    end

    # TODO: make sure this can handle different structure types.
    #
    # @return [String, nil] URL of the structure item this expression belongs to.
    def item_level_expression_url
      first_scta_property_value("isPartOfStructureItem")
    end

    # @return [String, nil] last path segment of the item-level expression URL.
    def item_level_expression_shortId
      url = item_level_expression_url
      url.split("/").last unless url.nil?
    end

    # @return [Expression, nil] the item-level expression as an object.
    def item_level_expression
      url = item_level_expression_url
      Expression.new(url) unless url.nil?
    end

    # @return [Integer, nil] value of the level property, or nil when absent.
    def level
      value = first_scta_property_value("level")
      value.to_i unless value.nil?
    end

    # Connection properties.
    #
    # TODO: unlike the accessors above, these return the filtered solution
    # sequence itself rather than strings; this should be standardized.

    def abbreviates
      scta_property_solutions("abbreviates")
    end

    def abbreviatedBy
      scta_property_solutions("abbreviatedBy")
    end

    def references
      scta_property_solutions("references")
    end

    def referencedBy
      scta_property_solutions("referencedBy")
    end

    def copies
      scta_property_solutions("copies")
    end

    def copiedBy
      scta_property_solutions("copiedBy")
    end

    def mentions
      scta_property_solutions("mentions")
    end

    def quotes
      scta_property_solutions("quotes")
    end

    def quotedBy
      scta_property_solutions("quotedBy")
    end

    private

    # Solutions whose predicate is the named SCTA property.
    # The filter is applied to a copy, exactly as the original code did, so
    # the cached +results+ are left untouched.
    def scta_property_solutions(property)
      results.dup.filter(:p => RDF::URI(SCTA_PROPERTY_BASE + property))
    end

    # Object of the first matching solution as a String, or nil when the
    # property is not present on this resource.
    def first_scta_property_value(property)
      solution = scta_property_solutions(property).first
      solution[:o].to_s unless solution.nil?
    end
  end
end
|
data/lib/lbp/file.rb
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
#require 'lbp/functions'
|
3
|
+
#require 'lbp/item'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'lbp'
|
6
|
+
|
7
|
+
module Lbp
  # A single TEI XML transcription file, fetched from +file_path+ with HTTP
  # basic authentication, plus header-metadata readers, XSLT transformations
  # and simple word statistics.
  #
  # NOTE: the original author intends to rename this class to Transcription.
  class File
    # Namespace binding used by every XPath query in this class.
    TEI_NAMESPACE = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze
    # Transcription type names (compared case-insensitively).
    CRITICAL_TYPES = %w[critical].freeze
    DOCUMENTARY_TYPES = %w[documentary diplomatic].freeze

    attr_reader :xslt_dir, :file_path

    # @param filepath [String] location of the TEI file (opened via open-uri)
    # @param transcription_type [String] "critical", "documentary" or
    #   "diplomatic" (any letter case)
    # @param confighash [Hash] must contain +:xslt_dirs+; +:git_username+ and
    #   +:git_password+ are read when the file is fetched
    def initialize(filepath, transcription_type, confighash)
      @file_path = filepath
      @confighash = confighash
      @xslthash = @confighash[:xslt_dirs]
      @type = transcription_type

      # The XSLT version should eventually be read from the document; no
      # document declares one yet, so the "default" stylesheet set is used.
      xslt_version = nil
      @schema = @xslthash[xslt_version || "default"]

      # Left nil for an unrecognized type; the transform_* methods then raise.
      @xslt_dir = if critical?
                    @schema[:critical]
                  elsif documentary?
                    @schema[:documentary]
                  end
    end

    # Opens the transcription file, always sending the configured credentials.
    #
    # TODO: auth should only be sent after an unauthenticated request has
    # been rejected.
    #
    # @return the opened IO-like object
    def file
      credentials = [@confighash[:git_username], @confighash[:git_password]]
      options = { :http_basic_authentication => credentials }
      if URI.respond_to?(:open)
        # Kernel#open stopped handling URLs in Ruby 3.0; URI.open is the
        # supported entry point wherever open-uri provides it.
        URI.open(file_path, options)
      else
        open(file_path, options)
      end
    end

    # Fetches and parses the file; every call performs a fresh fetch.
    #
    # @return [Nokogiri::XML::Document]
    def nokogiri
      Nokogiri::XML(file)
    end

    ### Item header extraction and metadata methods ###
    # Text readers return "" and attribute readers return nil when the
    # header lacks the corresponding node.

    def title
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]")
    end

    def author
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author")
    end

    def editor
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor")
    end

    def ed_no
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n")
    end

    def ed_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when")
    end

    def pub_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when")
    end

    def encoding_method
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method")
    end

    def encoding_location
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location")
    end

    # @return [Integer, nil] nil for critical transcriptions (decided without
    #   fetching the file); otherwise 1 when the document contains tei:pb
    #   elements, 2 when it contains only tei:cb elements, nil when neither.
    def number_of_columns
      return nil if critical?

      xmldoc = nokogiri
      if xmldoc.xpath("//tei:pb", TEI_NAMESPACE).count != 0
        1
      elsif xmldoc.xpath("//tei:cb", TEI_NAMESPACE).count != 0
        2
      end
    end

    ### Begin transform (XSLT) methods ###
    # The *_nokogiri variants go through +transform+ (xslt_transform); the
    # others go through +transform_apply+ (xslt_apply_to).

    def transform(xsltfile, xslt_param_array=[])
      xslt_transform(nokogiri, xsltfile, xslt_param_array)
    end

    def transform_apply(xsltfile, xslt_param_array=[])
      xslt_apply_to(nokogiri, xsltfile, xslt_param_array)
    end

    def transform_main_view(xslt_param_array=[])
      transform_apply(xslt_path(:main_view), xslt_param_array) # e.g. "text_display.xsl"
    end

    def transform_index_view(xslt_param_array=[])
      transform_apply(xslt_path(:index_view), xslt_param_array) # e.g. "text_display_index.xsl"
    end

    def transform_clean(xslt_param_array=[])
      transform_apply(xslt_path(:clean_view), xslt_param_array) # e.g. "clean_forStatistics.xsl"
    end

    def transform_clean_nokogiri(xslt_param_array=[])
      transform(xslt_path(:clean_view), xslt_param_array)
    end

    def transform_plain_text(xslt_param_array=[])
      transform_apply(xslt_path(:plain_text), xslt_param_array) # e.g. "plaintext.xsl"
    end

    def transform_plain_text_nokogiri(xslt_param_array=[])
      transform(xslt_path(:plain_text), xslt_param_array)
    end

    def transform_json(xslt_param_array=[])
      transform_apply(xslt_path(:json), xslt_param_array)
    end

    def transform_toc(xslt_param_array=[])
      transform_apply(xslt_path(:toc), xslt_param_array) # e.g. "lectio_outline.xsl"
    end
    ### End of transformation methods ###

    ### Begin statistics methods ###

    # @return [Integer] number of whitespace-separated tokens in the plain text
    def word_count
      transform_plain_text.split.size
    end

    # @return [Array<String>] the plain-text tokens, downcased
    def word_array
      transform_plain_text.split.map { |word| word.downcase }
    end

    # @param sort [String] "frequency" or "word"
    # @param order [String] "ascending" or "descending"
    # @return [Hash{String => Integer}] word counts in the requested order;
    #   insertion order is left as-is when either argument is unrecognized
    def word_frequency(sort, order)
      counts = Hash.new(0)
      word_array.each { |word| counts[word] += 1 }

      pairs = case sort
              when "frequency" then counts.sort_by { |_word, count| count }
              when "word" then counts.sort_by { |word, _count| word }
              end
      return counts unless pairs && %w[ascending descending].include?(order)

      pairs.reverse! if order == "descending"
      pairs.to_h
    end

    private

    # True when the configured transcription type is a critical edition.
    def critical?
      CRITICAL_TYPES.include?(@type.to_s.downcase)
    end

    # True when the configured transcription type is documentary/diplomatic.
    def documentary?
      DOCUMENTARY_TYPES.include?(@type.to_s.downcase)
    end

    # Text content of every node matched by +path+ ("" when nothing matches).
    def tei_text(path)
      nokogiri.xpath(path, TEI_NAMESPACE).text
    end

    # Value of the first attribute node matched by +path+, or nil when absent.
    def tei_attribute(path)
      node = nokogiri.at_xpath(path, TEI_NAMESPACE)
      node.value unless node.nil?
    end

    # Full path of the stylesheet registered under +view+ in the schema hash.
    def xslt_path(view)
      if @xslt_dir.nil?
        raise "no XSLT directory is configured for transcription type #{@type.inspect}"
      end
      @xslt_dir + @schema[view]
    end
  end
end
|