lbp 0.0.2 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.ruby-gemset +1 -1
- data/.ruby-version +1 -1
- data/bin/lbp +33 -2
- data/lbp.gemspec +3 -4
- data/lib/lbp.rb +15 -5
- data/lib/lbp/expression.rb +163 -0
- data/lib/lbp/file.rb +173 -0
- data/lib/lbp/file_part.rb +120 -0
- data/lib/lbp/functions.rb +11 -2
- data/lib/lbp/manifestation.rb +36 -0
- data/lib/lbp/paragraph_image.rb +39 -0
- data/lib/lbp/query.rb +181 -0
- data/lib/lbp/resource.rb +72 -0
- data/lib/lbp/transcription.rb +27 -243
- data/lib/lbp/version.rb +1 -1
- data/spec/config_globals.rb +28 -13
- data/spec/expression_spec.rb +96 -0
- data/spec/file_part_spec.rb +55 -0
- data/spec/file_spec.rb +130 -0
- data/spec/paragraph_image_spec.rb +46 -0
- data/spec/query_spec.rb +27 -0
- data/spec/resource_spec.rb +78 -0
- data/spec/transcription_spec.rb +21 -111
- metadata +40 -19
- data/lib/lbp/collection.rb +0 -131
- data/lib/lbp/item.rb +0 -153
- data/lib/lbp/item_group.rb +0 -52
- data/lib/lbp/paragraph.rb +0 -87
- data/spec/collection_spec.rb +0 -60
- data/spec/item_group_spec.rb +0 -39
- data/spec/item_spec.rb +0 -74
- data/spec/paragraph_spec.rb +0 -37
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: be83290049ce0d3c203adf4e3ff2139a709ab72c
|
4
|
+
data.tar.gz: cfdfe9478a8f86ddd3e381c391cfc97142241f45
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: cc21ee397365f89a2194178c533d36fe42d91bebf472561234002a8a9e9e84ca3b61553daa12baf4a5aad9f55f78edbaadf5950e1ffd58cae52e8b1a253d6fe9
|
7
|
+
data.tar.gz: 0eb62da1320ecb9377ff83a5ce4fa3369117fa510af19c5160580823dcf0e00940b8f39750c5d2eddbddf28c9b9dd377143fad7e6ed2b31a9946f9b507b3ad31
|
data/.gitignore
CHANGED
data/.ruby-gemset
CHANGED
@@ -1 +1 @@
|
|
1
|
-
|
1
|
+
default
|
data/.ruby-version
CHANGED
@@ -1 +1 @@
|
|
1
|
-
ruby-2.2.
|
1
|
+
ruby-2.2.1
|
data/bin/lbp
CHANGED
@@ -65,7 +65,7 @@ class LbpCli < Thor
|
|
65
65
|
|
66
66
|
itemarray << headerdata
|
67
67
|
|
68
|
-
data.query(:predicate => RDF::
|
68
|
+
data.query(:predicate => RDF::URI.new("http://scta.info/property/hasItem")).each do |part|
|
69
69
|
|
70
70
|
newresource = RDF::Resource.new(part.object)
|
71
71
|
newgraph = RDF::Graph.load(newresource)
|
@@ -78,13 +78,44 @@ class LbpCli < Thor
|
|
78
78
|
|
79
79
|
unless status == 'Not Started'
|
80
80
|
title = newdata.query(:predicate => RDF::DC11.title).first.object
|
81
|
+
if newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).count == 0
|
82
|
+
question_title = "unknown"
|
83
|
+
else
|
84
|
+
question_title = newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).first.object
|
85
|
+
end
|
81
86
|
id = URI(part.object.to_s).path.split('/').last
|
82
87
|
itemunit = "
|
83
88
|
<item live='#{status}'>
|
84
89
|
<fileName filestem='#{id}'>#{id}.xml</fileName>
|
85
90
|
<title>#{title}</title>
|
86
|
-
|
91
|
+
<questionTitle>#{question_title}</questionTitle>"
|
87
92
|
itemarray << itemunit
|
93
|
+
#eventually hasPart property should be sctap:hasTranscription
|
94
|
+
hastranscriptions = newdata.query(:predicate => RDF::DC.hasPart)
|
95
|
+
if hastranscriptions.count > 0
|
96
|
+
partunitopen = "\n<hasParts>"
|
97
|
+
itemarray << partunitopen
|
98
|
+
hastranscriptions.each do |transcription|
|
99
|
+
transcription_resource = RDF::Resource.new(transcription.object)
|
100
|
+
transcription_graph = RDF::Graph.load(transcription_resource)
|
101
|
+
transcription_data = transcription_graph.data
|
102
|
+
transcription_title = transcription_data.query(:predicate => RDF::DC11.title).first.object
|
103
|
+
transcription_id = URI(transcription.object.to_s).path.split('/').last
|
104
|
+
transcription_slug = transcription_id.split("_").first
|
105
|
+
transcription_initial = transcription_slug.each_char.first.upcase # not ideal, some initials will be two letters
|
106
|
+
partunit = "\n<part>
|
107
|
+
<slug>#{transcription_slug}</slug>
|
108
|
+
<title>#{transcription_title}</title>
|
109
|
+
<initial>#{transcription_initial}</initial>
|
110
|
+
</part>"
|
111
|
+
itemarray << partunit
|
112
|
+
end
|
113
|
+
partunitclose = "\n</hasParts>"
|
114
|
+
itemarray << partunitclose
|
115
|
+
end
|
116
|
+
#end
|
117
|
+
close_item_unit = "\n</item>\n"
|
118
|
+
itemarray << close_item_unit
|
88
119
|
end
|
89
120
|
|
90
121
|
end
|
data/lbp.gemspec
CHANGED
@@ -23,14 +23,13 @@ Gem::Specification.new do |spec|
|
|
23
23
|
spec.add_development_dependency "rspec"
|
24
24
|
spec.add_development_dependency "pry"
|
25
25
|
spec.add_runtime_dependency "nokogiri"
|
26
|
-
spec.add_runtime_dependency "rugged"
|
26
|
+
#spec.add_runtime_dependency "rugged"
|
27
27
|
spec.add_runtime_dependency "thor"
|
28
28
|
spec.add_runtime_dependency "rdf"
|
29
29
|
spec.add_runtime_dependency "rdf-rdfxml"
|
30
|
+
spec.add_runtime_dependency "rdf-vocab"
|
30
31
|
spec.add_runtime_dependency "rest-client"
|
32
|
+
spec.add_runtime_dependency "sparql"
|
31
33
|
|
32
34
|
|
33
|
-
|
34
|
-
|
35
|
-
|
36
35
|
end
|
data/lib/lbp.rb
CHANGED
@@ -1,11 +1,21 @@
|
|
1
1
|
require "lbp/version"
|
2
2
|
|
3
|
+
require 'lbp/functions'
|
4
|
+
|
5
|
+
#still need review
|
6
|
+
require 'lbp/query'
|
7
|
+
require 'lbp/paragraph_image'
|
3
8
|
|
4
|
-
|
5
|
-
|
6
|
-
require 'lbp/
|
9
|
+
|
10
|
+
#new files
|
11
|
+
require 'lbp/resource'
|
12
|
+
require 'lbp/expression'
|
13
|
+
require 'lbp/manifestation'
|
7
14
|
require 'lbp/transcription'
|
8
|
-
|
9
|
-
require 'lbp/
|
15
|
+
require 'lbp/file'
|
16
|
+
require 'lbp/file_part'
|
17
|
+
|
18
|
+
|
19
|
+
|
10
20
|
|
11
21
|
|
data/lib/lbp/expression.rb
ADDED
@@ -0,0 +1,163 @@
|
|
1
|
+
require 'openssl'
|
2
|
+
require 'rdf'
|
3
|
+
require 'rdf/rdfxml'
|
4
|
+
require 'rdf/ntriples'
|
5
|
+
require 'rdf/vocab'
|
6
|
+
require 'lbp'
|
7
|
+
|
8
|
+
|
9
|
+
|
10
|
+
|
11
|
+
module Lbp
  # An SCTA expression resource.
  #
  # Every accessor reads from the solution set exposed by +results+ (provided
  # by Resource), filtering on the +:p+ binding for a property under
  # http://scta.info/property/ and reading the +:o+ binding of the match.
  #
  # Single-value accessors return +nil+ when the property is absent instead of
  # failing on +nil[:o]+, so the +?+ predicates can actually answer +false+.
  class Expression < Resource
    # inherits initialization from Resource

    # Common prefix of every SCTA property URI queried by this class.
    SCTA_PROPERTY_BASE = "http://scta.info/property/"

    # @return [Array<String>] URL of every manifestation of this expression
    def manifestationUrls
      property_solutions("hasManifestation").map { |solution| solution[:o].to_s }
    end

    # @return [String, nil] URL of the canonical manifestation, nil when none is declared
    def canonicalManifestationUrl
      first_object_string("hasCanonicalManifestation")
    end

    # @return [Manifestation] wrapper built from #canonicalManifestationUrl
    def canonicalManifestation
      Manifestation.new(canonicalManifestationUrl)
    end

    # @return [Boolean] true when a canonical manifestation is declared
    def canonicalManifestation?
      !canonicalManifestationUrl.nil?
    end

    # canonical transcription refers to the canonical transcription
    # of the canonical manifestation
    #
    # @return the value of Manifestation#canonicalTranscriptionUrl
    def canonicalTranscriptionUrl
      canonicalManifestation.canonicalTranscriptionUrl
    end

    # @return [Transcription] wrapper built from #canonicalTranscriptionUrl
    def canonicalTranscription
      Transcription.new(canonicalTranscriptionUrl)
    end

    # @return [Boolean] true when a canonical manifestation exists and it
    #   reports a non-nil canonical transcription URL
    def canonicalTranscription?
      # Check the manifestation first so no Manifestation is built from nil.
      return false unless canonicalManifestation?

      !canonicalTranscriptionUrl.nil?
    end

    # @param manifestationUrl URL of the manifestation to inspect
    # @return the canonical transcription URL reported by that manifestation
    def transcriptionUrl(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscriptionUrl
    end

    # @param manifestationUrl URL of the manifestation to inspect
    # @return the canonical transcription reported by that manifestation
    def transcription(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscription
    end

    # @return [String, nil] URL of the following expression, nil when there is none
    def next
      first_object_string("next")
    end

    # @return [String, nil] URL of the preceding expression, nil when there is none
    def previous
      first_object_string("previous")
    end

    # @return [Integer, nil] paragraphNumber for a "structureBlock",
    #   totalOrderNumber otherwise; nil when the property is absent
    def order_number
      ## TODO: consider changing property so that there is more symmetry here
      property = structureType_shortId == "structureBlock" ? "paragraphNumber" : "totalOrderNumber"
      value = first_object_string(property)
      value && value.to_i
    end

    # @return [String, nil] value of the status property
    def status
      first_object_string("status")
    end

    # @return [String, nil] URL of the top level expression this one is part of
    def top_level_expression_url
      #TODO make sure this can handle different structure types
      first_object_string("isPartOfTopLevelExpression")
    end

    # @return [String] last path segment of #top_level_expression_url
    def top_level_expression_shortId
      top_level_expression_url.split("/").last
    end

    # @return [Expression] wrapper built from #top_level_expression_url
    def top_level_expression
      Expression.new(top_level_expression_url)
    end

    # @return [String, nil] URL of the structure item this expression is part of
    def item_level_expression_url
      #TODO make sure this can handle different structure types
      first_object_string("isPartOfStructureItem")
    end

    # @return [String] last path segment of #item_level_expression_url
    def item_level_expression_shortId
      item_level_expression_url.split("/").last
    end

    # @return [Expression] wrapper built from #item_level_expression_url
    def item_level_expression
      Expression.new(item_level_expression_url)
    end

    # @return [Integer, nil] value of the level property, nil when absent
    def level
      value = first_object_string("level")
      value && value.to_i
    end

    # connection properties
    #TODO: notice how all these return the filtered solution set (an RDF:: object)
    # rather than already performing the conversion to strings as is done in all
    # the above methods; this should be standardized

    # @return solutions whose predicate is the abbreviates property
    def abbreviates
      property_solutions("abbreviates")
    end

    # @return solutions whose predicate is the abbreviatedBy property
    def abbreviatedBy
      property_solutions("abbreviatedBy")
    end

    # @return solutions whose predicate is the references property
    def references
      property_solutions("references")
    end

    # @return solutions whose predicate is the referencedBy property
    def referencedBy
      property_solutions("referencedBy")
    end

    # @return solutions whose predicate is the copies property
    def copies
      property_solutions("copies")
    end

    # @return solutions whose predicate is the copiedBy property
    def copiedBy
      property_solutions("copiedBy")
    end

    # @return solutions whose predicate is the mentions property
    def mentions
      property_solutions("mentions")
    end

    # @return solutions whose predicate is the quotes property
    def quotes
      property_solutions("quotes")
    end

    # @return solutions whose predicate is the quotedBy property
    def quotedBy
      property_solutions("quotedBy")
    end

    private

    # Solutions from +results+ whose +:p+ binding is the named SCTA property.
    # The set is duplicated before filtering so +results+ itself is untouched.
    def property_solutions(property)
      self.results.dup.filter(:p => RDF::URI(SCTA_PROPERTY_BASE + property))
    end

    # String form of the +:o+ binding of the first matching solution,
    # or nil when no solution carries the property.
    def first_object_string(property)
      solution = property_solutions(property).first
      solution && solution[:o].to_s
    end
  end
end
|
data/lib/lbp/file.rb
ADDED
@@ -0,0 +1,173 @@
|
|
1
|
+
require 'nokogiri'
|
2
|
+
#require 'lbp/functions'
|
3
|
+
#require 'lbp/item'
|
4
|
+
require 'open-uri'
|
5
|
+
require 'lbp'
|
6
|
+
|
7
|
+
module Lbp
  # class should be renamed to Transcription
  #
  # Wraps one TEI XML transcription file: fetches it, exposes header metadata,
  # runs the configured XSLT stylesheets over it and computes word statistics.
  #
  # NOTE: inside the Lbp namespace the constant +File+ resolves to this class,
  # not to Ruby's core ::File.
  class File
    attr_reader :xslt_dir, :file_path

    # Namespace binding handed to every XPath query against the TEI document.
    TEI_NAMESPACE = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze

    # @param filepath path or URL of the TEI file, handed to +open+ by #file
    # @param transcription_type "critical", "documentary" or "diplomatic"
    #   (compared case-insensitively)
    # @param confighash configuration hash; reads +:xslt_dirs+ here and
    #   +:git_username+ / +:git_password+ in #file
    def initialize(filepath, transcription_type, confighash)
      @file_path = filepath
      @confighash = confighash
      @xslthash = @confighash[:xslt_dirs]

      @type = transcription_type # critical or documentary

      #xslt version needs to gathered from a method
      #for now its being set to nil because no documents currently declare it
      xslt_version = nil
      @schema = xslt_version.nil? ? @xslthash["default"] : @xslthash[xslt_version]

      # An unrecognised type leaves @xslt_dir nil; the transform_* helpers
      # then fail when they try to build a stylesheet path.
      if critical?
        @xslt_dir = @schema[:critical]
      elsif documentary?
        @xslt_dir = @schema[:documentary]
      end
    end

    # Opens #file_path, always sending the configured basic-auth credentials.
    #
    # @return the handle returned by +open+; the caller is responsible for closing it
    def file
      #TODO: needs to be written so auth is only need after request without
      #auth is rejected
      # NOTE(review): opening URLs through Kernel#open depends on open-uri's
      # patch, which newer Rubies removed in favour of URI.open — confirm the
      # target Ruby version before upgrading.
      credentials = [@confighash[:git_username], @confighash[:git_password]]
      open(self.file_path, { :http_basic_authentication => credentials })
    end

    # Parses the file with Nokogiri and closes the underlying handle afterwards.
    #
    # @return the parsed XML document
    def nokogiri
      source = file
      begin
        Nokogiri::XML(source)
      ensure
        # the handle is only needed while parsing; do not leak it
        source.close if source.respond_to?(:close)
      end
    end

    ## End File Path Methods
    ### Item Header Extraction and Metadata Methods

    # @return [String] text of the first title in the TEI titleStmt ("" when absent)
    def title
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]")
    end

    # @return [String] concatenated text of the titleStmt author elements
    def author
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author")
    end

    # @return [String] concatenated text of the titleStmt editor elements
    def editor
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor")
    end

    # @return [String, nil] @n of the first edition element, nil when absent
    def ed_no
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n")
    end

    # @return [String, nil] @when of the edition date, nil when absent
    def ed_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when")
    end

    # @return [String, nil] @when of the publication date, nil when absent
    def pub_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when")
    end

    # @return [String, nil] @method of variantEncoding, nil when absent
    def encoding_method
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method")
    end

    # @return [String, nil] @location of variantEncoding, nil when absent
    def encoding_location
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location")
    end

    # @return [Integer, nil] nil for critical transcriptions; otherwise 1 when
    #   the document contains a tei:pb, 2 when it contains a tei:cb but no
    #   tei:pb, and nil when it contains neither
    def number_of_columns
      return nil if critical?

      xmldoc = nokogiri
      # NOTE(review): pb is tested before cb, so a document holding both is
      # reported as one column — confirm this precedence is intended.
      return 1 unless xmldoc.xpath("//tei:pb", TEI_NAMESPACE).count == 0
      return 2 unless xmldoc.xpath("//tei:cb", TEI_NAMESPACE).count == 0

      nil
    end

    ### Begin transform (XSLT) methods ###

    # Runs +xslt_transform+ on the parsed document.
    #
    # @param xsltfile path of the stylesheet
    # @param xslt_param_array parameters forwarded to the stylesheet
    def transform(xsltfile, xslt_param_array=[])
      xslt_transform(self.nokogiri, xsltfile, xslt_param_array)
    end

    # Runs +xslt_apply_to+ on the parsed document.
    #
    # @param xsltfile path of the stylesheet
    # @param xslt_param_array parameters forwarded to the stylesheet
    def transform_apply(xsltfile, xslt_param_array=[])
      xslt_apply_to(self.nokogiri, xsltfile, xslt_param_array)
    end

    # Applies the schema's :main_view stylesheet, e.g. "text_display.xsl".
    def transform_main_view(xslt_param_array=[])
      transform_apply(stylesheet_path(:main_view), xslt_param_array)
    end

    # Applies the schema's :index_view stylesheet, e.g. "text_display_index.xsl".
    def transform_index_view(xslt_param_array=[])
      transform_apply(stylesheet_path(:index_view), xslt_param_array)
    end

    # Applies the schema's :clean_view stylesheet, e.g. "clean_forStatistics.xsl".
    def transform_clean(xslt_param_array=[])
      transform_apply(stylesheet_path(:clean_view), xslt_param_array)
    end

    # Same stylesheet as #transform_clean, but run through #transform.
    def transform_clean_nokogiri(xslt_param_array=[])
      transform(stylesheet_path(:clean_view), xslt_param_array)
    end

    # Applies the schema's :plain_text stylesheet, e.g. "plaintext.xsl".
    def transform_plain_text(xslt_param_array=[])
      transform_apply(stylesheet_path(:plain_text), xslt_param_array)
    end

    # Same stylesheet as #transform_plain_text, but run through #transform.
    def transform_plain_text_nokogiri(xslt_param_array=[])
      transform(stylesheet_path(:plain_text), xslt_param_array)
    end

    # Applies the schema's :json stylesheet.
    def transform_json(xslt_param_array=[])
      transform_apply(stylesheet_path(:json), xslt_param_array)
    end

    # Applies the schema's :toc stylesheet, e.g. "lectio_outline.xsl".
    def transform_toc(xslt_param_array=[])
      transform_apply(stylesheet_path(:toc), xslt_param_array)
    end

    ### End of Transformation Methods ###
    ### Begin Statistics Methods ###

    # @return [Integer] number of whitespace-separated words in the plain text
    def word_count
      transform_plain_text.split.size
    end

    # @return [Array<String>] every word of the plain text, downcased
    def word_array
      transform_plain_text.split.map(&:downcase)
    end

    # Counts how often each (downcased) word occurs.
    #
    # @param sort "frequency" or "word"
    # @param order "ascending" or "descending"
    # @return [Hash] word => count; left in first-seen order when +sort+ or
    #   +order+ is not one of the recognised values
    def word_frequency(sort, order)
      counts = Hash.new(0)
      word_array.each { |word| counts[word] += 1 }

      sorted =
        if sort == "frequency"
          counts.sort_by { |_word, count| count } # low to high
        elsif sort == "word"
          counts.sort_by { |word, _count| word } # a - z
        end
      return counts.to_h unless sorted

      case order
      when "ascending" then sorted.to_h
      when "descending" then sorted.reverse.to_h
      else counts.to_h
      end
    end

    private

    # True when the transcription type is "critical" (any letter case).
    def critical?
      @type.to_s.downcase == "critical"
    end

    # True when the transcription type is "documentary" or "diplomatic" (any letter case).
    def documentary?
      %w[documentary diplomatic].include?(@type.to_s.downcase)
    end

    # Text content of every node matching +xpath+ in a freshly parsed document.
    def tei_text(xpath)
      nokogiri.xpath(xpath, TEI_NAMESPACE).text
    end

    # Value of the first attribute matching +xpath+, or nil when nothing matches.
    def tei_attribute(xpath)
      node = nokogiri.at_xpath(xpath, TEI_NAMESPACE)
      node && node.value
    end

    # Stylesheet location: the type-specific directory followed by the schema entry for +key+.
    def stylesheet_path(key)
      @xslt_dir + @schema[key]
    end
  end
end
|