lbp 0.1.0 → 0.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/lbp.rb +6 -6
- data/lib/lbp/article.rb +36 -0
- data/lib/lbp/expression.rb +59 -114
- data/lib/lbp/expression_type.rb +5 -0
- data/lib/lbp/file.rb +65 -37
- data/lib/lbp/manifestation.rb +5 -17
- data/lib/lbp/resource.rb +76 -50
- data/lib/lbp/resource_identifier.rb +40 -0
- data/lib/lbp/transcription.rb +16 -10
- data/lib/lbp/translation.rb +4 -0
- data/lib/lbp/version.rb +1 -1
- data/lib/lbp/work_group.rb +16 -0
- data/spec/article_spec.rb +23 -0
- data/spec/config_globals.rb +14 -15
- data/spec/expression_spec.rb +62 -34
- data/spec/file_spec.rb +55 -34
- data/spec/manifestation_spec.rb +16 -0
- data/spec/resource_identifier_spec.rb +53 -0
- data/spec/resource_spec.rb +22 -42
- data/spec/transcription_spec.rb +9 -4
- data/spec/work_group_spec.rb +29 -0
- metadata +14 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c8f418230bba408ab7abcc2bfc63535ce44a425
|
4
|
+
data.tar.gz: 89cb6d564dfd559786f940882b168ee20deab8b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f0cc56d0015815ac15c2e71e801afb290e3958205a0205c9b0160139e1c918a39aa6278e611b1f8f4f13b5931f1bd2f289b8c58cdb665b5de25afed919a89dd
|
7
|
+
data.tar.gz: dae6f816869aeeeb625a89c5eaffd4153c63ab071d79ed585611e3f8e293e0ef867461ac0d9ec5cb21ca1a378f4906cc8cac65bd1658aa1607b76b7b38f3ef7d
|
data/lib/lbp.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "lbp/version"
|
2
2
|
|
3
3
|
require 'lbp/functions'
|
4
|
-
|
4
|
+
|
5
5
|
#still need review
|
6
6
|
require 'lbp/query'
|
7
7
|
require 'lbp/paragraph_image'
|
@@ -9,13 +9,13 @@ require "lbp/version"
|
|
9
9
|
|
10
10
|
#new files
|
11
11
|
require 'lbp/resource'
|
12
|
+
require 'lbp/work_group'
|
12
13
|
require 'lbp/expression'
|
14
|
+
require 'lbp/expression_type'
|
13
15
|
require 'lbp/manifestation'
|
16
|
+
require 'lbp/translation'
|
14
17
|
require 'lbp/transcription'
|
15
18
|
require 'lbp/file'
|
16
19
|
require 'lbp/file_part'
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
require 'lbp/article'
|
21
|
+
require 'lbp/resource_identifier'
|
data/lib/lbp/article.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'openssl'
|
2
|
+
require 'rdf'
|
3
|
+
require 'rdf/rdfxml'
|
4
|
+
require 'rdf/ntriples'
|
5
|
+
require 'rdf/vocab'
|
6
|
+
require 'lbp'
|
7
|
+
|
8
|
+
module Lbp
|
9
|
+
class Article < Resource
|
10
|
+
#initialization handled by Resource class
|
11
|
+
def file_path
|
12
|
+
file_path = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasXML")).first[:o].to_s
|
13
|
+
end
|
14
|
+
def article_type
|
15
|
+
type = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/articleType")).first[:o].to_s
|
16
|
+
type.downcase
|
17
|
+
end
|
18
|
+
def article_type_shortId
|
19
|
+
self.article_type.split("/").last
|
20
|
+
end
|
21
|
+
|
22
|
+
#TODO: if the file object became more abstract the file operations could handle both transcriptions and articles
|
23
|
+
#Otherwise there should be a class called articleFile and transcriptionFile that inherits from generic File.
|
24
|
+
#def file(confighash)
|
25
|
+
# file = File.new(self.file_path, self.transcription_type, confighash)
|
26
|
+
# return file
|
27
|
+
#end
|
28
|
+
#NOTE: this really is a temporary method, since the database
|
29
|
+
#should point to file corresponding to each transcription
|
30
|
+
#dynamically generated by the exist-db database.
|
31
|
+
# but this could remain in case it was useful to grab the part
|
32
|
+
# from a file that would include a tei header etc.
|
33
|
+
|
34
|
+
#end
|
35
|
+
end
|
36
|
+
end
|
data/lib/lbp/expression.rb
CHANGED
@@ -6,157 +6,102 @@ require 'rdf/vocab'
|
|
6
6
|
require 'lbp'
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
9
|
module Lbp
|
12
10
|
class Expression < Resource
|
13
|
-
|
14
11
|
#inherits initialization from Resource
|
15
|
-
|
16
|
-
def
|
17
|
-
|
18
|
-
manifestations = results.map {|m| m[:o].to_s}
|
19
|
-
return manifestations
|
20
|
-
end
|
21
|
-
def canonicalManifestationUrl
|
22
|
-
manifestation = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalManifestation")).first[:o].to_s
|
23
|
-
return manifestation
|
24
|
-
end
|
25
|
-
def canonicalManifestation
|
26
|
-
url = self.canonicalManifestationUrl
|
27
|
-
manifestationObj = Manifestation.new(url)
|
28
|
-
return manifestationObj
|
29
|
-
end
|
30
|
-
def canonicalManifestation?
|
31
|
-
if self.canonicalManifestationUrl == nil
|
32
|
-
return false
|
33
|
-
else
|
34
|
-
return true
|
35
|
-
end
|
12
|
+
|
13
|
+
def structure_type #returns resource identifier
|
14
|
+
value("http://scta.info/property/structureType")
|
36
15
|
end
|
37
|
-
#
|
38
|
-
|
39
|
-
def canonicalTranscriptionUrl
|
40
|
-
manifestationObj = self.canonicalManifestation
|
41
|
-
url = manifestationObj.canonicalTranscriptionUrl
|
42
|
-
return url
|
43
|
-
end
|
44
|
-
def canonicalTranscription
|
45
|
-
url = self.canonicalTranscriptionUrl
|
46
|
-
transcriptionObj = Transcription.new(url)
|
47
|
-
return transcriptionObj
|
48
|
-
end
|
49
|
-
def canonicalTranscription?
|
50
|
-
if self.canonicalManifestation? == false
|
51
|
-
return false
|
52
|
-
else
|
53
|
-
if self.canonicalTranscriptionUrl == nil
|
54
|
-
return false
|
55
|
-
else
|
56
|
-
return true
|
57
|
-
end
|
58
|
-
end
|
16
|
+
def manifestations # returns array of available manifestations as ResourceIdentifiers
|
17
|
+
values("http://scta.info/property/hasManifestation")
|
59
18
|
end
|
60
|
-
def
|
61
|
-
|
62
|
-
transcriptionObj = manifestationObj.canonicalTranscriptionUrl
|
63
|
-
return transcriptionObj
|
19
|
+
def canonical_manifestation # returns a single manifestation ResourceIdentifier
|
20
|
+
value("http://scta.info/property/hasCanonicalManifestation")
|
64
21
|
end
|
65
|
-
def
|
66
|
-
|
67
|
-
transcriptionObj = manifestationObj.canonicalTranscription
|
68
|
-
return transcriptionObj
|
22
|
+
def canonical_manifestation? # returns boolean
|
23
|
+
!canonical_manifestation.to_s.nil?
|
69
24
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
76
|
-
return next_expression
|
25
|
+
# translations are a subclass of manifestations for any kind of manifestation not in the original language
|
26
|
+
# note that this currently means the manifestations methods will not grab translation-manifestations;
|
27
|
+
# these must be retrieved with the translations method
|
28
|
+
def translations
|
29
|
+
values("http://scta.info/property/hasTranslation")
|
77
30
|
end
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
31
|
+
def canonical_translation
|
32
|
+
values("http://scta.info/property/hasCanonicalTranslation")
|
33
|
+
end
|
34
|
+
def canonical_translation?
|
35
|
+
!canonical_translation.to_s.nil?
|
36
|
+
end
|
37
|
+
# canonical transcription refers to the canonical transcription of the canonical manifestation
|
38
|
+
def canonical_transcription # returns single transcription as ResourceIdentifier
|
39
|
+
manifestation = canonical_manifestation
|
40
|
+
unless manifestation == nil
|
41
|
+
return manifestation.resource.canonical_transcription
|
83
42
|
end
|
84
|
-
return previous_expression
|
85
43
|
end
|
86
|
-
def
|
44
|
+
def canonical_transcription? #returns boolean
|
45
|
+
!canonical_transcription.nil?
|
46
|
+
end
|
47
|
+
|
48
|
+
def next # returns resource identifier of next expression or nil
|
49
|
+
value("http://scta.info/property/next")
|
50
|
+
end
|
51
|
+
def previous #returns ResourceIdentifier or nil
|
52
|
+
value("http://scta.info/property/previous")
|
53
|
+
end
|
54
|
+
def order_number # returns integer
|
87
55
|
## TODO: consider changing property so that there is more symmetry here
|
88
|
-
if
|
89
|
-
|
56
|
+
if structure_type.short_id == "structureBlock"
|
57
|
+
value("http://scta.info/property/paragraphNumber").to_s.to_i
|
90
58
|
else
|
91
|
-
|
59
|
+
value("http://scta.info/property/totalOrderNumber").to_s.to_i
|
92
60
|
end
|
93
|
-
return ordernumber
|
94
61
|
end
|
95
|
-
def status
|
96
|
-
|
62
|
+
def status #returns string
|
63
|
+
value("http://scta.info/property/status").to_s
|
97
64
|
end
|
98
|
-
|
99
|
-
def top_level_expression_url
|
65
|
+
def top_level_expression # returns resource identifier
|
100
66
|
#TODO make sure this can handle different structure types
|
101
|
-
|
102
|
-
end
|
103
|
-
def top_level_expression_shortId
|
104
|
-
self.top_level_expression_url.split("/").last
|
67
|
+
value("http://scta.info/property/isPartOfTopLevelExpression")
|
105
68
|
end
|
106
|
-
def
|
107
|
-
expression = Expression.new(self.top_level_expression_url)
|
108
|
-
end
|
109
|
-
|
110
|
-
def item_level_expression_url
|
69
|
+
def item_level_expression # returns resource identifier
|
111
70
|
#TODO make sure this can handle different structure types
|
112
|
-
|
113
|
-
end
|
114
|
-
def item_level_expression_shortId
|
115
|
-
self.item_level_expression_url.split("/").last
|
71
|
+
value("http://scta.info/property/isPartOfStructureItem")
|
116
72
|
end
|
117
|
-
def
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
result = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/level")).first[:o]
|
122
|
-
unless self.results.count == 0
|
123
|
-
level = result.to_s.to_i
|
124
|
-
else
|
125
|
-
level = nil
|
126
|
-
end
|
127
|
-
return level
|
73
|
+
def level # returns resource integer
|
74
|
+
#same comment as earlier; this query does not actually return a uri,
|
75
|
+
#but a literal. We need to make sure the resource identifier can handle that
|
76
|
+
value("http://scta.info/property/level").to_s.to_i
|
128
77
|
end
|
129
78
|
|
130
|
-
#
|
131
|
-
|
132
|
-
# rather already performing the conversion to strings as is done in all the above methods
|
133
|
-
# this should be standardized
|
134
|
-
def abbreviates
|
135
|
-
abbreviates = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviates"))
|
79
|
+
def abbreviates # returns array of ResourceIdentifiers
|
80
|
+
values("http://scta.info/property/abbreviates")
|
136
81
|
end
|
137
82
|
def abbreviatedBy
|
138
|
-
|
83
|
+
values("http://scta.info/property/abbreviatedBy")
|
139
84
|
end
|
140
85
|
def references
|
141
|
-
|
86
|
+
values("http://scta.info/property/references")
|
142
87
|
end
|
143
88
|
def referencedBy
|
144
|
-
|
89
|
+
values("http://scta.info/property/referencedBy")
|
145
90
|
end
|
146
91
|
def copies
|
147
|
-
|
92
|
+
values("http://scta.info/property/copies")
|
148
93
|
end
|
149
94
|
def copiedBy
|
150
|
-
|
95
|
+
values("http://scta.info/property/copiedBy")
|
151
96
|
end
|
152
97
|
def mentions
|
153
|
-
|
98
|
+
values("http://scta.info/property/mentions")
|
154
99
|
end
|
155
100
|
def quotes
|
156
|
-
|
101
|
+
values("http://scta.info/property/quotes")
|
157
102
|
end
|
158
103
|
def quotedBy
|
159
|
-
|
104
|
+
values("http://scta.info/property/quotedBy")
|
160
105
|
end
|
161
106
|
|
162
107
|
end
|
data/lib/lbp/file.rb
CHANGED
@@ -6,45 +6,69 @@ require 'lbp'
|
|
6
6
|
|
7
7
|
module Lbp
|
8
8
|
# class should be renamed to Transcription
|
9
|
-
class File
|
9
|
+
class File
|
10
10
|
attr_reader :xslt_dir, :file_path
|
11
11
|
|
12
12
|
def initialize(filepath, transcription_type, confighash)
|
13
13
|
@file_path = filepath
|
14
14
|
@confighash = confighash
|
15
|
-
@xslthash = @confighash[:xslt_dirs]
|
16
15
|
|
17
|
-
|
16
|
+
unless confighash == nil
|
17
|
+
@stylesheets = @confighash[:stylesheets]
|
18
|
+
# identify proper xslt directory
|
19
|
+
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
#for now its being set to nil because no documents currently declare it
|
21
|
+
# get transcription type from xmlfile
|
22
|
+
@transcription_type = transcription_type # critical or documentary # there is also a method for this if one needs to get the type from the file itself
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# get xslt_version from xmlfile
|
25
|
+
@xslt_version = self.validating_schema_version
|
26
|
+
|
27
|
+
unless confighash == nil
|
28
|
+
@xslt_dir = "#{@confighash[:xslt_base]}#{@xslt_version}/#{@transcription_type}/"
|
29
|
+
end
|
28
30
|
|
29
|
-
if @type == 'critical' || @type == 'Critical'
|
30
|
-
@xslt_dir = @schema[:critical]
|
31
|
-
elsif @type == 'documentary' || @type == 'Documentary' || @type == 'diplomatic'
|
32
|
-
@xslt_dir = @schema[:documentary]
|
33
|
-
end
|
34
31
|
end
|
35
|
-
|
32
|
+
|
36
33
|
def file
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
|
34
|
+
file = open(self.file_path)
|
35
|
+
if file.base_uri.to_s != self.file_path
|
36
|
+
file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
|
37
|
+
end
|
42
38
|
return file
|
43
39
|
end
|
44
40
|
def nokogiri
|
45
41
|
xmldoc = Nokogiri::XML(self.file)
|
46
42
|
end
|
47
43
|
## End File Path Methods
|
44
|
+
|
45
|
+
## Get transcription type
|
46
|
+
def transcription_type_from_file
|
47
|
+
xmldoc = self.nokogiri
|
48
|
+
|
49
|
+
result = xmldoc.xpath("/tei:TEI/tei:text[1]/@type", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
50
|
+
|
51
|
+
if result.length > 0
|
52
|
+
return result.to_s
|
53
|
+
else
|
54
|
+
return "unknown"
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
## get validating schema label
|
59
|
+
def validating_schema_version
|
60
|
+
xmldoc = self.nokogiri
|
61
|
+
result = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:schemaRef[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
62
|
+
if result.length > 0
|
63
|
+
return result.to_s.split("-").last
|
64
|
+
else
|
65
|
+
return "default"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def transcription_type
|
70
|
+
|
71
|
+
end
|
48
72
|
### Item Header Extraction and Metadata Methods
|
49
73
|
def title
|
50
74
|
xmldoc = self.nokogiri
|
@@ -72,9 +96,13 @@ module Lbp
|
|
72
96
|
return ed_date.value
|
73
97
|
end
|
74
98
|
def pub_date
|
75
|
-
|
76
|
-
|
77
|
-
|
99
|
+
if self.validating_schema_version == "1.0.0"
|
100
|
+
return "no pub date in this schema"
|
101
|
+
else
|
102
|
+
xmldoc = self.nokogiri
|
103
|
+
pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
104
|
+
return pub_date.value
|
105
|
+
end
|
78
106
|
end
|
79
107
|
def encoding_method
|
80
108
|
xmldoc = self.nokogiri
|
@@ -89,7 +117,7 @@ module Lbp
|
|
89
117
|
def number_of_columns
|
90
118
|
xmldoc = self.nokogiri
|
91
119
|
test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
92
|
-
if @
|
120
|
+
if @transcription_type == "critical"
|
93
121
|
number_of_columns = nil
|
94
122
|
elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
|
95
123
|
number_of_columns = 1
|
@@ -107,35 +135,35 @@ module Lbp
|
|
107
135
|
doc = xslt_apply_to(self.nokogiri, xsltfile, xslt_param_array)
|
108
136
|
end
|
109
137
|
def transform_main_view(xslt_param_array=[])
|
110
|
-
xsltfile=@xslt_dir + @
|
138
|
+
xsltfile=@xslt_dir + @stylesheets[:main_view] # "text_display.xsl"
|
111
139
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
112
140
|
end
|
113
141
|
def transform_index_view(xslt_param_array=[])
|
114
|
-
xsltfile=@xslt_dir + @
|
142
|
+
xsltfile=@xslt_dir + @stylesheets[:index_view] # "text_display_index.xsl"
|
115
143
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
116
144
|
end
|
117
145
|
def transform_clean(xslt_param_array=[])
|
118
|
-
xsltfile=@xslt_dir + @
|
146
|
+
xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
|
119
147
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
120
148
|
end
|
121
149
|
def transform_clean_nokogiri(xslt_param_array=[])
|
122
|
-
xsltfile=@xslt_dir + @
|
150
|
+
xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
|
123
151
|
doc = self.transform(xsltfile, xslt_param_array)
|
124
152
|
end
|
125
153
|
def transform_plain_text(xslt_param_array=[])
|
126
|
-
xsltfile=@xslt_dir + @
|
154
|
+
xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
|
127
155
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
128
156
|
end
|
129
157
|
def transform_plain_text_nokogiri(xslt_param_array=[])
|
130
|
-
xsltfile=@xslt_dir + @
|
158
|
+
xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
|
131
159
|
doc = self.transform(xsltfile, xslt_param_array)
|
132
160
|
end
|
133
161
|
def transform_json(xslt_param_array=[])
|
134
|
-
xsltfile=@xslt_dir + @
|
162
|
+
xsltfile=@xslt_dir + @stylesheets[:json] # "plaintext.xsl"
|
135
163
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
136
164
|
end
|
137
165
|
def transform_toc(xslt_param_array=[])
|
138
|
-
xsltfile=@xslt_dir + @
|
166
|
+
xsltfile=@xslt_dir + @stylesheets[:toc] # "lectio_outline.xsl"
|
139
167
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
140
168
|
end
|
141
169
|
### End of Transformation Methods ###
|
@@ -153,8 +181,8 @@ module Lbp
|
|
153
181
|
word_array = self.word_array
|
154
182
|
wf = Hash.new(0)
|
155
183
|
word_array.each { |word| wf[word] += 1 }
|
156
|
-
|
157
|
-
if sort == "frequency"
|
184
|
+
|
185
|
+
if sort == "frequency"
|
158
186
|
if order == "descending" # high to low
|
159
187
|
wf = wf.sort_by{|k,v| v}.reverse
|
160
188
|
elsif order == "ascending" # low to high
|
@@ -170,4 +198,4 @@ module Lbp
|
|
170
198
|
return wf.to_h
|
171
199
|
end
|
172
200
|
end
|
173
|
-
end
|
201
|
+
end
|