lbp 0.1.0 → 0.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/lbp.rb +6 -6
- data/lib/lbp/article.rb +36 -0
- data/lib/lbp/expression.rb +59 -114
- data/lib/lbp/expression_type.rb +5 -0
- data/lib/lbp/file.rb +65 -37
- data/lib/lbp/manifestation.rb +5 -17
- data/lib/lbp/resource.rb +76 -50
- data/lib/lbp/resource_identifier.rb +40 -0
- data/lib/lbp/transcription.rb +16 -10
- data/lib/lbp/translation.rb +4 -0
- data/lib/lbp/version.rb +1 -1
- data/lib/lbp/work_group.rb +16 -0
- data/spec/article_spec.rb +23 -0
- data/spec/config_globals.rb +14 -15
- data/spec/expression_spec.rb +62 -34
- data/spec/file_spec.rb +55 -34
- data/spec/manifestation_spec.rb +16 -0
- data/spec/resource_identifier_spec.rb +53 -0
- data/spec/resource_spec.rb +22 -42
- data/spec/transcription_spec.rb +9 -4
- data/spec/work_group_spec.rb +29 -0
- metadata +14 -1
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 5c8f418230bba408ab7abcc2bfc63535ce44a425
|
4
|
+
data.tar.gz: 89cb6d564dfd559786f940882b168ee20deab8b5
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4f0cc56d0015815ac15c2e71e801afb290e3958205a0205c9b0160139e1c918a39aa6278e611b1f8f4f13b5931f1bd2f289b8c58cdb665b5de25afed919a89dd
|
7
|
+
data.tar.gz: dae6f816869aeeeb625a89c5eaffd4153c63ab071d79ed585611e3f8e293e0ef867461ac0d9ec5cb21ca1a378f4906cc8cac65bd1658aa1607b76b7b38f3ef7d
|
data/lib/lbp.rb
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
require "lbp/version"
|
2
2
|
|
3
3
|
require 'lbp/functions'
|
4
|
-
|
4
|
+
|
5
5
|
#still need review
|
6
6
|
require 'lbp/query'
|
7
7
|
require 'lbp/paragraph_image'
|
@@ -9,13 +9,13 @@ require "lbp/version"
|
|
9
9
|
|
10
10
|
#new files
|
11
11
|
require 'lbp/resource'
|
12
|
+
require 'lbp/work_group'
|
12
13
|
require 'lbp/expression'
|
14
|
+
require 'lbp/expression_type'
|
13
15
|
require 'lbp/manifestation'
|
16
|
+
require 'lbp/translation'
|
14
17
|
require 'lbp/transcription'
|
15
18
|
require 'lbp/file'
|
16
19
|
require 'lbp/file_part'
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
20
|
+
require 'lbp/article'
|
21
|
+
require 'lbp/resource_identifier'
|
data/lib/lbp/article.rb
ADDED
@@ -0,0 +1,36 @@
|
|
1
|
+
require 'openssl'
|
2
|
+
require 'rdf'
|
3
|
+
require 'rdf/rdfxml'
|
4
|
+
require 'rdf/ntriples'
|
5
|
+
require 'rdf/vocab'
|
6
|
+
require 'lbp'
|
7
|
+
|
8
|
+
module Lbp
|
9
|
+
class Article < Resource
|
10
|
+
#initialization handled by Resource class
|
11
|
+
def file_path
|
12
|
+
file_path = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasXML")).first[:o].to_s
|
13
|
+
end
|
14
|
+
def article_type
|
15
|
+
type = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/articleType")).first[:o].to_s
|
16
|
+
type.downcase
|
17
|
+
end
|
18
|
+
def article_type_shortId
|
19
|
+
self.article_type.split("/").last
|
20
|
+
end
|
21
|
+
|
22
|
+
#TODO: if the file object became more abstract the file operations could handle both transcriptions and articles
|
23
|
+
#Otherwise there should be a class called articleFile and transcriptionFile that inherits from generic File.
|
24
|
+
#def file(confighash)
|
25
|
+
# file = File.new(self.file_path, self.transcription_type, confighash)
|
26
|
+
# return file
|
27
|
+
#end
|
28
|
+
#NOTE: this really is a temporary method, since the database
|
29
|
+
#should point to file corresponding to each transcription
|
30
|
+
#dynamically generated by the exist-db database.
|
31
|
+
# but this could remain in case it was useful to grab the part
|
32
|
+
# from a file that would include a tei header etc.
|
33
|
+
|
34
|
+
#end
|
35
|
+
end
|
36
|
+
end
|
data/lib/lbp/expression.rb
CHANGED
@@ -6,157 +6,102 @@ require 'rdf/vocab'
|
|
6
6
|
require 'lbp'
|
7
7
|
|
8
8
|
|
9
|
-
|
10
|
-
|
11
9
|
module Lbp
|
12
10
|
class Expression < Resource
|
13
|
-
|
14
11
|
#inherits initialization from Resource
|
15
|
-
|
16
|
-
def
|
17
|
-
|
18
|
-
manifestations = results.map {|m| m[:o].to_s}
|
19
|
-
return manifestations
|
20
|
-
end
|
21
|
-
def canonicalManifestationUrl
|
22
|
-
manifestation = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalManifestation")).first[:o].to_s
|
23
|
-
return manifestation
|
24
|
-
end
|
25
|
-
def canonicalManifestation
|
26
|
-
url = self.canonicalManifestationUrl
|
27
|
-
manifestationObj = Manifestation.new(url)
|
28
|
-
return manifestationObj
|
29
|
-
end
|
30
|
-
def canonicalManifestation?
|
31
|
-
if self.canonicalManifestationUrl == nil
|
32
|
-
return false
|
33
|
-
else
|
34
|
-
return true
|
35
|
-
end
|
12
|
+
|
13
|
+
def structure_type #returns resource identifier
|
14
|
+
value("http://scta.info/property/structureType")
|
36
15
|
end
|
37
|
-
#
|
38
|
-
|
39
|
-
def canonicalTranscriptionUrl
|
40
|
-
manifestationObj = self.canonicalManifestation
|
41
|
-
url = manifestationObj.canonicalTranscriptionUrl
|
42
|
-
return url
|
43
|
-
end
|
44
|
-
def canonicalTranscription
|
45
|
-
url = self.canonicalTranscriptionUrl
|
46
|
-
transcriptionObj = Transcription.new(url)
|
47
|
-
return transcriptionObj
|
48
|
-
end
|
49
|
-
def canonicalTranscription?
|
50
|
-
if self.canonicalManifestation? == false
|
51
|
-
return false
|
52
|
-
else
|
53
|
-
if self.canonicalTranscriptionUrl == nil
|
54
|
-
return false
|
55
|
-
else
|
56
|
-
return true
|
57
|
-
end
|
58
|
-
end
|
16
|
+
def manifestations # returns array of available manifestations as ResourceIdentifiers
|
17
|
+
values("http://scta.info/property/hasManifestation")
|
59
18
|
end
|
60
|
-
def
|
61
|
-
|
62
|
-
transcriptionObj = manifestationObj.canonicalTranscriptionUrl
|
63
|
-
return transcriptionObj
|
19
|
+
def canonical_manifestation # returns a single manifestation ResourceIdentifier
|
20
|
+
value("http://scta.info/property/hasCanonicalManifestation")
|
64
21
|
end
|
65
|
-
def
|
66
|
-
|
67
|
-
transcriptionObj = manifestationObj.canonicalTranscription
|
68
|
-
return transcriptionObj
|
22
|
+
def canonical_manifestation? # returns boolean
|
23
|
+
!canonical_manifestation.to_s.nil?
|
69
24
|
end
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
end
|
76
|
-
return next_expression
|
25
|
+
# translations are a subclass of manifestations for any kind of manifestation not in the original language
|
26
|
+
# note that this currently means the manifestations method will not grab translation-manifestations;
|
27
|
+
# these must be called with translations method
|
28
|
+
def translations
|
29
|
+
values("http://scta.info/property/hasTranslation")
|
77
30
|
end
|
78
|
-
def
|
79
|
-
|
80
|
-
|
81
|
-
|
82
|
-
|
31
|
+
def canonical_translation
|
32
|
+
values("http://scta.info/property/hasCanonicalTranslation")
|
33
|
+
end
|
34
|
+
def canonical_translation?
|
35
|
+
!canonical_translation.to_s.nil?
|
36
|
+
end
|
37
|
+
# canonical transcription refers to the canonical transcription of the canonical manifestation
|
38
|
+
def canonical_transcription # returns single transcription as ResourceIdentifier
|
39
|
+
manifestation = canonical_manifestation
|
40
|
+
unless manifestation == nil
|
41
|
+
return manifestation.resource.canonical_transcription
|
83
42
|
end
|
84
|
-
return previous_expression
|
85
43
|
end
|
86
|
-
def
|
44
|
+
def canonical_transcription? #returns boolean
|
45
|
+
!canonical_transcription.nil?
|
46
|
+
end
|
47
|
+
|
48
|
+
def next # returns resource identifier of next expression or nil
|
49
|
+
value("http://scta.info/property/next")
|
50
|
+
end
|
51
|
+
def previous #returns ResourceIdentifier or nil
|
52
|
+
value("http://scta.info/property/previous")
|
53
|
+
end
|
54
|
+
def order_number # returns integer
|
87
55
|
## TODO: consider changing property so that there is more symmetry here
|
88
|
-
if
|
89
|
-
|
56
|
+
if structure_type.short_id == "structureBlock"
|
57
|
+
value("http://scta.info/property/paragraphNumber").to_s.to_i
|
90
58
|
else
|
91
|
-
|
59
|
+
value("http://scta.info/property/totalOrderNumber").to_s.to_i
|
92
60
|
end
|
93
|
-
return ordernumber
|
94
61
|
end
|
95
|
-
def status
|
96
|
-
|
62
|
+
def status #returns string
|
63
|
+
value("http://scta.info/property/status").to_s
|
97
64
|
end
|
98
|
-
|
99
|
-
def top_level_expression_url
|
65
|
+
def top_level_expression # returns resource identifier
|
100
66
|
#TODO make sure this can handle different structure types
|
101
|
-
|
102
|
-
end
|
103
|
-
def top_level_expression_shortId
|
104
|
-
self.top_level_expression_url.split("/").last
|
67
|
+
value("http://scta.info/property/isPartOfTopLevelExpression")
|
105
68
|
end
|
106
|
-
def
|
107
|
-
expression = Expression.new(self.top_level_expression_url)
|
108
|
-
end
|
109
|
-
|
110
|
-
def item_level_expression_url
|
69
|
+
def item_level_expression # returns resource identifier
|
111
70
|
#TODO make sure this can handle different structure types
|
112
|
-
|
113
|
-
end
|
114
|
-
def item_level_expression_shortId
|
115
|
-
self.item_level_expression_url.split("/").last
|
71
|
+
value("http://scta.info/property/isPartOfStructureItem")
|
116
72
|
end
|
117
|
-
def
|
118
|
-
|
119
|
-
|
120
|
-
|
121
|
-
result = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/level")).first[:o]
|
122
|
-
unless self.results.count == 0
|
123
|
-
level = result.to_s.to_i
|
124
|
-
else
|
125
|
-
level = nil
|
126
|
-
end
|
127
|
-
return level
|
73
|
+
def level # returns resource integer
|
74
|
+
#same comment as earlier; this query does not actually return a uri,
|
75
|
+
#but a literal. We need to make sure the resource identifier can handle that
|
76
|
+
value("http://scta.info/property/level").to_s.to_i
|
128
77
|
end
|
129
78
|
|
130
|
-
#
|
131
|
-
|
132
|
-
# rather already performing the conversion to strings as is done in all the above methods
|
133
|
-
# this should be standardized
|
134
|
-
def abbreviates
|
135
|
-
abbreviates = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviates"))
|
79
|
+
def abbreviates # returns array of ResourceIdentifiers
|
80
|
+
values("http://scta.info/property/abbreviates")
|
136
81
|
end
|
137
82
|
def abbreviatedBy
|
138
|
-
|
83
|
+
values("http://scta.info/property/abbreviatedBy")
|
139
84
|
end
|
140
85
|
def references
|
141
|
-
|
86
|
+
values("http://scta.info/property/references")
|
142
87
|
end
|
143
88
|
def referencedBy
|
144
|
-
|
89
|
+
values("http://scta.info/property/referencedBy")
|
145
90
|
end
|
146
91
|
def copies
|
147
|
-
|
92
|
+
values("http://scta.info/property/copies")
|
148
93
|
end
|
149
94
|
def copiedBy
|
150
|
-
|
95
|
+
values("http://scta.info/property/copiedBy")
|
151
96
|
end
|
152
97
|
def mentions
|
153
|
-
|
98
|
+
values("http://scta.info/property/mentions")
|
154
99
|
end
|
155
100
|
def quotes
|
156
|
-
|
101
|
+
values("http://scta.info/property/quotes")
|
157
102
|
end
|
158
103
|
def quotedBy
|
159
|
-
|
104
|
+
values("http://scta.info/property/quotedBy")
|
160
105
|
end
|
161
106
|
|
162
107
|
end
|
data/lib/lbp/file.rb
CHANGED
@@ -6,45 +6,69 @@ require 'lbp'
|
|
6
6
|
|
7
7
|
module Lbp
|
8
8
|
# class should be renamed to Transcription
|
9
|
-
class File
|
9
|
+
class File
|
10
10
|
attr_reader :xslt_dir, :file_path
|
11
11
|
|
12
12
|
def initialize(filepath, transcription_type, confighash)
|
13
13
|
@file_path = filepath
|
14
14
|
@confighash = confighash
|
15
|
-
@xslthash = @confighash[:xslt_dirs]
|
16
15
|
|
17
|
-
|
16
|
+
unless confighash == nil
|
17
|
+
@stylesheets = @confighash[:stylesheets]
|
18
|
+
# identify proper xslt directory
|
19
|
+
end
|
18
20
|
|
19
|
-
|
20
|
-
|
21
|
-
#for now its being set to nil because no documents currently declare it
|
21
|
+
# get transcription type from xmlfile
|
22
|
+
@transcription_type = transcription_type # critical or documentary # there is also a method for this if one needs to get the type from the file itself
|
22
23
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
24
|
+
# get xslt_version from xmlfile
|
25
|
+
@xslt_version = self.validating_schema_version
|
26
|
+
|
27
|
+
unless confighash == nil
|
28
|
+
@xslt_dir = "#{@confighash[:xslt_base]}#{@xslt_version}/#{@transcription_type}/"
|
29
|
+
end
|
28
30
|
|
29
|
-
if @type == 'critical' || @type == 'Critical'
|
30
|
-
@xslt_dir = @schema[:critical]
|
31
|
-
elsif @type == 'documentary' || @type == 'Documentary' || @type == 'diplomatic'
|
32
|
-
@xslt_dir = @schema[:documentary]
|
33
|
-
end
|
34
31
|
end
|
35
|
-
|
32
|
+
|
36
33
|
def file
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
|
34
|
+
file = open(self.file_path)
|
35
|
+
if file.base_uri.to_s != self.file_path
|
36
|
+
file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
|
37
|
+
end
|
42
38
|
return file
|
43
39
|
end
|
44
40
|
def nokogiri
|
45
41
|
xmldoc = Nokogiri::XML(self.file)
|
46
42
|
end
|
47
43
|
## End File Path Methods
|
44
|
+
|
45
|
+
## Get transcription type
|
46
|
+
def transcription_type_from_file
|
47
|
+
xmldoc = self.nokogiri
|
48
|
+
|
49
|
+
result = xmldoc.xpath("/tei:TEI/tei:text[1]/@type", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
50
|
+
|
51
|
+
if result.length > 0
|
52
|
+
return result.to_s
|
53
|
+
else
|
54
|
+
return "unknown"
|
55
|
+
end
|
56
|
+
|
57
|
+
end
|
58
|
+
## get validating schema label
|
59
|
+
def validating_schema_version
|
60
|
+
xmldoc = self.nokogiri
|
61
|
+
result = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:schemaRef[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
62
|
+
if result.length > 0
|
63
|
+
return result.to_s.split("-").last
|
64
|
+
else
|
65
|
+
return "default"
|
66
|
+
end
|
67
|
+
end
|
68
|
+
|
69
|
+
def transcription_type
|
70
|
+
|
71
|
+
end
|
48
72
|
### Item Header Extraction and Metadata Methods
|
49
73
|
def title
|
50
74
|
xmldoc = self.nokogiri
|
@@ -72,9 +96,13 @@ module Lbp
|
|
72
96
|
return ed_date.value
|
73
97
|
end
|
74
98
|
def pub_date
|
75
|
-
|
76
|
-
|
77
|
-
|
99
|
+
if self.validating_schema_version == "1.0.0"
|
100
|
+
return "no pub date in this schema"
|
101
|
+
else
|
102
|
+
xmldoc = self.nokogiri
|
103
|
+
pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
104
|
+
return pub_date.value
|
105
|
+
end
|
78
106
|
end
|
79
107
|
def encoding_method
|
80
108
|
xmldoc = self.nokogiri
|
@@ -89,7 +117,7 @@ module Lbp
|
|
89
117
|
def number_of_columns
|
90
118
|
xmldoc = self.nokogiri
|
91
119
|
test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
|
92
|
-
if @
|
120
|
+
if @transcription_type == "critical"
|
93
121
|
number_of_columns = nil
|
94
122
|
elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
|
95
123
|
number_of_columns = 1
|
@@ -107,35 +135,35 @@ module Lbp
|
|
107
135
|
doc = xslt_apply_to(self.nokogiri, xsltfile, xslt_param_array)
|
108
136
|
end
|
109
137
|
def transform_main_view(xslt_param_array=[])
|
110
|
-
xsltfile=@xslt_dir + @
|
138
|
+
xsltfile=@xslt_dir + @stylesheets[:main_view] # "text_display.xsl"
|
111
139
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
112
140
|
end
|
113
141
|
def transform_index_view(xslt_param_array=[])
|
114
|
-
xsltfile=@xslt_dir + @
|
142
|
+
xsltfile=@xslt_dir + @stylesheets[:index_view] # "text_display_index.xsl"
|
115
143
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
116
144
|
end
|
117
145
|
def transform_clean(xslt_param_array=[])
|
118
|
-
xsltfile=@xslt_dir + @
|
146
|
+
xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
|
119
147
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
120
148
|
end
|
121
149
|
def transform_clean_nokogiri(xslt_param_array=[])
|
122
|
-
xsltfile=@xslt_dir + @
|
150
|
+
xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
|
123
151
|
doc = self.transform(xsltfile, xslt_param_array)
|
124
152
|
end
|
125
153
|
def transform_plain_text(xslt_param_array=[])
|
126
|
-
xsltfile=@xslt_dir + @
|
154
|
+
xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
|
127
155
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
128
156
|
end
|
129
157
|
def transform_plain_text_nokogiri(xslt_param_array=[])
|
130
|
-
xsltfile=@xslt_dir + @
|
158
|
+
xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
|
131
159
|
doc = self.transform(xsltfile, xslt_param_array)
|
132
160
|
end
|
133
161
|
def transform_json(xslt_param_array=[])
|
134
|
-
xsltfile=@xslt_dir + @
|
162
|
+
xsltfile=@xslt_dir + @stylesheets[:json] # "plaintext.xsl"
|
135
163
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
136
164
|
end
|
137
165
|
def transform_toc(xslt_param_array=[])
|
138
|
-
xsltfile=@xslt_dir + @
|
166
|
+
xsltfile=@xslt_dir + @stylesheets[:toc] # "lectio_outline.xsl"
|
139
167
|
doc = self.transform_apply(xsltfile, xslt_param_array)
|
140
168
|
end
|
141
169
|
### End of Transformation Methods ###
|
@@ -153,8 +181,8 @@ module Lbp
|
|
153
181
|
word_array = self.word_array
|
154
182
|
wf = Hash.new(0)
|
155
183
|
word_array.each { |word| wf[word] += 1 }
|
156
|
-
|
157
|
-
if sort == "frequency"
|
184
|
+
|
185
|
+
if sort == "frequency"
|
158
186
|
if order == "descending" # high to low
|
159
187
|
wf = wf.sort_by{|k,v| v}.reverse
|
160
188
|
elsif order == "ascending" # low to high
|
@@ -170,4 +198,4 @@ module Lbp
|
|
170
198
|
return wf.to_h
|
171
199
|
end
|
172
200
|
end
|
173
|
-
end
|
201
|
+
end
|