lbp 0.1.0 → 0.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: be83290049ce0d3c203adf4e3ff2139a709ab72c
4
- data.tar.gz: cfdfe9478a8f86ddd3e381c391cfc97142241f45
3
+ metadata.gz: 5c8f418230bba408ab7abcc2bfc63535ce44a425
4
+ data.tar.gz: 89cb6d564dfd559786f940882b168ee20deab8b5
5
5
  SHA512:
6
- metadata.gz: cc21ee397365f89a2194178c533d36fe42d91bebf472561234002a8a9e9e84ca3b61553daa12baf4a5aad9f55f78edbaadf5950e1ffd58cae52e8b1a253d6fe9
7
- data.tar.gz: 0eb62da1320ecb9377ff83a5ce4fa3369117fa510af19c5160580823dcf0e00940b8f39750c5d2eddbddf28c9b9dd377143fad7e6ed2b31a9946f9b507b3ad31
6
+ metadata.gz: 4f0cc56d0015815ac15c2e71e801afb290e3958205a0205c9b0160139e1c918a39aa6278e611b1f8f4f13b5931f1bd2f289b8c58cdb665b5de25afed919a89dd
7
+ data.tar.gz: dae6f816869aeeeb625a89c5eaffd4153c63ab071d79ed585611e3f8e293e0ef867461ac0d9ec5cb21ca1a378f4906cc8cac65bd1658aa1607b76b7b38f3ef7d
data/lib/lbp.rb CHANGED
@@ -1,7 +1,7 @@
1
1
  require "lbp/version"
2
2
 
3
3
  require 'lbp/functions'
4
-
4
+
5
5
  #still need review
6
6
  require 'lbp/query'
7
7
  require 'lbp/paragraph_image'
@@ -9,13 +9,13 @@ require "lbp/version"
9
9
 
10
10
  #new files
11
11
  require 'lbp/resource'
12
+ require 'lbp/work_group'
12
13
  require 'lbp/expression'
14
+ require 'lbp/expression_type'
13
15
  require 'lbp/manifestation'
16
+ require 'lbp/translation'
14
17
  require 'lbp/transcription'
15
18
  require 'lbp/file'
16
19
  require 'lbp/file_part'
17
-
18
-
19
-
20
-
21
-
20
+ require 'lbp/article'
21
+ require 'lbp/resource_identifier'
@@ -0,0 +1,36 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+ module Lbp
9
+ class Article < Resource
10
+ # initialization is handled by the Resource class
11
+ def file_path
12
+ file_path = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasXML")).first[:o].to_s
13
+ end
14
+ def article_type
15
+ type = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/articleType")).first[:o].to_s
16
+ type.downcase
17
+ end
18
+ def article_type_shortId
19
+ self.article_type.split("/").last
20
+ end
21
+
22
+ #TODO: if the file object became more abstract the file operations could handle both transcriptions and articles
23
+ #Otherwise there should be a class called articleFile and transcriptionFile that inherits from generic File.
24
+ #def file(confighash)
25
+ # file = File.new(self.file_path, self.transcription_type, confighash)
26
+ # return file
27
+ #end
28
+ #NOTE: this really is a temporary method, since the database
29
+ #should point to file corresponding to each transcription
30
+ #dynamically generated by the exist-db database.
31
+ # but this could remain in case it was useful to grab the part
32
+ # from a file that would include a tei header etc.
33
+
34
+ #end
35
+ end
36
+ end
@@ -6,157 +6,102 @@ require 'rdf/vocab'
6
6
  require 'lbp'
7
7
 
8
8
 
9
-
10
-
11
9
  module Lbp
12
10
  class Expression < Resource
13
-
14
11
  #inherits initialization from Resource
15
-
16
- def manifestationUrls
17
- results = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasManifestation"))
18
- manifestations = results.map {|m| m[:o].to_s}
19
- return manifestations
20
- end
21
- def canonicalManifestationUrl
22
- manifestation = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalManifestation")).first[:o].to_s
23
- return manifestation
24
- end
25
- def canonicalManifestation
26
- url = self.canonicalManifestationUrl
27
- manifestationObj = Manifestation.new(url)
28
- return manifestationObj
29
- end
30
- def canonicalManifestation?
31
- if self.canonicalManifestationUrl == nil
32
- return false
33
- else
34
- return true
35
- end
12
+
13
+ def structure_type #returns resource identifier
14
+ value("http://scta.info/property/structureType")
36
15
  end
37
- # cannonical transcriptions refers to the canonical trancription
38
- # of the canonical manifestation
39
- def canonicalTranscriptionUrl
40
- manifestationObj = self.canonicalManifestation
41
- url = manifestationObj.canonicalTranscriptionUrl
42
- return url
43
- end
44
- def canonicalTranscription
45
- url = self.canonicalTranscriptionUrl
46
- transcriptionObj = Transcription.new(url)
47
- return transcriptionObj
48
- end
49
- def canonicalTranscription?
50
- if self.canonicalManifestation? == false
51
- return false
52
- else
53
- if self.canonicalTranscriptionUrl == nil
54
- return false
55
- else
56
- return true
57
- end
58
- end
16
+ def manifestations # returns array of available manifestations as ResourceIdentifiers
17
+ values("http://scta.info/property/hasManifestation")
59
18
  end
60
- def transcriptionUrl(manifestationUrl)
61
- manifestationObj = Manifestation.new(manifestationUrl)
62
- transcriptionObj = manifestationObj.canonicalTranscriptionUrl
63
- return transcriptionObj
19
+ def canonical_manifestation # returns a single manifestation ResourceIdentifier
20
+ value("http://scta.info/property/hasCanonicalManifestation")
64
21
  end
65
- def transcription(manifestationUrl)
66
- manifestationObj = Manifestation.new(manifestationUrl)
67
- transcriptionObj = manifestationObj.canonicalTranscription
68
- return transcriptionObj
22
+ def canonical_manifestation? # returns boolean
23
+ !canonical_manifestation.to_s.nil?
69
24
  end
70
- def next
71
- unless self.results.dup.filter(:p => RDF::URI("http://scta.info/property/next")).count == 0
72
- next_expression = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/next")).first[:o].to_s
73
- else
74
- next_expression = nil
75
- end
76
- return next_expression
25
+ # translations are a subclass of manifestations for any kind of manifestation not in the original language
26
+ # note that this currently means the manifestations method will not return translation-manifestations;
27
+ # these must be called with translations method
28
+ def translations
29
+ values("http://scta.info/property/hasTranslation")
77
30
  end
78
- def previous
79
- unless self.results.dup.filter(:p => RDF::URI("http://scta.info/property/previous")).count == 0
80
- previous_expression = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/previous")).first[:o].to_s
81
- else
82
- previous_expression = nil
31
+ def canonical_translation
32
+ values("http://scta.info/property/hasCanonicalTranslation")
33
+ end
34
+ def canonical_translation?
35
+ !canonical_translation.to_s.nil?
36
+ end
37
+ # canonical transcription refers to the canonical transcription of the canonical manifestation
38
+ def canonical_transcription # returns single transcription as ResourceIdentifier
39
+ manifestation = canonical_manifestation
40
+ unless manifestation == nil
41
+ return manifestation.resource.canonical_transcription
83
42
  end
84
- return previous_expression
85
43
  end
86
- def order_number
44
+ def canonical_transcription? #returns boolean
45
+ !canonical_transcription.nil?
46
+ end
47
+
48
+ def next # returns resource identifier of next expression or nil
49
+ value("http://scta.info/property/next")
50
+ end
51
+ def previous #returns ResourceIdentifier or nil
52
+ value("http://scta.info/property/previous")
53
+ end
54
+ def order_number # returns integer
87
55
  ## TODO: consider changing property so that there is more symmetry here
88
- if self.structureType_shortId == "structureBlock"
89
- ordernumber = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/paragraphNumber")).first[:o].to_s.to_i
56
+ if structure_type.short_id == "structureBlock"
57
+ value("http://scta.info/property/paragraphNumber").to_s.to_i
90
58
  else
91
- ordernumber = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/totalOrderNumber")).first[:o].to_s.to_i
59
+ value("http://scta.info/property/totalOrderNumber").to_s.to_i
92
60
  end
93
- return ordernumber
94
61
  end
95
- def status
96
- status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/status")).first[:o].to_s
62
+ def status #returns string
63
+ value("http://scta.info/property/status").to_s
97
64
  end
98
-
99
- def top_level_expression_url
65
+ def top_level_expression # returns resource identifier
100
66
  #TODO make sure this can handle different structure types
101
- status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/isPartOfTopLevelExpression")).first[:o].to_s
102
- end
103
- def top_level_expression_shortId
104
- self.top_level_expression_url.split("/").last
67
+ value("http://scta.info/property/isPartOfTopLevelExpression")
105
68
  end
106
- def top_level_expression
107
- expression = Expression.new(self.top_level_expression_url)
108
- end
109
-
110
- def item_level_expression_url
69
+ def item_level_expression # returns resource identifier
111
70
  #TODO make sure this can handle different structure types
112
- status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/isPartOfStructureItem")).first[:o].to_s
113
- end
114
- def item_level_expression_shortId
115
- self.item_level_expression_url.split("/").last
71
+ value("http://scta.info/property/isPartOfStructureItem")
116
72
  end
117
- def item_level_expression
118
- expression = Expression.new(self.item_level_expression_url)
119
- end
120
- def level
121
- result = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/level")).first[:o]
122
- unless self.results.count == 0
123
- level = result.to_s.to_i
124
- else
125
- level = nil
126
- end
127
- return level
73
+ def level # returns resource integer
74
+ #same comment as earlier; this query does not actually return a uri,
75
+ #but a literal. We need to make sure the resource identifier can handle that
76
+ value("http://scta.info/property/level").to_s.to_i
128
77
  end
129
78
 
130
- # connection properties
131
- #TODO: notice how all these return RDF::Solutions (or some RDF:: object)
132
- # rather already performing the conversion to strings as is done in all the above methods
133
- # this should be standardized
134
- def abbreviates
135
- abbreviates = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviates"))
79
+ def abbreviates # returns array of ResourceIdentifiers
80
+ values("http://scta.info/property/abbreviates")
136
81
  end
137
82
  def abbreviatedBy
138
- abbreviatedBy = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviatedBy"))
83
+ values("http://scta.info/property/abbreviatedBy")
139
84
  end
140
85
  def references
141
- references = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/references"))
86
+ values("http://scta.info/property/references")
142
87
  end
143
88
  def referencedBy
144
- references = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/referencedBy"))
89
+ values("http://scta.info/property/referencedBy")
145
90
  end
146
91
  def copies
147
- copies = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/copies"))
92
+ values("http://scta.info/property/copies")
148
93
  end
149
94
  def copiedBy
150
- copies = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/copiedBy"))
95
+ values("http://scta.info/property/copiedBy")
151
96
  end
152
97
  def mentions
153
- mentions = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/mentions"))
98
+ values("http://scta.info/property/mentions")
154
99
  end
155
100
  def quotes
156
- quotes = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/quotes"))
101
+ values("http://scta.info/property/quotes")
157
102
  end
158
103
  def quotedBy
159
- quotedBy = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/quotedBy"))
104
+ values("http://scta.info/property/quotedBy")
160
105
  end
161
106
 
162
107
  end
@@ -0,0 +1,5 @@
1
+ module Lbp
2
+ class ExpressionType < Resource
3
+
4
+ end
5
+ end
@@ -6,45 +6,69 @@ require 'lbp'
6
6
 
7
7
  module Lbp
8
8
  # class should be renamed to Transcription
9
- class File
9
+ class File
10
10
  attr_reader :xslt_dir, :file_path
11
11
 
12
12
  def initialize(filepath, transcription_type, confighash)
13
13
  @file_path = filepath
14
14
  @confighash = confighash
15
- @xslthash = @confighash[:xslt_dirs]
16
15
 
17
- @type = transcription_type # critical or documentary
16
+ unless confighash == nil
17
+ @stylesheets = @confighash[:stylesheets]
18
+ # identify the proper xslt directory
19
+ end
18
20
 
19
- #xslt version needs to gathered from a method
20
- xslt_version = nil
21
- #for now its being set to nil because no documents currently declare it
21
+ # get transcription type from xmlfile
22
+ @transcription_type = transcription_type # critical or documentary # there is also a method for this if one needs to get the type from the file itself
22
23
 
23
- if xslt_version == nil
24
- @schema = @xslthash["default"]
25
- else
26
- @schema = @xslthash[xslt_version]
27
- end
24
+ # get xslt_version from xmlfile
25
+ @xslt_version = self.validating_schema_version
26
+
27
+ unless confighash == nil
28
+ @xslt_dir = "#{@confighash[:xslt_base]}#{@xslt_version}/#{@transcription_type}/"
29
+ end
28
30
 
29
- if @type == 'critical' || @type == 'Critical'
30
- @xslt_dir = @schema[:critical]
31
- elsif @type == 'documentary' || @type == 'Documentary' || @type == 'diplomatic'
32
- @xslt_dir = @schema[:documentary]
33
- end
34
31
  end
35
-
32
+
36
33
  def file
37
- #TODO: needs to be written so auth is only need after request without
38
- #auth is rejected
39
-
40
- #file = open(self.file_path)
41
- file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
34
+ file = open(self.file_path)
35
+ if file.base_uri.to_s != self.file_path
36
+ file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
37
+ end
42
38
  return file
43
39
  end
44
40
  def nokogiri
45
41
  xmldoc = Nokogiri::XML(self.file)
46
42
  end
47
43
  ## End File Path Methods
44
+
45
+ ## Get transcription type
46
+ def transcription_type_from_file
47
+ xmldoc = self.nokogiri
48
+
49
+ result = xmldoc.xpath("/tei:TEI/tei:text[1]/@type", 'tei' => 'http://www.tei-c.org/ns/1.0')
50
+
51
+ if result.length > 0
52
+ return result.to_s
53
+ else
54
+ return "unknown"
55
+ end
56
+
57
+ end
58
+ ## get validating schema label
59
+ def validating_schema_version
60
+ xmldoc = self.nokogiri
61
+ result = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:schemaRef[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
62
+ if result.length > 0
63
+ return result.to_s.split("-").last
64
+ else
65
+ return "default"
66
+ end
67
+ end
68
+
69
+ def transcription_type
70
+
71
+ end
48
72
  ### Item Header Extraction and Metadata Methods
49
73
  def title
50
74
  xmldoc = self.nokogiri
@@ -72,9 +96,13 @@ module Lbp
72
96
  return ed_date.value
73
97
  end
74
98
  def pub_date
75
- xmldoc = self.nokogiri
76
- pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
77
- return pub_date.value
99
+ if self.validating_schema_version == "1.0.0"
100
+ return "no pub date in this schema"
101
+ else
102
+ xmldoc = self.nokogiri
103
+ pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
104
+ return pub_date.value
105
+ end
78
106
  end
79
107
  def encoding_method
80
108
  xmldoc = self.nokogiri
@@ -89,7 +117,7 @@ module Lbp
89
117
  def number_of_columns
90
118
  xmldoc = self.nokogiri
91
119
  test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
92
- if @type == "critical"
120
+ if @transcription_type == "critical"
93
121
  number_of_columns = nil
94
122
  elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
95
123
  number_of_columns = 1
@@ -107,35 +135,35 @@ module Lbp
107
135
  doc = xslt_apply_to(self.nokogiri, xsltfile, xslt_param_array)
108
136
  end
109
137
  def transform_main_view(xslt_param_array=[])
110
- xsltfile=@xslt_dir + @schema[:main_view] # "text_display.xsl"
138
+ xsltfile=@xslt_dir + @stylesheets[:main_view] # "text_display.xsl"
111
139
  doc = self.transform_apply(xsltfile, xslt_param_array)
112
140
  end
113
141
  def transform_index_view(xslt_param_array=[])
114
- xsltfile=@xslt_dir + @schema[:index_view] # "text_display_index.xsl"
142
+ xsltfile=@xslt_dir + @stylesheets[:index_view] # "text_display_index.xsl"
115
143
  doc = self.transform_apply(xsltfile, xslt_param_array)
116
144
  end
117
145
  def transform_clean(xslt_param_array=[])
118
- xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
146
+ xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
119
147
  doc = self.transform_apply(xsltfile, xslt_param_array)
120
148
  end
121
149
  def transform_clean_nokogiri(xslt_param_array=[])
122
- xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
150
+ xsltfile=@xslt_dir + @stylesheets[:clean_view] # "clean_forStatistics.xsl"
123
151
  doc = self.transform(xsltfile, xslt_param_array)
124
152
  end
125
153
  def transform_plain_text(xslt_param_array=[])
126
- xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
154
+ xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
127
155
  doc = self.transform_apply(xsltfile, xslt_param_array)
128
156
  end
129
157
  def transform_plain_text_nokogiri(xslt_param_array=[])
130
- xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
158
+ xsltfile=@xslt_dir + @stylesheets[:plain_text] # "plaintext.xsl"
131
159
  doc = self.transform(xsltfile, xslt_param_array)
132
160
  end
133
161
  def transform_json(xslt_param_array=[])
134
- xsltfile=@xslt_dir + @schema[:json] # "plaintext.xsl"
162
+ xsltfile=@xslt_dir + @stylesheets[:json] # "plaintext.xsl"
135
163
  doc = self.transform_apply(xsltfile, xslt_param_array)
136
164
  end
137
165
  def transform_toc(xslt_param_array=[])
138
- xsltfile=@xslt_dir + @schema[:toc] # "lectio_outline.xsl"
166
+ xsltfile=@xslt_dir + @stylesheets[:toc] # "lectio_outline.xsl"
139
167
  doc = self.transform_apply(xsltfile, xslt_param_array)
140
168
  end
141
169
  ### End of Transformation Methods ###
@@ -153,8 +181,8 @@ module Lbp
153
181
  word_array = self.word_array
154
182
  wf = Hash.new(0)
155
183
  word_array.each { |word| wf[word] += 1 }
156
-
157
- if sort == "frequency"
184
+
185
+ if sort == "frequency"
158
186
  if order == "descending" # high to low
159
187
  wf = wf.sort_by{|k,v| v}.reverse
160
188
  elsif order == "ascending" # low to high
@@ -170,4 +198,4 @@ module Lbp
170
198
  return wf.to_h
171
199
  end
172
200
  end
173
- end
201
+ end