lbp 0.0.2 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d49f877ac5360958e5c62b4701c97b2a897112be
4
- data.tar.gz: 3fc110f62a9545f20785e8b8b01d609c7ab12e68
3
+ metadata.gz: be83290049ce0d3c203adf4e3ff2139a709ab72c
4
+ data.tar.gz: cfdfe9478a8f86ddd3e381c391cfc97142241f45
5
5
  SHA512:
6
- metadata.gz: 4113fffbc328ab020d952eb88dfaf2ebe01bd12abdc929473052f3b2d307b606f00536702b87c45a68b2d1f36c98df5c5c20d268cd5bb54178079b2ba23f740a
7
- data.tar.gz: d2d3535865aaf51cfd8abd3af442bc2d6b830a2f2c97419ad5416c6ed1fe0e9acd4ae10be71140197eed348cd00f30d956b6a4f18177ec89ea7d2abcc44010f8
6
+ metadata.gz: cc21ee397365f89a2194178c533d36fe42d91bebf472561234002a8a9e9e84ca3b61553daa12baf4a5aad9f55f78edbaadf5950e1ffd58cae52e8b1a253d6fe9
7
+ data.tar.gz: 0eb62da1320ecb9377ff83a5ce4fa3369117fa510af19c5160580823dcf0e00940b8f39750c5d2eddbddf28c9b9dd377143fad7e6ed2b31a9946f9b507b3ad31
data/.gitignore CHANGED
@@ -12,4 +12,8 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ /projectfiles/
16
+ /pp-projectfiles/
17
+ /pg-projectfiles/
18
+ /aw-projectfiles/
15
19
 
@@ -1 +1 @@
1
- lbp
1
+ default
@@ -1 +1 @@
1
- ruby-2.2.0
1
+ ruby-2.2.1
data/bin/lbp CHANGED
@@ -65,7 +65,7 @@ class LbpCli < Thor
65
65
 
66
66
  itemarray << headerdata
67
67
 
68
- data.query(:predicate => RDF::DC.hasPart).each do |part|
68
+ data.query(:predicate => RDF::URI.new("http://scta.info/property/hasItem")).each do |part|
69
69
 
70
70
  newresource = RDF::Resource.new(part.object)
71
71
  newgraph = RDF::Graph.load(newresource)
@@ -78,13 +78,44 @@ class LbpCli < Thor
78
78
 
79
79
  unless status == 'Not Started'
80
80
  title = newdata.query(:predicate => RDF::DC11.title).first.object
81
+ if newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).count == 0
82
+ question_title = "unknown"
83
+ else
84
+ question_title = newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).first.object
85
+ end
81
86
  id = URI(part.object.to_s).path.split('/').last
82
87
  itemunit = "
83
88
  <item live='#{status}'>
84
89
  <fileName filestem='#{id}'>#{id}.xml</fileName>
85
90
  <title>#{title}</title>
86
- </item>\n"
91
+ <questionTitle>#{question_title}</questionTitle>"
87
92
  itemarray << itemunit
93
+ #eventually hasPart property should be sctap:hasTranscription
94
+ hastranscriptions = newdata.query(:predicate => RDF::DC.hasPart)
95
+ if hastranscriptions.count > 0
96
+ partunitopen = "\n<hasParts>"
97
+ itemarray << partunitopen
98
+ hastranscriptions.each do |transcription|
99
+ transcription_resource = RDF::Resource.new(transcription.object)
100
+ transcription_graph = RDF::Graph.load(transcription_resource)
101
+ transcription_data = transcription_graph.data
102
+ transcription_title = transcription_data.query(:predicate => RDF::DC11.title).first.object
103
+ transcription_id = URI(transcription.object.to_s).path.split('/').last
104
+ transcription_slug = transcription_id.split("_").first
105
+ transcription_initial = transcription_slug.each_char.first.upcase # not ideal, some initials will be two letters
106
+ partunit = "\n<part>
107
+ <slug>#{transcription_slug}</slug>
108
+ <title>#{transcription_title}</title>
109
+ <initial>#{transcription_initial}</initial>
110
+ </part>"
111
+ itemarray << partunit
112
+ end
113
+ partunitclose = "\n</hasParts>"
114
+ itemarray << partunitclose
115
+ end
116
+ #end
117
+ close_item_unit = "\n</item>\n"
118
+ itemarray << close_item_unit
88
119
  end
89
120
 
90
121
  end
@@ -23,14 +23,13 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "rspec"
24
24
  spec.add_development_dependency "pry"
25
25
  spec.add_runtime_dependency "nokogiri"
26
- spec.add_runtime_dependency "rugged"
26
+ #spec.add_runtime_dependency "rugged"
27
27
  spec.add_runtime_dependency "thor"
28
28
  spec.add_runtime_dependency "rdf"
29
29
  spec.add_runtime_dependency "rdf-rdfxml"
30
+ spec.add_runtime_dependency "rdf-vocab"
30
31
  spec.add_runtime_dependency "rest-client"
32
+ spec.add_runtime_dependency "sparql"
31
33
 
32
34
 
33
-
34
-
35
-
36
35
  end
data/lib/lbp.rb CHANGED
@@ -1,11 +1,21 @@
1
1
  require "lbp/version"
2
2
 
3
+ require 'lbp/functions'
4
+
5
+ # still needs review
6
+ require 'lbp/query'
7
+ require 'lbp/paragraph_image'
3
8
 
4
- require 'lbp/functions'
5
- require 'lbp/item'
6
- require 'lbp/paragraph'
9
+
10
+ #new files
11
+ require 'lbp/resource'
12
+ require 'lbp/expression'
13
+ require 'lbp/manifestation'
7
14
  require 'lbp/transcription'
8
- require 'lbp/item_group'
9
- require 'lbp/collection'
15
+ require 'lbp/file'
16
+ require 'lbp/file_part'
17
+
18
+
19
+
10
20
 
11
21
 
@@ -0,0 +1,163 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
+
10
+
11
module Lbp
  # Accessors for the SCTA properties of an expression-level resource.
  #
  # Initialization is inherited from Resource. Every lookup below reads
  # `results`, which is provided by Resource and is filtered here by predicate
  # (`:p`), with the object read from `:o`.
  # NOTE(review): presumably `results` is an RDF::Query::Solutions whose
  # `filter` mutates the receiver (hence the `dup` before each filter) --
  # confirm against Resource.
  #
  # Single-valued lookups return nil when the property is absent instead of
  # raising NoMethodError on nil, so the `...?` predicates can actually
  # answer false.
  class Expression < Resource
    # Namespace shared by every property queried in this class.
    SCTA_PROPERTY_BASE = "http://scta.info/property/".freeze

    # URLs (as strings) of all manifestations of this expression.
    def manifestationUrls
      scta_property_solutions("hasManifestation").map { |solution| solution[:o].to_s }
    end

    # URL of the canonical manifestation, or nil when none is recorded.
    def canonicalManifestationUrl
      scta_property_string("hasCanonicalManifestation")
    end

    # Manifestation built from the canonical manifestation URL, or nil when
    # there is no such URL.
    def canonicalManifestation
      url = canonicalManifestationUrl
      url && Manifestation.new(url)
    end

    # True when a canonical manifestation URL is recorded.
    def canonicalManifestation?
      !canonicalManifestationUrl.nil?
    end

    # The canonical transcription is the canonical transcription of the
    # canonical manifestation. Returns nil when there is no canonical
    # manifestation.
    def canonicalTranscriptionUrl
      manifestation = canonicalManifestation
      manifestation && manifestation.canonicalTranscriptionUrl
    end

    # Transcription built from the canonical transcription URL, or nil when
    # there is no such URL.
    def canonicalTranscription
      url = canonicalTranscriptionUrl
      url && Transcription.new(url)
    end

    # True when both a canonical manifestation and its canonical
    # transcription URL are available.
    def canonicalTranscription?
      canonicalManifestation? && !canonicalTranscriptionUrl.nil?
    end

    # Canonical transcription URL of the manifestation at manifestationUrl.
    def transcriptionUrl(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscriptionUrl
    end

    # Canonical transcription object of the manifestation at manifestationUrl.
    def transcription(manifestationUrl)
      Manifestation.new(manifestationUrl).canonicalTranscription
    end

    # URL of the following expression, or nil when there is none.
    def next
      scta_property_string("next")
    end

    # URL of the preceding expression, or nil when there is none.
    def previous
      scta_property_string("previous")
    end

    # Position of this expression as an Integer, or nil when the relevant
    # property is absent. Structure blocks read paragraphNumber; everything
    # else reads totalOrderNumber.
    # TODO: consider changing property so that there is more symmetry here
    def order_number
      property =
        if structureType_shortId == "structureBlock"
          "paragraphNumber"
        else
          "totalOrderNumber"
        end
      value = scta_property_string(property)
      value && value.to_i
    end

    # Value of the status property as a string, or nil when absent.
    def status
      scta_property_string("status")
    end

    # URL of the top-level expression this expression belongs to, or nil.
    # TODO make sure this can handle different structure types
    def top_level_expression_url
      scta_property_string("isPartOfTopLevelExpression")
    end

    # Last path segment of the top-level expression URL, or nil.
    def top_level_expression_shortId
      url = top_level_expression_url
      url && url.split("/").last
    end

    # Expression built from the top-level expression URL, or nil.
    def top_level_expression
      url = top_level_expression_url
      url && Expression.new(url)
    end

    # URL of the structure item this expression belongs to, or nil.
    # TODO make sure this can handle different structure types
    def item_level_expression_url
      scta_property_string("isPartOfStructureItem")
    end

    # Last path segment of the item-level expression URL, or nil.
    def item_level_expression_shortId
      url = item_level_expression_url
      url && url.split("/").last
    end

    # Expression built from the item-level expression URL, or nil.
    def item_level_expression
      url = item_level_expression_url
      url && Expression.new(url)
    end

    # Value of the level property as an Integer, or nil when absent.
    def level
      value = scta_property_object("level")
      value && value.to_s.to_i
    end

    # connection properties
    # TODO: unlike the methods above, these return the filtered query result
    # itself rather than strings; this should be standardized.
    def abbreviates
      scta_property_solutions("abbreviates")
    end

    def abbreviatedBy
      scta_property_solutions("abbreviatedBy")
    end

    def references
      scta_property_solutions("references")
    end

    def referencedBy
      scta_property_solutions("referencedBy")
    end

    def copies
      scta_property_solutions("copies")
    end

    def copiedBy
      scta_property_solutions("copiedBy")
    end

    def mentions
      scta_property_solutions("mentions")
    end

    def quotes
      scta_property_solutions("quotes")
    end

    def quotedBy
      scta_property_solutions("quotedBy")
    end

    private

    # All results whose predicate is the named SCTA property. Works on a
    # copy so the stored results are left untouched.
    def scta_property_solutions(name)
      results.dup.filter(:p => RDF::URI("#{SCTA_PROPERTY_BASE}#{name}"))
    end

    # Object of the first result for the named property, or nil.
    def scta_property_object(name)
      solution = scta_property_solutions(name).first
      solution && solution[:o]
    end

    # String form of the first object for the named property, or nil.
    def scta_property_string(name)
      object = scta_property_object(name)
      object && object.to_s
    end
  end
end
@@ -0,0 +1,173 @@
1
+ require 'nokogiri'
2
+ #require 'lbp/functions'
3
+ #require 'lbp/item'
4
+ require 'open-uri'
5
+ require 'lbp'
6
+
7
module Lbp
  # A TEI transcription file fetched from file_path, with accessors for its
  # header metadata, XSLT transformations and simple word statistics.
  #
  # NOTE(review): an earlier comment says this class should be renamed to
  # Transcription.
  # NOTE(review): every metadata/transform call fetches and parses the
  # document again; caching may be worthwhile but is left to the maintainers.
  class File
    attr_reader :xslt_dir, :file_path

    # Namespace binding used by every XPath query in this class.
    TEI_NAMESPACE = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze

    # filepath           - location handed to open-uri by #file
    # transcription_type - "critical", "documentary" or "diplomatic"
    #                      (matched case-insensitively)
    # confighash         - must provide :xslt_dirs; #file also reads
    #                      :git_username and :git_password
    def initialize(filepath, transcription_type, confighash)
      @file_path = filepath
      @confighash = confighash
      @xslthash = @confighash[:xslt_dirs]
      @type = transcription_type

      # The xslt version should eventually be read from the document; it is
      # nil for now because no documents currently declare it.
      xslt_version = nil
      @schema = @xslthash[xslt_version || "default"]

      # Unrecognized types leave @xslt_dir nil, as before.
      if critical?
        @xslt_dir = @schema[:critical]
      elsif documentary?
        @xslt_dir = @schema[:documentary]
      end
    end

    # Opens file_path with HTTP basic authentication and returns the IO.
    # TODO: should only send credentials after an unauthenticated request
    # has been rejected.
    def file
      credentials = [@confighash[:git_username], @confighash[:git_password]]
      options = { :http_basic_authentication => credentials }
      # Kernel#open stopped accepting URLs in Ruby 3.0; URI.open is the
      # replacement where open-uri provides it. Older rubies keep the
      # original call.
      if URI.respond_to?(:open)
        URI.open(file_path, options)
      else
        open(file_path, options)
      end
    end

    # Parses the fetched file into a Nokogiri XML document and closes the
    # underlying IO once parsing is done.
    def nokogiri
      source = file
      begin
        Nokogiri::XML(source)
      ensure
        source.close if source.respond_to?(:close)
      end
    end

    ### Item Header Extraction and Metadata Methods
    # The three text getters return "" when nothing matches. The attribute
    # getters return nil when the attribute is absent.
    def title
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]")
    end

    def author
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author")
    end

    def editor
      tei_text("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor")
    end

    def ed_no
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n")
    end

    def ed_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when")
    end

    def pub_date
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when")
    end

    def encoding_method
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method")
    end

    def encoding_location
      tei_attribute("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location")
    end

    # Returns nil for critical transcriptions, 1 when the document contains
    # tei:pb elements, 2 when it contains tei:cb elements, and nil otherwise.
    # NOTE(review): tei:pb is tested before tei:cb, so a document containing
    # both reports 1 -- confirm that this precedence is intended.
    def number_of_columns
      # Critical transcriptions never report columns, so skip the fetch.
      return nil if critical?

      xmldoc = nokogiri
      if xmldoc.xpath("//tei:pb", TEI_NAMESPACE).count != 0
        1
      elsif xmldoc.xpath("//tei:cb", TEI_NAMESPACE).count != 0
        2
      end
    end

    ### Begin transform (XSLT) methods ###
    # xslt_transform and xslt_apply_to are defined outside this class.
    def transform(xsltfile, xslt_param_array=[])
      xslt_transform(nokogiri, xsltfile, xslt_param_array)
    end

    def transform_apply(xsltfile, xslt_param_array=[])
      xslt_apply_to(nokogiri, xsltfile, xslt_param_array)
    end

    def transform_main_view(xslt_param_array=[])
      transform_apply(stylesheet_path(:main_view), xslt_param_array) # "text_display.xsl"
    end

    def transform_index_view(xslt_param_array=[])
      transform_apply(stylesheet_path(:index_view), xslt_param_array) # "text_display_index.xsl"
    end

    def transform_clean(xslt_param_array=[])
      transform_apply(stylesheet_path(:clean_view), xslt_param_array) # "clean_forStatistics.xsl"
    end

    def transform_clean_nokogiri(xslt_param_array=[])
      transform(stylesheet_path(:clean_view), xslt_param_array) # "clean_forStatistics.xsl"
    end

    def transform_plain_text(xslt_param_array=[])
      transform_apply(stylesheet_path(:plain_text), xslt_param_array) # "plaintext.xsl"
    end

    def transform_plain_text_nokogiri(xslt_param_array=[])
      transform(stylesheet_path(:plain_text), xslt_param_array) # "plaintext.xsl"
    end

    def transform_json(xslt_param_array=[])
      transform_apply(stylesheet_path(:json), xslt_param_array)
    end

    def transform_toc(xslt_param_array=[])
      transform_apply(stylesheet_path(:toc), xslt_param_array) # "lectio_outline.xsl"
    end
    ### End of Transformation Methods ###

    ### Begin Statistics Methods ###
    # Number of whitespace-separated words in the plain-text transform.
    def word_count
      transform_plain_text.split.size
    end

    # Downcased words of the plain-text transform, in document order.
    def word_array
      transform_plain_text.split.map(&:downcase)
    end

    # Hash of word => occurrence count.
    # sort  - "frequency" or "word"
    # order - "ascending" or "descending"
    # Any other sort/order combination returns the counts unsorted.
    def word_frequency(sort, order)
      counts = Hash.new(0)
      word_array.each { |word| counts[word] += 1 }

      sorted =
        if sort == "frequency"
          counts.sort_by { |_word, count| count }
        elsif sort == "word"
          counts.sort_by { |word, _count| word }
        end
      return counts if sorted.nil? || !%w[ascending descending].include?(order)

      sorted.reverse! if order == "descending"
      sorted.to_h
    end

    private

    # True for the critical transcription type, regardless of letter case.
    def critical?
      @type.to_s.downcase == "critical"
    end

    # True for the documentary/diplomatic transcription types, regardless of
    # letter case.
    def documentary?
      %w[documentary diplomatic].include?(@type.to_s.downcase)
    end

    # Full path of the stylesheet registered in the schema under key.
    def stylesheet_path(key)
      @xslt_dir + @schema[key]
    end

    # Text content of all nodes matching path ("" when nothing matches).
    def tei_text(path)
      nokogiri.xpath(path, TEI_NAMESPACE).text
    end

    # Value of the first attribute matching path, or nil when absent.
    def tei_attribute(path)
      attribute = nokogiri.at_xpath(path, TEI_NAMESPACE)
      attribute && attribute.value
    end
  end
end