lbp 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: d49f877ac5360958e5c62b4701c97b2a897112be
4
- data.tar.gz: 3fc110f62a9545f20785e8b8b01d609c7ab12e68
3
+ metadata.gz: be83290049ce0d3c203adf4e3ff2139a709ab72c
4
+ data.tar.gz: cfdfe9478a8f86ddd3e381c391cfc97142241f45
5
5
  SHA512:
6
- metadata.gz: 4113fffbc328ab020d952eb88dfaf2ebe01bd12abdc929473052f3b2d307b606f00536702b87c45a68b2d1f36c98df5c5c20d268cd5bb54178079b2ba23f740a
7
- data.tar.gz: d2d3535865aaf51cfd8abd3af442bc2d6b830a2f2c97419ad5416c6ed1fe0e9acd4ae10be71140197eed348cd00f30d956b6a4f18177ec89ea7d2abcc44010f8
6
+ metadata.gz: cc21ee397365f89a2194178c533d36fe42d91bebf472561234002a8a9e9e84ca3b61553daa12baf4a5aad9f55f78edbaadf5950e1ffd58cae52e8b1a253d6fe9
7
+ data.tar.gz: 0eb62da1320ecb9377ff83a5ce4fa3369117fa510af19c5160580823dcf0e00940b8f39750c5d2eddbddf28c9b9dd377143fad7e6ed2b31a9946f9b507b3ad31
data/.gitignore CHANGED
@@ -12,4 +12,8 @@
12
12
  *.o
13
13
  *.a
14
14
  mkmf.log
15
+ /projectfiles/
16
+ /pp-projectfiles/
17
+ /pg-projectfiles/
18
+ /aw-projectfiles/
15
19
 
@@ -1 +1 @@
1
- lbp
1
+ default
@@ -1 +1 @@
1
- ruby-2.2.0
1
+ ruby-2.2.1
data/bin/lbp CHANGED
@@ -65,7 +65,7 @@ class LbpCli < Thor
65
65
 
66
66
  itemarray << headerdata
67
67
 
68
- data.query(:predicate => RDF::DC.hasPart).each do |part|
68
+ data.query(:predicate => RDF::URI.new("http://scta.info/property/hasItem")).each do |part|
69
69
 
70
70
  newresource = RDF::Resource.new(part.object)
71
71
  newgraph = RDF::Graph.load(newresource)
@@ -78,13 +78,44 @@ class LbpCli < Thor
78
78
 
79
79
  unless status == 'Not Started'
80
80
  title = newdata.query(:predicate => RDF::DC11.title).first.object
81
+ if newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).count == 0
82
+ question_title = "unknown"
83
+ else
84
+ question_title = newdata.query(:predicate => RDF::URI.new("http://scta.info/property/questionTitle")).first.object
85
+ end
81
86
  id = URI(part.object.to_s).path.split('/').last
82
87
  itemunit = "
83
88
  <item live='#{status}'>
84
89
  <fileName filestem='#{id}'>#{id}.xml</fileName>
85
90
  <title>#{title}</title>
86
- </item>\n"
91
+ <questionTitle>#{question_title}</questionTitle>"
87
92
  itemarray << itemunit
93
+ #eventually hasPart property should be sctap:hasTranscription
94
+ hastranscriptions = newdata.query(:predicate => RDF::DC.hasPart)
95
+ if hastranscriptions.count > 0
96
+ partunitopen = "\n<hasParts>"
97
+ itemarray << partunitopen
98
+ hastranscriptions.each do |transcription|
99
+ transcription_resource = RDF::Resource.new(transcription.object)
100
+ transcription_graph = RDF::Graph.load(transcription_resource)
101
+ transcription_data = transcription_graph.data
102
+ transcription_title = transcription_data.query(:predicate => RDF::DC11.title).first.object
103
+ transcription_id = URI(transcription.object.to_s).path.split('/').last
104
+ transcription_slug = transcription_id.split("_").first
105
+ transcription_initial = transcription_slug.each_char.first.upcase # not ideal, some initials will be two letters
106
+ partunit = "\n<part>
107
+ <slug>#{transcription_slug}</slug>
108
+ <title>#{transcription_title}</title>
109
+ <initial>#{transcription_initial}</initial>
110
+ </part>"
111
+ itemarray << partunit
112
+ end
113
+ partunitclose = "\n</hasParts>"
114
+ itemarray << partunitclose
115
+ end
116
+ #end
117
+ close_item_unit = "\n</item>\n"
118
+ itemarray << close_item_unit
88
119
  end
89
120
 
90
121
  end
@@ -23,14 +23,13 @@ Gem::Specification.new do |spec|
23
23
  spec.add_development_dependency "rspec"
24
24
  spec.add_development_dependency "pry"
25
25
  spec.add_runtime_dependency "nokogiri"
26
- spec.add_runtime_dependency "rugged"
26
+ #spec.add_runtime_dependency "rugged"
27
27
  spec.add_runtime_dependency "thor"
28
28
  spec.add_runtime_dependency "rdf"
29
29
  spec.add_runtime_dependency "rdf-rdfxml"
30
+ spec.add_runtime_dependency "rdf-vocab"
30
31
  spec.add_runtime_dependency "rest-client"
32
+ spec.add_runtime_dependency "sparql"
31
33
 
32
34
 
33
-
34
-
35
-
36
35
  end
data/lib/lbp.rb CHANGED
@@ -1,11 +1,21 @@
1
1
  require "lbp/version"
2
2
 
3
+ require 'lbp/functions'
4
+
5
+ #still need review
6
+ require 'lbp/query'
7
+ require 'lbp/paragraph_image'
3
8
 
4
- require 'lbp/functions'
5
- require 'lbp/item'
6
- require 'lbp/paragraph'
9
+
10
+ #new files
11
+ require 'lbp/resource'
12
+ require 'lbp/expression'
13
+ require 'lbp/manifestation'
7
14
  require 'lbp/transcription'
8
- require 'lbp/item_group'
9
- require 'lbp/collection'
15
+ require 'lbp/file'
16
+ require 'lbp/file_part'
17
+
18
+
19
+
10
20
 
11
21
 
@@ -0,0 +1,163 @@
1
+ require 'openssl'
2
+ require 'rdf'
3
+ require 'rdf/rdfxml'
4
+ require 'rdf/ntriples'
5
+ require 'rdf/vocab'
6
+ require 'lbp'
7
+
8
+
9
+
10
+
11
+ module Lbp
12
+ class Expression < Resource
13
+
14
+ #inherits initialization from Resource
15
+
16
+ def manifestationUrls
17
+ results = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasManifestation"))
18
+ manifestations = results.map {|m| m[:o].to_s}
19
+ return manifestations
20
+ end
21
+ def canonicalManifestationUrl
22
+ manifestation = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/hasCanonicalManifestation")).first[:o].to_s
23
+ return manifestation
24
+ end
25
+ def canonicalManifestation
26
+ url = self.canonicalManifestationUrl
27
+ manifestationObj = Manifestation.new(url)
28
+ return manifestationObj
29
+ end
30
+ def canonicalManifestation?
31
+ if self.canonicalManifestationUrl == nil
32
+ return false
33
+ else
34
+ return true
35
+ end
36
+ end
37
+ # canonical transcription refers to the canonical transcription
38
+ # of the canonical manifestation
39
+ def canonicalTranscriptionUrl
40
+ manifestationObj = self.canonicalManifestation
41
+ url = manifestationObj.canonicalTranscriptionUrl
42
+ return url
43
+ end
44
+ def canonicalTranscription
45
+ url = self.canonicalTranscriptionUrl
46
+ transcriptionObj = Transcription.new(url)
47
+ return transcriptionObj
48
+ end
49
+ def canonicalTranscription?
50
+ if self.canonicalManifestation? == false
51
+ return false
52
+ else
53
+ if self.canonicalTranscriptionUrl == nil
54
+ return false
55
+ else
56
+ return true
57
+ end
58
+ end
59
+ end
60
+ def transcriptionUrl(manifestationUrl)
61
+ manifestationObj = Manifestation.new(manifestationUrl)
62
+ transcriptionObj = manifestationObj.canonicalTranscriptionUrl
63
+ return transcriptionObj
64
+ end
65
+ def transcription(manifestationUrl)
66
+ manifestationObj = Manifestation.new(manifestationUrl)
67
+ transcriptionObj = manifestationObj.canonicalTranscription
68
+ return transcriptionObj
69
+ end
70
+ def next
71
+ unless self.results.dup.filter(:p => RDF::URI("http://scta.info/property/next")).count == 0
72
+ next_expression = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/next")).first[:o].to_s
73
+ else
74
+ next_expression = nil
75
+ end
76
+ return next_expression
77
+ end
78
+ def previous
79
+ unless self.results.dup.filter(:p => RDF::URI("http://scta.info/property/previous")).count == 0
80
+ previous_expression = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/previous")).first[:o].to_s
81
+ else
82
+ previous_expression = nil
83
+ end
84
+ return previous_expression
85
+ end
86
+ def order_number
87
+ ## TODO: consider changing property so that there is more symmetry here
88
+ if self.structureType_shortId == "structureBlock"
89
+ ordernumber = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/paragraphNumber")).first[:o].to_s.to_i
90
+ else
91
+ ordernumber = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/totalOrderNumber")).first[:o].to_s.to_i
92
+ end
93
+ return ordernumber
94
+ end
95
+ def status
96
+ status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/status")).first[:o].to_s
97
+ end
98
+
99
+ def top_level_expression_url
100
+ #TODO make sure this can handle different structure types
101
+ status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/isPartOfTopLevelExpression")).first[:o].to_s
102
+ end
103
+ def top_level_expression_shortId
104
+ self.top_level_expression_url.split("/").last
105
+ end
106
+ def top_level_expression
107
+ expression = Expression.new(self.top_level_expression_url)
108
+ end
109
+
110
+ def item_level_expression_url
111
+ #TODO make sure this can handle different structure types
112
+ status = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/isPartOfStructureItem")).first[:o].to_s
113
+ end
114
+ def item_level_expression_shortId
115
+ self.item_level_expression_url.split("/").last
116
+ end
117
+ def item_level_expression
118
+ expression = Expression.new(self.item_level_expression_url)
119
+ end
120
+ def level
121
+ result = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/level")).first[:o]
122
+ unless self.results.count == 0
123
+ level = result.to_s.to_i
124
+ else
125
+ level = nil
126
+ end
127
+ return level
128
+ end
129
+
130
+ # connection properties
131
+ #TODO: notice how all these return RDF::Solutions (or some RDF:: object)
132
+ # rather than already performing the conversion to strings, as is done in all the above methods
133
+ # this should be standardized
134
+ def abbreviates
135
+ abbreviates = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviates"))
136
+ end
137
+ def abbreviatedBy
138
+ abbreviatedBy = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/abbreviatedBy"))
139
+ end
140
+ def references
141
+ references = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/references"))
142
+ end
143
+ def referencedBy
144
+ references = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/referencedBy"))
145
+ end
146
+ def copies
147
+ copies = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/copies"))
148
+ end
149
+ def copiedBy
150
+ copies = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/copiedBy"))
151
+ end
152
+ def mentions
153
+ mentions = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/mentions"))
154
+ end
155
+ def quotes
156
+ quotes = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/quotes"))
157
+ end
158
+ def quotedBy
159
+ quotedBy = self.results.dup.filter(:p => RDF::URI("http://scta.info/property/quotedBy"))
160
+ end
161
+
162
+ end
163
+ end
@@ -0,0 +1,173 @@
1
+ require 'nokogiri'
2
+ #require 'lbp/functions'
3
+ #require 'lbp/item'
4
+ require 'open-uri'
5
+ require 'lbp'
6
+
7
+ module Lbp
8
+ # class should be renamed to Transcription
9
+ class File
10
+ attr_reader :xslt_dir, :file_path
11
+
12
+ def initialize(filepath, transcription_type, confighash)
13
+ @file_path = filepath
14
+ @confighash = confighash
15
+ @xslthash = @confighash[:xslt_dirs]
16
+
17
+ @type = transcription_type # critical or documentary
18
+
19
+ #xslt version needs to be gathered from a method
20
+ xslt_version = nil
21
+ #for now it's being set to nil because no documents currently declare it
22
+
23
+ if xslt_version == nil
24
+ @schema = @xslthash["default"]
25
+ else
26
+ @schema = @xslthash[xslt_version]
27
+ end
28
+
29
+ if @type == 'critical' || @type == 'Critical'
30
+ @xslt_dir = @schema[:critical]
31
+ elsif @type == 'documentary' || @type == 'Documentary' || @type == 'diplomatic'
32
+ @xslt_dir = @schema[:documentary]
33
+ end
34
+ end
35
+
36
+ def file
37
+ #TODO: needs to be written so auth is only needed after a request without
38
+ #auth is rejected
39
+
40
+ #file = open(self.file_path)
41
+ file = open(self.file_path, {:http_basic_authentication => [@confighash[:git_username], @confighash[:git_password]]})
42
+ return file
43
+ end
44
+ def nokogiri
45
+ xmldoc = Nokogiri::XML(self.file)
46
+ end
47
+ ## End File Path Methods
48
+ ### Item Header Extraction and Metadata Methods
49
+ def title
50
+ xmldoc = self.nokogiri
51
+ title = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:titleStmt[1]/tei:title[1]", 'tei' => 'http://www.tei-c.org/ns/1.0')
52
+ return title.text
53
+ end
54
+ def author
55
+ xmldoc = self.nokogiri
56
+ author = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:author", 'tei' => 'http://www.tei-c.org/ns/1.0')
57
+ return author.text
58
+ end
59
+ def editor
60
+ xmldoc = self.nokogiri
61
+ editor = xmldoc.xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc/tei:titleStmt[1]/tei:editor", 'tei' => 'http://www.tei-c.org/ns/1.0')
62
+ return editor.text
63
+ end
64
+ def ed_no
65
+ xmldoc = self.nokogiri
66
+ ed_no = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/@n", 'tei' => 'http://www.tei-c.org/ns/1.0')
67
+ return ed_no.value
68
+ end
69
+ def ed_date
70
+ xmldoc = self.nokogiri
71
+ ed_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:editionStmt[1]/tei:edition[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
72
+ return ed_date.value
73
+ end
74
+ def pub_date
75
+ xmldoc = self.nokogiri
76
+ pub_date = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:fileDesc[1]/tei:publicationStmt[1]/tei:date[1]/@when", 'tei' => 'http://www.tei-c.org/ns/1.0')
77
+ return pub_date.value
78
+ end
79
+ def encoding_method
80
+ xmldoc = self.nokogiri
81
+ encoding_method = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@method", 'tei' => 'http://www.tei-c.org/ns/1.0')
82
+ return encoding_method.value
83
+ end
84
+ def encoding_location
85
+ xmldoc = self.nokogiri
86
+ encoding_location = xmldoc.at_xpath("/tei:TEI/tei:teiHeader[1]/tei:encodingDesc[1]/tei:variantEncoding[1]/@location", 'tei' => 'http://www.tei-c.org/ns/1.0')
87
+ return encoding_location.value
88
+ end
89
+ def number_of_columns
90
+ xmldoc = self.nokogiri
91
+ test = xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0')
92
+ if @type == "critical"
93
+ number_of_columns = nil
94
+ elsif xmldoc.xpath("//tei:pb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
95
+ number_of_columns = 1
96
+ elsif xmldoc.xpath("//tei:cb", 'tei' => 'http://www.tei-c.org/ns/1.0').count != 0
97
+ number_of_columns = 2
98
+ end
99
+ return number_of_columns
100
+ end
101
+
102
+ ### Begin transform (XSLT) methods ###
103
+ def transform(xsltfile, xslt_param_array=[])
104
+ doc = xslt_transform(self.nokogiri, xsltfile, xslt_param_array)
105
+ end
106
+ def transform_apply(xsltfile, xslt_param_array=[])
107
+ doc = xslt_apply_to(self.nokogiri, xsltfile, xslt_param_array)
108
+ end
109
+ def transform_main_view(xslt_param_array=[])
110
+ xsltfile=@xslt_dir + @schema[:main_view] # "text_display.xsl"
111
+ doc = self.transform_apply(xsltfile, xslt_param_array)
112
+ end
113
+ def transform_index_view(xslt_param_array=[])
114
+ xsltfile=@xslt_dir + @schema[:index_view] # "text_display_index.xsl"
115
+ doc = self.transform_apply(xsltfile, xslt_param_array)
116
+ end
117
+ def transform_clean(xslt_param_array=[])
118
+ xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
119
+ doc = self.transform_apply(xsltfile, xslt_param_array)
120
+ end
121
+ def transform_clean_nokogiri(xslt_param_array=[])
122
+ xsltfile=@xslt_dir + @schema[:clean_view] # "clean_forStatistics.xsl"
123
+ doc = self.transform(xsltfile, xslt_param_array)
124
+ end
125
+ def transform_plain_text(xslt_param_array=[])
126
+ xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
127
+ doc = self.transform_apply(xsltfile, xslt_param_array)
128
+ end
129
+ def transform_plain_text_nokogiri(xslt_param_array=[])
130
+ xsltfile=@xslt_dir + @schema[:plain_text] # "plaintext.xsl"
131
+ doc = self.transform(xsltfile, xslt_param_array)
132
+ end
133
+ def transform_json(xslt_param_array=[])
134
+ xsltfile=@xslt_dir + @schema[:json] # "plaintext.xsl"
135
+ doc = self.transform_apply(xsltfile, xslt_param_array)
136
+ end
137
+ def transform_toc(xslt_param_array=[])
138
+ xsltfile=@xslt_dir + @schema[:toc] # "lectio_outline.xsl"
139
+ doc = self.transform_apply(xsltfile, xslt_param_array)
140
+ end
141
+ ### End of Transformation Methods ###
142
+ ### Begin Statistics Methods ###
143
+ def word_count
144
+ plaintext = self.transform_plain_text
145
+ size = plaintext.split.size
146
+ end
147
+ def word_array
148
+ plaintext = self.transform_plain_text
149
+ word_array = plaintext.split
150
+ word_array.map!{ |word| word.downcase}
151
+ end
152
+ def word_frequency(sort, order)
153
+ word_array = self.word_array
154
+ wf = Hash.new(0)
155
+ word_array.each { |word| wf[word] += 1 }
156
+
157
+ if sort == "frequency"
158
+ if order == "descending" # high to low
159
+ wf = wf.sort_by{|k,v| v}.reverse
160
+ elsif order == "ascending" # low to high
161
+ wf = wf.sort_by{|k,v| v}
162
+ end
163
+ elsif sort == "word"
164
+ if order == "descending" # z - a
165
+ wf = wf.sort_by{|k,v| k}.reverse
166
+ elsif order == "ascending" #a - z
167
+ wf = wf.sort_by{|k,v| k}
168
+ end
169
+ end
170
+ return wf.to_h
171
+ end
172
+ end
173
+ end