lbp 0.0.2 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,131 +0,0 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
-
5
-
6
module Lbp
  # Reads collection-level settings and the item listing out of a project data
  # file (XML).
  #
  # Nothing is cached: every method that needs the project file reads it from
  # disk and parses it again, so edits to the file are visible to an existing
  # instance.
  class Collection
    # Names of the child elements read from each <schema> element by
    # #xslt_dirs. Each name, as a symbol, is also the key under which the
    # element's text is returned.
    XSLT_STYLESHEET_NAMES = %w[
      critical documentary main_view index_view clean_view plain_text toc
    ].freeze

    # @param projectfile [String] path to the project data file
    def initialize(projectfile)
      @projectfile = projectfile
    end

    # @return [String] text of //header/collectionTitle ("" when absent)
    def title
      header_text('collectionTitle')
    end

    # @return [String] text of //header/localTextsDirectory ("" when absent)
    def local_texts_dir
      header_text('localTextsDirectory')
    end

    # @return [String] text of //header/citationListsDirectory ("" when absent)
    def citation_lists_dir
      header_text('citationListsDirectory')
    end

    # @return [String] text of //header/git_repo ("" when absent)
    def git_repo
      header_text('git_repo')
    end

    # Calls Item#git_clone on every item of the collection, forwarding the
    # credentials unchanged.
    #
    # @param username [String, nil]
    # @param password [String, nil]
    # @return [Array<Item>] the items that were iterated
    def git_clone(username: nil, password: nil)
      items.each { |item| item.git_clone(username: username, password: password) }
    end

    # Builds a hash of stylesheet locations keyed by schema version.
    #
    # Each //header/xsltDirectories/schema element contributes one entry keyed
    # by its "version" attribute. A schema whose "default" attribute equals
    # "true" is additionally stored under the "default" key. The "default"
    # attribute is optional; a schema without it is simply not the default.
    #
    # @return [Hash{String => Hash{Symbol => String}}]
    # @raise [NoMethodError] if a schema lacks the "version" attribute or one
    #   of the XSLT_STYLESHEET_NAMES child elements
    def xslt_dirs
      schemas = document.xpath('//header/xsltDirectories/schema')
      schemas.each_with_object({}) do |schema, dirs|
        dirs[schema.attributes['version'].value] = stylesheet_paths(schema)
        # Built a second time so the two entries never share a hash object.
        dirs['default'] = stylesheet_paths(schema) if schema['default'] == 'true'
      end
    end

    # @return [Hash{Symbol => Object}] the directory settings, the xslt_dirs
    #   hash and the git_repo value, freshly read from the project file
    def confighash
      {
        local_texts_dir: local_texts_dir,
        citation_lists_dir: citation_lists_dir,
        xslt_dirs: xslt_dirs,
        git_repo: git_repo
      }
    end

    # @return [Array<Item>] one Item per filestem, in document order
    def items
      item_filestems.map { |filestem| Item.new(@projectfile, filestem) }
    end

    # @param fs [String] filestem of the wanted item
    # @return [Item] an Item for the given filestem (the filestem is not
    #   checked against the project file here)
    def item(fs)
      Item.new(@projectfile, fs)
    end

    # @return [Array<String>] the filestem attribute of every item listed
    #   under div#body, in document order
    def item_filestems
      document.xpath("//div[@id='body']//item/fileName/@filestem").map(&:value)
    end

    # @return [Array<String>] the title of every item listed under div#body,
    #   in document order
    def item_titles
      document.xpath("//div[@id='body']//item/title").map(&:text)
    end

    # @return [Hash{String => String}] filestem => title for every item
    #   listed under div#body
    # @raise [NoMethodError] if an item lacks a title or a fileName/@filestem
    def items_fs_title_hash
      document.xpath("//div[@id='body']//item").each_with_object({}) do |item, titles|
        title = child_named(item, 'title').text
        filestem = child_named(item, 'fileName').attributes['filestem'].value
        titles[filestem] = title
      end
    end

    private

    # Reads and parses the project file.
    def document
      Nokogiri::XML(File.read(@projectfile))
    end

    # Text content of the named element directly under <header>.
    def header_text(element_name)
      document.xpath("//header/#{element_name}").text
    end

    # First direct child of +node+ with the given element name, or nil.
    def child_named(node, element_name)
      node.children.find { |child| child.name == element_name }
    end

    # Text of every stylesheet child element of +schema+, keyed by symbol.
    def stylesheet_paths(schema)
      XSLT_STYLESHEET_NAMES.each_with_object({}) do |element_name, paths|
        paths[element_name.to_sym] = child_named(schema, element_name).text
      end
    end
  end
end
@@ -1,153 +0,0 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
- require 'lbp/transcription'
5
-
6
require 'fileutils'

module Lbp
  # One item (text) of a collection, identified by its filestem.
  #
  # Combines the collection settings from the project file with the filestem
  # to locate the item's local directory, its remote repository and its
  # transcription files.
  class Item
    # NOTE: xslt_dir is declared for callers but is never assigned in this
    # class, so it always returns nil.
    attr_reader :fs, :local_texts_dir, :file_dir, :projectfile, :xslt_dir

    # @param projectfile [String] path to the project data file
    # @param fs [String] filestem of the item
    # @raise [TypeError] if fs is nil (it is concatenated onto the texts dir)
    def initialize(projectfile, fs)
      @fs = fs
      @projectfile = projectfile

      @confighash = Collection.new(projectfile).confighash
      # Back the public local_texts_dir reader; @texts_dir is kept as the
      # name this class originally used for the same value.
      @local_texts_dir = @confighash[:local_texts_dir]
      @texts_dir = @local_texts_dir
      @file_dir = @local_texts_dir + @fs + "/"
    end

    ### Item Header Extraction and Metadata Methods

    # @return the title reported by the item's default (local, critical,
    #   master) transcription
    def title
      transcription.title
    end

    ### Begin GIT functions ###

    # @return [Boolean] true when file_dir contains a ".git" directory
    def is_git_dir
      File.directory?(@file_dir + ".git")
    end

    # @return [Array<String>] names of all branches Rugged reports for file_dir
    def git_branches
      repository.branches.map(&:name)
    end

    # @return [String] name of HEAD with a leading "refs/heads/" removed
    def git_current_branch
      repository.head.name.sub(%r{\Arefs/heads/}, '')
    end

    # @return [Array<String>] names of all tags Rugged reports for file_dir
    def git_tags
      repository.tags.map(&:name)
    end

    # Checks out the given branch in the repository at file_dir.
    # need test for this
    def git_checkout(branch)
      repository.checkout(branch)
    end

    # @return [String] https URL built from the configured git_repo prefix
    #   and the filestem
    def git_construct_remote_path
      "https://#{@confighash[:git_repo]}#{@fs}.git"
    end

    # @return [Rugged::Credentials::UserPassword]
    def git_username_password_credentials(username, password)
      Rugged::Credentials::UserPassword.new(username: username, password: password)
    end

    # Clones the item's remote repository into file_dir.
    # needs a test
    def git_clone(username: nil, password: nil)
      Rugged::Repository.clone_at(
        git_construct_remote_path,
        @file_dir,
        credentials: git_username_password_credentials(username, password)
      )
    end

    # Refreshes the local copy by deleting file_dir and cloning again.
    #
    # WARNING: because the directory is removed first, anything that exists
    # only in the local copy is lost. The original author notes this is a
    # stop-gap until a fetch-and-merge based pull is written.
    # needs a test
    def git_pull(username: nil, password: nil)
      remove_local_dir
      git_clone(username: username, password: password)
    end

    # Recursively deletes file_dir; a missing directory is not an error.
    # needs a test
    def remove_local_dir
      FileUtils.rm_rf(@file_dir)
    end
    ### End Git Methods ###

    ### Begin Order Info ##

    # @return [Item, nil] the item listed before this one in the collection;
    #   nil for the first item or when this filestem is not listed
    def previous
      sibling_at(-1)
    end

    # @return [Item, nil] the item listed after this one in the collection;
    #   nil for the last item or when this filestem is not listed
    def next
      sibling_at(1)
    end

    # @return [Integer, nil] 1-based position of this item in the collection,
    #   or nil when this filestem is not listed
    def order_number
      position = Collection.new(@projectfile).item_filestems.index(@fs)
      position && position + 1
    end

    # Location of one transcription file of this item.
    #
    # @param source [String] "origin" for the remote URL, anything else for
    #   the local path
    # @param wit [String] "critical" or a witness slug; witness files are
    #   named "<wit>_<fs>.xml"
    # @param ed [String] branch/edition segment, used only in remote URLs
    # @return [String]
    def file_path(source: 'local', wit: 'critical', ed: 'master')
      file_name = wit == 'critical' ? "#{@fs}.xml" : "#{wit}_#{@fs}.xml"
      if source == 'origin'
        # Same https scheme for critical and witness files, matching
        # git_construct_remote_path.
        "https://#{@confighash[:git_repo]}#{@fs}/raw/#{ed}/#{file_name}"
      else
        @file_dir + file_name
      end
    end

    # @return [Hash] the descriptor handed to Transcription.new: :path, :fs,
    #   :ed, :type ("critical" or "documentary") and :source
    def file_hash(source: 'local', wit: 'critical', ed: 'master')
      {
        path: file_path(source: source, wit: wit, ed: ed),
        fs: @fs,
        ed: ed,
        type: wit == 'critical' ? 'critical' : 'documentary',
        source: source
      }
    end

    # @return [Transcription] the transcription for the given witness
    def transcription(source: 'local', wit: 'critical', ed: 'master')
      Transcription.new(@projectfile, file_hash(source: source, wit: wit, ed: ed))
    end

    # @return [Array<Transcription>] one transcription per part slug listed
    #   for this item in the project file, followed by the critical one
    def transcriptions(source: 'local', ed: 'master')
      project = Nokogiri::XML(File.read(@projectfile))
      slugs = project.xpath("//item[fileName/@filestem='#{@fs}']/hasParts/part/slug").map(&:text)
      (slugs + ['critical']).map do |witness|
        transcription(source: source, wit: witness, ed: ed)
      end
    end

    private

    # Rugged repository object for file_dir.
    def repository
      Rugged::Repository.new(@file_dir)
    end

    # Item +offset+ places away from this one in the collection order, or nil
    # when that position falls outside the list. The explicit lower bound
    # matters: a negative Array index would silently wrap to the end.
    def sibling_at(offset)
      filestems = Collection.new(@projectfile).item_filestems
      position = filestems.index(@fs)
      return nil if position.nil?

      target = position + offset
      return nil if target < 0 || target >= filestems.length

      Item.new(@projectfile, filestems[target])
    end
  end
end
@@ -1,52 +0,0 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
- require 'lbp/transcription'
5
-
6
module Lbp
  # A group of items in the project file: the <div> whose id attribute equals
  # igid.
  #
  # The project file is read and parsed again on every query.
  class ItemGroup
    attr_reader :igid

    # @param projectfile [String] path to the project data file
    # @param igid [String] id attribute of the group's <div>; it is
    #   interpolated into XPath expressions inside single quotes, so it must
    #   not itself contain a single quote
    def initialize(projectfile, igid)
      @igid = igid
      @projectfile = projectfile
    end

    # @return [Array<Item>] one Item per item/fileName/@filestem found
    #   anywhere below the group's div, in document order
    def items
      filestems = document.xpath("#{group_selector}//item/fileName/@filestem")
      filestems.map { |filestem| Item.new(@projectfile, filestem.value) }
    end

    # @param fs [String] filestem of the wanted item
    # @return [Item] an Item for the given filestem (membership in this
    #   group is not checked)
    def item(fs)
      Item.new(@projectfile, fs)
    end

    # @return [String] text of the <head> directly under the group's div
    #   ("" when there is none)
    def title
      document.xpath("#{group_selector}/head").text
    end

    # @return [Boolean] true when the group's div contains at least one
    #   nested div
    def has_sub_group?
      !document.xpath("#{group_selector}//div").empty?
    end

    # A group is treated as having a parent unless its div carries
    # class="toplevel". A group id that matches no div at all has no parent.
    #
    # @return [Boolean]
    def has_parent_group?
      project = document
      return false if project.xpath(group_selector).empty?

      project.xpath("#{group_selector}[@class='toplevel']").empty?
    end

    private

    # Reads and parses the project file.
    def document
      Nokogiri::XML(File.read(@projectfile))
    end

    # XPath selecting this group's div.
    def group_selector
      "//div[@id='#{@igid}']"
    end
  end
end
@@ -1,87 +0,0 @@
1
- require 'nokogiri'
2
- require 'rugged'
3
- require 'lbp/functions'
4
-
5
module Lbp
  # A single paragraph of a transcription, identified by its xml:id.
  #
  # Every query builds a fresh Transcription from the project file and file
  # hash given at construction time.
  class Paragraph
    # Namespace binding used for XPath queries against the TEI source.
    TEI_NAMESPACE = { 'tei' => 'http://www.tei-c.org/ns/1.0' }.freeze

    attr_reader :pid

    # @param projectfile [String] path to the project data file
    # @param filehash [Hash] file descriptor passed through to Transcription.new
    # @param pid [String] xml:id of the paragraph
    def initialize(projectfile, filehash, pid)
      @projectfile = projectfile
      @filehash = filehash
      @pid = pid
    end

    # Position of the paragraph, computed as the transcription's body
    # paragraph count minus the number of body paragraphs that come after
    # this one.
    #
    # @return [Integer]
    def number
      transcr = transcription
      total = transcr.number_of_body_paragraphs
      following = transcr.nokogiri.xpath(
        "//tei:body//tei:p[preceding::tei:p[@xml:id='#{@pid}']]", TEI_NAMESPACE
      ).count
      total - following
    end

    # @return [Paragraph, nil] the nearest following paragraph, or nil when
    #   no following paragraph with an xml:id is found
    def next
      adjacent_paragraph('following')
    end

    # @return [Paragraph, nil] the nearest preceding paragraph, or nil when
    #   no preceding paragraph with an xml:id is found
    def previous
      adjacent_paragraph('preceding')
    end

    # Transforms the whole transcription with the given stylesheet and
    # selects this paragraph from the result by its id attribute.
    #
    # @param xsltfile path of the stylesheet, passed to Transcription#transform
    # @param xslt_param_array [Array] parameters passed to the transform
    # @return the result of querying the transformed document for
    #   //p[@id=pid]
    def transform(xsltfile, xslt_param_array=[])
      transformed = transcription.transform(xsltfile, xslt_param_array)
      transformed.xpath("//p[@id='#{@pid}']")
    end

    # Plain-text rendering of this paragraph.
    #
    # Uses Transcription#transform_clean rather than a pure plain-text
    # transform because the <p> elements must survive for this paragraph to
    # be selectable.
    #
    # @param xslt_param_array [Array] parameters passed to the transform
    # @return the result of querying the transformed document for
    #   //p[@id=pid]
    def transform_plain_text(xslt_param_array=[])
      transformed = transcription.transform_clean(xslt_param_array)
      transformed.xpath("//p[@id='#{@pid}']")
    end

    # @return [Integer] number of whitespace-separated words in the plain text
    def word_count
      transform_plain_text.text.split.size
    end

    # @return [Array<String>] the whitespace-separated words of the plain
    #   text, downcased, in reading order
    def word_array
      transform_plain_text.text.split.map(&:downcase)
    end

    # Counts how often each word of #word_array occurs.
    #
    # @param sort [String] "frequency" or "word"
    # @param order [String] "descending" or "ascending"
    # @return [Hash{String => Integer}] word => count, ordered as requested;
    #   when sort or order is not one of the listed values the counts are
    #   returned in first-occurrence order
    def word_frequency(sort='frequency', order='descending')
      counts = Hash.new(0)
      word_array.each { |word| counts[word] += 1 }

      recognised = %w[frequency word].include?(sort) && %w[ascending descending].include?(order)
      return counts.to_h unless recognised

      pairs =
        if sort == 'frequency'
          counts.sort_by { |_word, count| count }
        else
          counts.sort_by { |word, _count| word }
        end
      pairs.reverse! if order == 'descending'
      pairs.to_h
    end

    private

    # Fresh Transcription for this paragraph's file.
    def transcription
      Transcription.new(@projectfile, @filehash)
    end

    # Paragraph nearest to this one along the given XPath axis ("following"
    # or "preceding"), or nil when the lookup finds no xml:id. An empty
    # result must be detected with empty?: the text of an empty node set is
    # "", never nil.
    def adjacent_paragraph(axis)
      ids = transcription.nokogiri.xpath(
        "//tei:p[@xml:id='#{@pid}']/#{axis}::tei:p[1]/@xml:id", TEI_NAMESPACE
      )
      return nil if ids.empty?

      Paragraph.new(@projectfile, @filehash, ids.text)
    end
  end
end
@@ -1,60 +0,0 @@
1
- require 'spec_helper'
2
- require 'lbp'
3
- require 'pry'
4
- require 'nokogiri'
5
-
6
describe 'collection object' do
  # Loads the shared spec globals; $pg_projectfile used below is expected to
  # come from there.
  require_relative "config_globals"

  # One collection instance shared by every example in this group.
  $collection_obj = Lbp::Collection.new($pg_projectfile)

  # --- item listings -------------------------------------------------------

  it 'should get list of item filestems in sequenced array' do
    expect($collection_obj.item_filestems).to be_kind_of(Array)
  end

  it 'should get a list of item names in sequenced array' do
    expect($collection_obj.item_titles).to be_kind_of(Array)
  end

  it 'should return a hash of filestems and item names' do
    expect($collection_obj.items_fs_title_hash).to be_kind_of(Hash)
  end

  it 'should get list of item objects in an array' do
    item_objects = $collection_obj.items
    # NOTE: calling item_objects.first.title here raises an error.
    expect(item_objects).to be_kind_of(Array)
  end

  # --- header settings -----------------------------------------------------

  it 'should return local texts dir' do
    expect($collection_obj.local_texts_dir).to be_kind_of(String)
  end

  it 'should return general repo directory' do
    expect($collection_obj.git_repo).to be_kind_of(String)
  end

  it 'should return citation lists directory' do
    expect($collection_obj.citation_lists_dir).to be_kind_of(String)
  end

  it 'should return xslt hash' do
    expect($collection_obj.xslt_dirs).to be_kind_of(Hash)
  end

  # --- single lookups ------------------------------------------------------

  it 'should return a specific item object when a specific item group id is given' do
    expect($collection_obj.item('lectio1')).to be_kind_of(Lbp::Item)
  end

  it 'should return the title of a given collection specified in the project data file' do
    expect($collection_obj.title).to be_kind_of(String)
  end
end