bibletools 0.1.0 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b39f3947e9bf8e827ce070ad544f2f03a5b28e43554588ec99e6511aaf7c7109
4
- data.tar.gz: 6d88f41a49b461adc25993c65e2abbeffc6616b9d727bab0bcc93c3570bcc824
3
+ metadata.gz: c6ae238a57f75318c4238835d64567683523fdcf106e3a4eba8007e833de88ac
4
+ data.tar.gz: bd3471c321d7caf26d360bc66ee44eda5b3276d808de12e4ec450044a5f62515
5
5
  SHA512:
6
- metadata.gz: '09532d8e8b4d88ae66ffdcb7f62aec4c7887cb9b87497deb5b04206f6b3c5cbcbcffa618e2ded85a683a57f43907e8f5e30dcb0d62885481fc0a95540240cad6'
7
- data.tar.gz: ec3bc05ded3b431231eaea8f2e00cde75a4da60bc8cc4fbb6fda1cd3d8312fabd1d50aa7e096742f7beb05b36e306b853b4cd92d446a9bb060333ce544614645
6
+ metadata.gz: 5dddc1de4ef4dc2439a3f3ca8d4e49a7973dd89c06362ac1761be02ed2240a09367b622b3abdac5ad919f23c49ef3e72ee63dbac13bd4818aa3abe57b19dddb7
7
+ data.tar.gz: b39d63a2795fc9468e176ee44e674a79042e2f5608c64099b9604ce8e8d75f75376cd26fd6040760f5c113579fba00cfb1c8ee5d1c345576c1a064d54d9443a4
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/bibletools.rb CHANGED
@@ -6,6 +6,85 @@ require 'rexle'
6
6
  require 'yawc'
7
7
  require 'nokorexi'
8
8
 
9
+ require 'cerecvoice2019'
10
+ require 'english_spellchecker'
11
+
12
+
13
+ BOOKS = [
14
+ "Genesis",
15
+ "Exodus",
16
+ "Leviticus",
17
+ "Numbers",
18
+ "Deuteronomy",
19
+ "Joshua",
20
+ "Judges",
21
+ "Ruth",
22
+ "1 Samuel",
23
+ "2 Samuel",
24
+ "1 Kings",
25
+ "2 Kings",
26
+ "1 Chronicles",
27
+ "2 Chronicles",
28
+ "Ezra",
29
+ "Nehemiah",
30
+ "Tobit",
31
+ "Judith",
32
+ "Esther",
33
+ "1 Maccabees",
34
+ "2 Maccabees",
35
+ "Job",
36
+ "Psalms",
37
+ "The Proverbs",
38
+ "Ecclesiastes",
39
+ "The Song of Songs",
40
+ "Wisdom",
41
+ "Ecclesiasticus / Sirach",
42
+ "Isaiah",
43
+ "Jeremiah",
44
+ "Lamentations",
45
+ "Baruch",
46
+ "Ezekiel",
47
+ "Daniel",
48
+ "Hosea",
49
+ "Joel",
50
+ "Amos",
51
+ "Obadiah",
52
+ "Jonah",
53
+ "Micah",
54
+ "Nahum",
55
+ "Habakkuk",
56
+ "Zephaniah",
57
+ "Haggai",
58
+ "Zechariah",
59
+ "Malachi",
60
+ "Matthew",
61
+ "Mark",
62
+ "Luke",
63
+ "John",
64
+ "Acts of Apostles",
65
+ "Romans",
66
+ "1 Corinthians",
67
+ "2 Corinthians",
68
+ "Galatians",
69
+ "Ephesians",
70
+ "Philippians",
71
+ "Colossians",
72
+ "1 Thessalonians",
73
+ "2 Thessalonians",
74
+ "1 Timothy",
75
+ "2 Timothy",
76
+ "Titus",
77
+ "Philemon",
78
+ "Hebrews",
79
+ "James",
80
+ "1 Peter",
81
+ "2 Peter",
82
+ "1 John",
83
+ "2 John",
84
+ "3 John",
85
+ "Jude",
86
+ "Revelation"
87
+ ]
9
88
 
10
89
  module BibleTools
11
90
 
@@ -20,94 +99,17 @@ module BibleTools
20
99
 
21
100
  baseurl = url
22
101
 
23
- books = ["Genesis",
24
- "Exodus",
25
- "Leviticus",
26
- "Numbers",
27
- "Deuteronomy",
28
- "Joshua",
29
- "Judges",
30
- "Ruth",
31
- "1 Samuel",
32
- "2 Samuel",
33
- "1 Kings",
34
- "2 Kings",
35
- "1 Chronicles",
36
- "2 Chronicles",
37
- "Ezra",
38
- "Nehemiah",
39
- "Tobit",
40
- "Judith",
41
- "Esther",
42
- "1 Maccabees",
43
- "2 Maccabees",
44
- "Job",
45
- "Psalms",
46
- "The Proverbs",
47
- "Ecclesiastes",
48
- "The Song of Songs",
49
- "Wisdom",
50
- "Ecclesiasticus / Sirach",
51
- "Isaiah",
52
- "Jeremiah",
53
- "Lamentations",
54
- "Baruch",
55
- "Ezekiel",
56
- "Daniel",
57
- "Hosea",
58
- "Joel",
59
- "Amos",
60
- "Obadiah",
61
- "Jonah",
62
- "Micah",
63
- "Nahum",
64
- "Habakkuk",
65
- "Zephaniah",
66
- "Haggai",
67
- "Zechariah",
68
- "Malachi",
69
- "Matthew",
70
- "Mark",
71
- "Luke",
72
- "John",
73
- "Acts of Apostles",
74
- "Romans",
75
- "1 Corinthians",
76
- "2 Corinthians",
77
- "Galatians",
78
- "Ephesians",
79
- "Philippians",
80
- "Colossians",
81
- "1 Thessalonians",
82
- "2 Thessalonians",
83
- "1 Timothy",
84
- "2 Timothy",
85
- "Titus",
86
- "Philemon",
87
- "Hebrews",
88
- "James",
89
- "1 Peter",
90
- "2 Peter",
91
- "1 John",
92
- "2 John",
93
- "3 John",
94
- "Jude",
95
- "Revelation"]
96
-
97
102
  id = if book.is_a? String then
98
- (books.index(book) + 1).to_s
103
+ (BOOKS.index(book) + 1).to_s
99
104
  else
100
105
  book.to_s
101
106
  end
102
107
 
103
-
104
-
105
108
  url2 = baseurl + '?id=' + id
106
109
  doc = Nokorexi.new(url2).to_doc
107
110
  e = doc.root.at_css('.pagination')
108
111
  endchapter = e.xpath('li')[-2].text('a')
109
112
 
110
-
111
113
  book = Rexle.new('<book/>')
112
114
 
113
115
  (1..endchapter.to_i).each do |chp|
@@ -143,39 +145,128 @@ module BibleTools
143
145
  def to_doc
144
146
  @doc
145
147
  end
148
+
149
+ def save(title)
150
+
151
+ filename = title.downcase.gsub(/\s+/,'-') + '.xml'
152
+ File.write filename, @doc.root.xml(pretty: true)
153
+ puts filename +' saved!'
154
+
155
+ end
146
156
 
147
157
  end
148
158
 
149
159
  class Analyse
150
160
 
151
- def initialize(bible_obj, debug: false)
161
+ def initialize(bible_obj, tts: nil, debug: false)
152
162
 
153
163
  @debug = debug
154
164
 
155
165
  if bible_obj then
156
166
  @doc = bible_obj.to_doc
157
167
  @verses = @doc.root.xpath('chapter/verse')
168
+ @check = EnglishSpellcheck.new debug: false #verbose: false
169
+
170
+ if tts then
171
+ @cerevoice = Cerecvoice2019.new accountid: tts[:account],
172
+ password: tts[:password], voice: tts[:voice] || 'Stuart'
173
+ end
174
+
175
+ end
176
+
177
+
178
+ end
179
+
180
+ # Automatically select verses; verses are selected based upon word
181
+ # frequency
182
+ #
183
+ def asr()
184
+
185
+ r = assoc_r()
186
+ e = r[1].root.element('chapter/verse')
187
+ verse_no = e.attributes[:no]
188
+ chptr_no = e.parent.attributes[:no]
189
+
190
+ # we are backtracking the results to hopefully find a new branch to explore
191
+ trail = r.first
192
+
193
+ a, trail2 = mine_words(r, trail, level: 2)
194
+ a << [chptr_no, verse_no, e.text, 1]
195
+
196
+ h = a.group_by(&:first)
197
+ a2 = h.sort_by {|x, _| x.to_i}
198
+ a3 = a2.map do |chapter, verses|
199
+ [chapter, verses.sort_by {|_,x| x.to_i}]
158
200
  end
201
+
202
+ [a3, trail2]
203
+ end
204
+
205
+ # find the words associated with a given keyword
206
+ #
207
+ def assoc(keyword, doc=@doc)
208
+ doc2 = self.search keyword, doc
209
+ [self.words(doc: doc2), doc2]
210
+ end
211
+
212
+ def assoc_r(keyword=words().keys.first, doc=@doc, list=[], results=[])
213
+
214
+ result = assoc(keyword, doc)
215
+ results << result
216
+ h, doc2 = result
217
+ new_keyword = h.keys.find {|x| not list.include? x}
218
+ return [list, doc2, results] unless new_keyword
219
+
220
+ list << new_keyword
221
+ assoc_r(new_keyword, doc2, list, results)
159
222
 
160
223
  end
161
224
 
162
- def wordtally()
225
+ def custom_words(doc: @doc, level: 3)
226
+
227
+ a = wordtally(level: level, doc: doc).keys.map do |word|
228
+ puts 'word: ' + word.inspect if @debug
229
+ [word, @check.spelling(word, verbose: false)]
230
+ end
163
231
 
164
- return unless @doc
165
- Yawc.new(@doc.root.plaintext).to_h
232
+ a2 = a.select do |w, x|
233
+ if x.is_a? String then
234
+ w != x
235
+ elsif x.is_a? Array
236
+ not (w == x.join or w == x.first)
237
+ else
238
+ true
239
+ end
240
+ end
241
+ a2.map(&:first)
242
+
243
+ end
244
+
245
+ def wordtally(level: 2, doc: @doc)
246
+
247
+ return unless doc
248
+ Yawc.new(doc.root.plaintext, level: level).to_h
166
249
 
167
250
  end
168
251
 
169
252
  alias words wordtally
170
253
 
171
254
  def read(chapter, verse)
172
- @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")
255
+
256
+ if verse.is_a? Integer then
257
+ [@doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")]
258
+ elsif verse.is_a? Array
259
+ verse.map do |x|
260
+ @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{x}']")
261
+ end
262
+ end
173
263
  end
174
264
 
175
265
 
176
- def search(keyword)
266
+ def search(keyword, doc=@doc)
177
267
 
178
- a = @verses.select {|x| x.text =~ /#{keyword}/}
268
+ verses = doc.root.xpath('chapter/verse')
269
+ a = verses.select {|x| x.text =~ /#{keyword}/i}
179
270
 
180
271
  a2 = a.map.with_index do |verse, i|
181
272
  txt = verse.text
@@ -185,9 +276,10 @@ module BibleTools
185
276
  n = 0
186
277
  chapter = verse.parent.attributes[:no]
187
278
 
188
- until txt.rstrip[-1] == '.' or n > 2 do
279
+ until txt.rstrip[/[\.\?!]\W*$/] or n > 2 do
189
280
  n +=1
190
- nverse = read(chapter, verse.attributes[:no].to_i + n)
281
+ nverse = read(chapter, verse.attributes[:no].to_i + n)[0]
282
+ puts 'nverse: ' + nverse&.text.inspect if @debug
191
283
  txt += nverse.text
192
284
  end
193
285
 
@@ -199,11 +291,69 @@ module BibleTools
199
291
  [verse.parent.attributes[:no], verse.attributes[:no], txt]
200
292
  end
201
293
 
202
- h1 = a2.group_by(&:first)
294
+ # group by chapter
295
+ #
296
+ h = a2.group_by(&:first)
297
+
298
+ doc = Rexle.new('<book/>')
299
+
300
+ h.each do |key, value|
301
+
302
+ echapter = Rexle::Element.new('chapter', attributes: {no: key})
303
+
304
+ value.each do |_, verse_no, verse|
305
+ everse = Rexle::Element.new('verse', attributes: {no: verse_no}).add_text verse
306
+ echapter.add everse
307
+ end
308
+
309
+ doc.root.add echapter
310
+
311
+ end
312
+
313
+ return doc
314
+
203
315
  end
204
316
 
317
+ def tts(chapter, verse)
318
+
319
+ verses = read(chapter, verse).map(&:text).join(' ')
320
+ @cerevoice.tts verses, out: 'tts.ogg'
321
+
322
+ end
323
+
324
+ private
325
+
326
+ def mine_words(r, trail, verses=[], level: 0)
327
+
328
+ i = 0
329
+ found = []
330
+ (i-=1; found = r.last[i][0].keys - trail) until found.any?
331
+ r2 = r.last[i][0].keys - trail
332
+ a3 = r2.map do |x|
333
+ _, doc_verse = assoc_r x, r.last[i][1]
334
+ chaptr_no = doc_verse.root.element('chapter').attributes[:no]
335
+ everse = doc_verse.root.element('chapter/verse')
336
+ verse_no = everse.attributes[:no]
337
+ [chaptr_no, verse_no, everse.text, level]
338
+ end.uniq
339
+
340
+ #a3b = a3.group_by {|x| x[0]}.map(&:last).flatten(1).sort_by {|x| x[0].to_i}
341
+ #a3b.length
342
+
343
+ puts 'verses: ' + verses.inspect if @debug
344
+ verses.concat a3
345
+ verses.uniq!
346
+ trail.concat r2
347
+ puts 'trail.length: ' + trail.length.inspect if @debug
348
+ puts 'verses.length: ' + verses.length.inspect if @debug
349
+
350
+ if verses.length < 30 or trail.length < 100
351
+ mine_words(r, trail, verses, level: level+1)
352
+ else
353
+ [verses, trail]
354
+ end
355
+ end
205
356
 
206
357
  end
207
358
 
208
359
  end
209
-
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bibletools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -36,7 +36,7 @@ cert_chain:
36
36
  hIlyeyH9vsaJLam1TYsSqh3KUBmRCIvc3XELg2MAlggTrHuCjBPdYXqjIOFvO6HZ
37
37
  Z1XDnMf4RhcEPF4AU3Q+Fefsw4qKRWH3YUw=
38
38
  -----END CERTIFICATE-----
39
- date: 2022-11-01 00:00:00.000000000 Z
39
+ date: 2022-11-05 00:00:00.000000000 Z
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokorexi
@@ -78,6 +78,46 @@ dependencies:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: 0.3.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: cerecvoice2019
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '1.0'
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 1.0.1
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '1.0'
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 1.0.1
101
+ - !ruby/object:Gem::Dependency
102
+ name: english_spellchecker
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '0.3'
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.1
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.3'
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 0.3.1
81
121
  description:
82
122
  email: digital.robertson@gmail.com
83
123
  executables: []
metadata.gz.sig CHANGED
Binary file