bibletools 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b39f3947e9bf8e827ce070ad544f2f03a5b28e43554588ec99e6511aaf7c7109
4
- data.tar.gz: 6d88f41a49b461adc25993c65e2abbeffc6616b9d727bab0bcc93c3570bcc824
3
+ metadata.gz: 5085703f655b244826701beeba91b5bbfc535a8d34f2d4fbbc842c6229ced5fa
4
+ data.tar.gz: 27639f898a9c016fdbaf47cbaddeae392c93500145492ea0ba51f1a2ebc82e1b
5
5
  SHA512:
6
- metadata.gz: '09532d8e8b4d88ae66ffdcb7f62aec4c7887cb9b87497deb5b04206f6b3c5cbcbcffa618e2ded85a683a57f43907e8f5e30dcb0d62885481fc0a95540240cad6'
7
- data.tar.gz: ec3bc05ded3b431231eaea8f2e00cde75a4da60bc8cc4fbb6fda1cd3d8312fabd1d50aa7e096742f7beb05b36e306b853b4cd92d446a9bb060333ce544614645
6
+ metadata.gz: a70124fe758ee62e6874e6126db903c56eb0edf8bd35f5ea7a80dae1917cc30c225191dc6eec93f35ac1f63b77dbaef461f25398a5a0db90684ece938aa427c8
7
+ data.tar.gz: 861ed48ae15f6d7516d98fac70e76b2b2bc6633c90f56193b7ca4f5f8fafbeee07e12ba8adbef23cb57119ef7c0a0c9ff31e7ec49a91383f695c19955dbfc9fc
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/bibletools.rb CHANGED
@@ -6,6 +6,85 @@ require 'rexle'
6
6
  require 'yawc'
7
7
  require 'nokorexi'
8
8
 
9
+ require 'cerecvoice2019'
10
+ require 'english_spellchecker'
11
+
12
+
13
+ BOOKS = [
14
+ "Genesis",
15
+ "Exodus",
16
+ "Leviticus",
17
+ "Numbers",
18
+ "Deuteronomy",
19
+ "Joshua",
20
+ "Judges",
21
+ "Ruth",
22
+ "1 Samuel",
23
+ "2 Samuel",
24
+ "1 Kings",
25
+ "2 Kings",
26
+ "1 Chronicles",
27
+ "2 Chronicles",
28
+ "Ezra",
29
+ "Nehemiah",
30
+ "Tobit",
31
+ "Judith",
32
+ "Esther",
33
+ "1 Maccabees",
34
+ "2 Maccabees",
35
+ "Job",
36
+ "Psalms",
37
+ "The Proverbs",
38
+ "Ecclesiastes",
39
+ "The Song of Songs",
40
+ "Wisdom",
41
+ "Ecclesiasticus / Sirach",
42
+ "Isaiah",
43
+ "Jeremiah",
44
+ "Lamentations",
45
+ "Baruch",
46
+ "Ezekiel",
47
+ "Daniel",
48
+ "Hosea",
49
+ "Joel",
50
+ "Amos",
51
+ "Obadiah",
52
+ "Jonah",
53
+ "Micah",
54
+ "Nahum",
55
+ "Habakkuk",
56
+ "Zephaniah",
57
+ "Haggai",
58
+ "Zechariah",
59
+ "Malachi",
60
+ "Matthew",
61
+ "Mark",
62
+ "Luke",
63
+ "John",
64
+ "Acts of Apostles",
65
+ "Romans",
66
+ "1 Corinthians",
67
+ "2 Corinthians",
68
+ "Galatians",
69
+ "Ephesians",
70
+ "Philippians",
71
+ "Colossians",
72
+ "1 Thessalonians",
73
+ "2 Thessalonians",
74
+ "1 Timothy",
75
+ "2 Timothy",
76
+ "Titus",
77
+ "Philemon",
78
+ "Hebrews",
79
+ "James",
80
+ "1 Peter",
81
+ "2 Peter",
82
+ "1 John",
83
+ "2 John",
84
+ "3 John",
85
+ "Jude",
86
+ "Revelation"
87
+ ]
9
88
 
10
89
  module BibleTools
11
90
 
@@ -20,94 +99,17 @@ module BibleTools
20
99
 
21
100
  baseurl = url
22
101
 
23
- books = ["Genesis",
24
- "Exodus",
25
- "Leviticus",
26
- "Numbers",
27
- "Deuteronomy",
28
- "Joshua",
29
- "Judges",
30
- "Ruth",
31
- "1 Samuel",
32
- "2 Samuel",
33
- "1 Kings",
34
- "2 Kings",
35
- "1 Chronicles",
36
- "2 Chronicles",
37
- "Ezra",
38
- "Nehemiah",
39
- "Tobit",
40
- "Judith",
41
- "Esther",
42
- "1 Maccabees",
43
- "2 Maccabees",
44
- "Job",
45
- "Psalms",
46
- "The Proverbs",
47
- "Ecclesiastes",
48
- "The Song of Songs",
49
- "Wisdom",
50
- "Ecclesiasticus / Sirach",
51
- "Isaiah",
52
- "Jeremiah",
53
- "Lamentations",
54
- "Baruch",
55
- "Ezekiel",
56
- "Daniel",
57
- "Hosea",
58
- "Joel",
59
- "Amos",
60
- "Obadiah",
61
- "Jonah",
62
- "Micah",
63
- "Nahum",
64
- "Habakkuk",
65
- "Zephaniah",
66
- "Haggai",
67
- "Zechariah",
68
- "Malachi",
69
- "Matthew",
70
- "Mark",
71
- "Luke",
72
- "John",
73
- "Acts of Apostles",
74
- "Romans",
75
- "1 Corinthians",
76
- "2 Corinthians",
77
- "Galatians",
78
- "Ephesians",
79
- "Philippians",
80
- "Colossians",
81
- "1 Thessalonians",
82
- "2 Thessalonians",
83
- "1 Timothy",
84
- "2 Timothy",
85
- "Titus",
86
- "Philemon",
87
- "Hebrews",
88
- "James",
89
- "1 Peter",
90
- "2 Peter",
91
- "1 John",
92
- "2 John",
93
- "3 John",
94
- "Jude",
95
- "Revelation"]
96
-
97
102
  id = if book.is_a? String then
98
- (books.index(book) + 1).to_s
103
+ (BOOKS.index(book) + 1).to_s
99
104
  else
100
105
  book.to_s
101
106
  end
102
107
 
103
-
104
-
105
108
  url2 = baseurl + '?id=' + id
106
109
  doc = Nokorexi.new(url2).to_doc
107
110
  e = doc.root.at_css('.pagination')
108
111
  endchapter = e.xpath('li')[-2].text('a')
109
112
 
110
-
111
113
  book = Rexle.new('<book/>')
112
114
 
113
115
  (1..endchapter.to_i).each do |chp|
@@ -143,39 +145,128 @@ module BibleTools
143
145
  def to_doc
144
146
  @doc
145
147
  end
148
+
149
+ def save(title)
150
+
151
+ filename = title.downcase.gsub(/\s+/,'-') + '.xml'
152
+ File.write filename, @doc.root.xml(pretty: true)
153
+ puts filename +' saved!'
154
+
155
+ end
146
156
 
147
157
  end
148
158
 
149
159
  class Analyse
150
160
 
151
- def initialize(bible_obj, debug: false)
161
+ def initialize(bible_obj, tts: nil, debug: false)
152
162
 
153
163
  @debug = debug
154
164
 
155
165
  if bible_obj then
166
+
156
167
  @doc = bible_obj.to_doc
157
- @verses = @doc.root.xpath('chapter/verse')
168
+ @check = EnglishSpellcheck.new debug: false #verbose: false
169
+
170
+ if tts then
171
+ @cerevoice = Cerecvoice2019.new accountid: tts[:account],
172
+ password: tts[:password], voice: tts[:voice] || 'Stuart'
173
+ end
174
+
175
+ end
176
+
177
+
178
+ end
179
+
180
+ # Automatically select verses; verses are selected based upon word
181
+ # frequency
182
+ #
183
+ def asr()
184
+
185
+ r = assoc_r()
186
+ e = r[1].root.element('chapter/verse')
187
+ verse_no = e.attributes[:no]
188
+ chptr_no = e.parent.attributes[:no]
189
+
190
+ # we are backtracking the results to hopefully find a new branch to explore
191
+ trail = r.first
192
+
193
+ a, trail2 = mine_words(r, trail, level: 2)
194
+ a << [chptr_no, verse_no, e.text, 1]
195
+
196
+ h = a.group_by(&:first)
197
+ a2 = h.sort_by {|x, _| x.to_i}
198
+ @verses = a2.map do |chapter, verses|
199
+ [chapter, verses.sort_by {|_,x| x.to_i}]
158
200
  end
201
+
202
+ [@verses, trail2]
203
+ end
204
+
205
+ # find the words associated with a given keyword
206
+ #
207
+ def assoc(keyword, doc=@doc)
208
+ doc2 = self.search keyword, doc
209
+ [self.words(doc: doc2), doc2]
210
+ end
211
+
212
+ def assoc_r(keyword=words().keys.first, doc=@doc, list=[], results=[])
213
+
214
+ result = assoc(keyword, doc)
215
+ results << result
216
+ h, doc2 = result
217
+ new_keyword = h.keys.find {|x| not list.include? x}
218
+ return [list, doc2, results] unless new_keyword
219
+
220
+ list << new_keyword
221
+ assoc_r(new_keyword, doc2, list, results)
159
222
 
160
223
  end
161
224
 
162
- def wordtally()
225
+ def custom_words(doc: @doc, level: 3)
226
+
227
+ a = wordtally(level: level, doc: doc).keys.map do |word|
228
+ puts 'word: ' + word.inspect if @debug
229
+ [word, @check.spelling(word, verbose: false)]
230
+ end
163
231
 
164
- return unless @doc
165
- Yawc.new(@doc.root.plaintext).to_h
232
+ a2 = a.select do |w, x|
233
+ if x.is_a? String then
234
+ w != x
235
+ elsif x.is_a? Array
236
+ not (w == x.join or w == x.first)
237
+ else
238
+ true
239
+ end
240
+ end
241
+ a2.map(&:first)
242
+
243
+ end
244
+
245
+ def wordtally(level: 2, doc: @doc)
246
+
247
+ return unless doc
248
+ Yawc.new(doc.root.plaintext, level: level).to_h
166
249
 
167
250
  end
168
251
 
169
252
  alias words wordtally
170
253
 
171
254
  def read(chapter, verse)
172
- @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")
255
+
256
+ if verse.is_a? Integer then
257
+ [@doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")]
258
+ elsif verse.is_a? Array
259
+ verse.map do |x|
260
+ @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{x}']")
261
+ end
262
+ end
173
263
  end
174
264
 
175
265
 
176
- def search(keyword)
266
+ def search(keyword, doc=@doc)
177
267
 
178
- a = @verses.select {|x| x.text =~ /#{keyword}/}
268
+ verses = doc.root.xpath('chapter/verse')
269
+ a = verses.select {|x| x.text =~ /#{keyword}/i}
179
270
 
180
271
  a2 = a.map.with_index do |verse, i|
181
272
  txt = verse.text
@@ -185,9 +276,10 @@ module BibleTools
185
276
  n = 0
186
277
  chapter = verse.parent.attributes[:no]
187
278
 
188
- until txt.rstrip[-1] == '.' or n > 2 do
279
+ until txt.rstrip[/[\.\?!]\W*$/] or n > 2 do
189
280
  n +=1
190
- nverse = read(chapter, verse.attributes[:no].to_i + n)
281
+ nverse = read(chapter, verse.attributes[:no].to_i + n)[0]
282
+ puts 'nverse: ' + nverse&.text.inspect if @debug
191
283
  txt += nverse.text
192
284
  end
193
285
 
@@ -199,11 +291,118 @@ module BibleTools
199
291
  [verse.parent.attributes[:no], verse.attributes[:no], txt]
200
292
  end
201
293
 
202
- h1 = a2.group_by(&:first)
294
+ # group by chapter
295
+ #
296
+ h = a2.group_by(&:first)
297
+
298
+ doc = Rexle.new('<book/>')
299
+
300
+ h.each do |key, value|
301
+
302
+ echapter = Rexle::Element.new('chapter', attributes: {no: key})
303
+
304
+ value.each do |_, verse_no, verse|
305
+ everse = Rexle::Element.new('verse', attributes: {no: verse_no}).add_text verse
306
+ echapter.add everse
307
+ end
308
+
309
+ doc.root.add echapter
310
+
311
+ end
312
+
313
+ return doc
314
+
315
+ end
316
+
317
+ def tts(chapter, verse)
318
+
319
+ verses = read(chapter, verse).map(&:text).join(' ')
320
+ @cerevoice.tts verses, out: 'tts.ogg'
321
+
322
+ end
323
+
324
+ def verses(level: nil, html: false, title: nil)
325
+
326
+ a = @verses.map {|_, body| body}.flatten(1)
327
+ a1 = level ? a.select {|c,v,t,l| l <= level} : a.map {|c,v,t,l| [c,v,t]}
328
+ a2 = a1.map {|c,v,t,l| [c,v,t]}.uniq
329
+ h = a2.group_by(&:first)
330
+
331
+ puts 'html: ' + html.inspect
332
+ return h unless html == true
333
+
334
+ doc = Rexle.new('<html/>')
335
+
336
+
337
+ head = Rexle::Element.new('head')
338
+ head.add(Rexle::Element.new('h1').add_text(title) ) if title
339
+
340
+ style = Rexle::Element.new('style')
341
+ style.add_text ' ins {font-size: 0.8em;padding: 0.8em;}'
342
+ head.add style
343
+
344
+ doc.root.add head
345
+ body = Rexle::Element.new('body')
346
+
347
+ h.each do |chptr, verses|
348
+
349
+ h2 = Rexle::Element.new('h2').add_text chptr
350
+ body.add h2
351
+
352
+ verses.each do |cno, vno, text|
353
+
354
+ ins = Rexle::Element.new('ins').add_text vno
355
+ para = Rexle::Element.new('p')
356
+ para.add ins
357
+ para.add_text text
358
+ body.add para
359
+
360
+ end
361
+
362
+ end
363
+
364
+ doc.root.add body
365
+
366
+ doc.root.xml pretty: true
367
+
203
368
  end
204
369
 
370
+ private
371
+
372
+ def mine_words(r, trail, verses=[], level: 0)
373
+
374
+ i = 0
375
+ found = []
376
+ (i-=1; found = r.last[i][0].keys - trail) until found.any?
377
+ r2 = r.last[i][0].keys - trail
378
+
379
+ a3 = r2.map do |x|
380
+
381
+ _, doc_verse = assoc_r x, r.last[i][1]
382
+ chaptr_no = doc_verse.root.element('chapter').attributes[:no]
383
+ everse = doc_verse.root.element('chapter/verse')
384
+ verse_no = everse.attributes[:no]
385
+
386
+ [chaptr_no, verse_no, everse.text, level]
387
+
388
+ end.uniq
389
+
390
+ puts 'verses: ' + verses.inspect if @debug
391
+
392
+ verses.concat a3
393
+ verses.uniq!
394
+ trail.concat r2
395
+
396
+ puts 'trail.length: ' + trail.length.inspect if @debug
397
+ puts 'verses.length: ' + verses.length.inspect if @debug
398
+
399
+ if verses.length < 30 or trail.length < 100
400
+ mine_words(r, trail, verses, level: level+1)
401
+ else
402
+ [verses, trail]
403
+ end
404
+ end
205
405
 
206
406
  end
207
407
 
208
408
  end
209
-
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bibletools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -36,7 +36,7 @@ cert_chain:
36
36
  hIlyeyH9vsaJLam1TYsSqh3KUBmRCIvc3XELg2MAlggTrHuCjBPdYXqjIOFvO6HZ
37
37
  Z1XDnMf4RhcEPF4AU3Q+Fefsw4qKRWH3YUw=
38
38
  -----END CERTIFICATE-----
39
- date: 2022-11-01 00:00:00.000000000 Z
39
+ date: 2022-11-06 00:00:00.000000000 Z
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokorexi
@@ -78,6 +78,46 @@ dependencies:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: 0.3.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: cerecvoice2019
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '1.0'
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 1.0.1
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '1.0'
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 1.0.1
101
+ - !ruby/object:Gem::Dependency
102
+ name: english_spellchecker
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '0.3'
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.1
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.3'
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 0.3.1
81
121
  description:
82
122
  email: digital.robertson@gmail.com
83
123
  executables: []
metadata.gz.sig CHANGED
Binary file