bibletools 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b39f3947e9bf8e827ce070ad544f2f03a5b28e43554588ec99e6511aaf7c7109
4
- data.tar.gz: 6d88f41a49b461adc25993c65e2abbeffc6616b9d727bab0bcc93c3570bcc824
3
+ metadata.gz: c6ae238a57f75318c4238835d64567683523fdcf106e3a4eba8007e833de88ac
4
+ data.tar.gz: bd3471c321d7caf26d360bc66ee44eda5b3276d808de12e4ec450044a5f62515
5
5
  SHA512:
6
- metadata.gz: '09532d8e8b4d88ae66ffdcb7f62aec4c7887cb9b87497deb5b04206f6b3c5cbcbcffa618e2ded85a683a57f43907e8f5e30dcb0d62885481fc0a95540240cad6'
7
- data.tar.gz: ec3bc05ded3b431231eaea8f2e00cde75a4da60bc8cc4fbb6fda1cd3d8312fabd1d50aa7e096742f7beb05b36e306b853b4cd92d446a9bb060333ce544614645
6
+ metadata.gz: 5dddc1de4ef4dc2439a3f3ca8d4e49a7973dd89c06362ac1761be02ed2240a09367b622b3abdac5ad919f23c49ef3e72ee63dbac13bd4818aa3abe57b19dddb7
7
+ data.tar.gz: b39d63a2795fc9468e176ee44e674a79042e2f5608c64099b9604ce8e8d75f75376cd26fd6040760f5c113579fba00cfb1c8ee5d1c345576c1a064d54d9443a4
checksums.yaml.gz.sig CHANGED
Binary file
data/lib/bibletools.rb CHANGED
@@ -6,6 +6,85 @@ require 'rexle'
6
6
  require 'yawc'
7
7
  require 'nokorexi'
8
8
 
9
+ require 'cerecvoice2019'
10
+ require 'english_spellchecker'
11
+
12
+
13
+ BOOKS = [
14
+ "Genesis",
15
+ "Exodus",
16
+ "Leviticus",
17
+ "Numbers",
18
+ "Deuteronomy",
19
+ "Joshua",
20
+ "Judges",
21
+ "Ruth",
22
+ "1 Samuel",
23
+ "2 Samuel",
24
+ "1 Kings",
25
+ "2 Kings",
26
+ "1 Chronicles",
27
+ "2 Chronicles",
28
+ "Ezra",
29
+ "Nehemiah",
30
+ "Tobit",
31
+ "Judith",
32
+ "Esther",
33
+ "1 Maccabees",
34
+ "2 Maccabees",
35
+ "Job",
36
+ "Psalms",
37
+ "The Proverbs",
38
+ "Ecclesiastes",
39
+ "The Song of Songs",
40
+ "Wisdom",
41
+ "Ecclesiasticus / Sirach",
42
+ "Isaiah",
43
+ "Jeremiah",
44
+ "Lamentations",
45
+ "Baruch",
46
+ "Ezekiel",
47
+ "Daniel",
48
+ "Hosea",
49
+ "Joel",
50
+ "Amos",
51
+ "Obadiah",
52
+ "Jonah",
53
+ "Micah",
54
+ "Nahum",
55
+ "Habakkuk",
56
+ "Zephaniah",
57
+ "Haggai",
58
+ "Zechariah",
59
+ "Malachi",
60
+ "Matthew",
61
+ "Mark",
62
+ "Luke",
63
+ "John",
64
+ "Acts of Apostles",
65
+ "Romans",
66
+ "1 Corinthians",
67
+ "2 Corinthians",
68
+ "Galatians",
69
+ "Ephesians",
70
+ "Philippians",
71
+ "Colossians",
72
+ "1 Thessalonians",
73
+ "2 Thessalonians",
74
+ "1 Timothy",
75
+ "2 Timothy",
76
+ "Titus",
77
+ "Philemon",
78
+ "Hebrews",
79
+ "James",
80
+ "1 Peter",
81
+ "2 Peter",
82
+ "1 John",
83
+ "2 John",
84
+ "3 John",
85
+ "Jude",
86
+ "Revelation"
87
+ ]
9
88
 
10
89
  module BibleTools
11
90
 
@@ -20,94 +99,17 @@ module BibleTools
20
99
 
21
100
  baseurl = url
22
101
 
23
- books = ["Genesis",
24
- "Exodus",
25
- "Leviticus",
26
- "Numbers",
27
- "Deuteronomy",
28
- "Joshua",
29
- "Judges",
30
- "Ruth",
31
- "1 Samuel",
32
- "2 Samuel",
33
- "1 Kings",
34
- "2 Kings",
35
- "1 Chronicles",
36
- "2 Chronicles",
37
- "Ezra",
38
- "Nehemiah",
39
- "Tobit",
40
- "Judith",
41
- "Esther",
42
- "1 Maccabees",
43
- "2 Maccabees",
44
- "Job",
45
- "Psalms",
46
- "The Proverbs",
47
- "Ecclesiastes",
48
- "The Song of Songs",
49
- "Wisdom",
50
- "Ecclesiasticus / Sirach",
51
- "Isaiah",
52
- "Jeremiah",
53
- "Lamentations",
54
- "Baruch",
55
- "Ezekiel",
56
- "Daniel",
57
- "Hosea",
58
- "Joel",
59
- "Amos",
60
- "Obadiah",
61
- "Jonah",
62
- "Micah",
63
- "Nahum",
64
- "Habakkuk",
65
- "Zephaniah",
66
- "Haggai",
67
- "Zechariah",
68
- "Malachi",
69
- "Matthew",
70
- "Mark",
71
- "Luke",
72
- "John",
73
- "Acts of Apostles",
74
- "Romans",
75
- "1 Corinthians",
76
- "2 Corinthians",
77
- "Galatians",
78
- "Ephesians",
79
- "Philippians",
80
- "Colossians",
81
- "1 Thessalonians",
82
- "2 Thessalonians",
83
- "1 Timothy",
84
- "2 Timothy",
85
- "Titus",
86
- "Philemon",
87
- "Hebrews",
88
- "James",
89
- "1 Peter",
90
- "2 Peter",
91
- "1 John",
92
- "2 John",
93
- "3 John",
94
- "Jude",
95
- "Revelation"]
96
-
97
102
  id = if book.is_a? String then
98
- (books.index(book) + 1).to_s
103
+ (BOOKS.index(book) + 1).to_s
99
104
  else
100
105
  book.to_s
101
106
  end
102
107
 
103
-
104
-
105
108
  url2 = baseurl + '?id=' + id
106
109
  doc = Nokorexi.new(url2).to_doc
107
110
  e = doc.root.at_css('.pagination')
108
111
  endchapter = e.xpath('li')[-2].text('a')
109
112
 
110
-
111
113
  book = Rexle.new('<book/>')
112
114
 
113
115
  (1..endchapter.to_i).each do |chp|
@@ -143,39 +145,128 @@ module BibleTools
143
145
  def to_doc
144
146
  @doc
145
147
  end
148
+
149
+ def save(title)
150
+
151
+ filename = title.downcase.gsub(/\s+/,'-') + '.xml'
152
+ File.write filename, @doc.root.xml(pretty: true)
153
+ puts filename +' saved!'
154
+
155
+ end
146
156
 
147
157
  end
148
158
 
149
159
  class Analyse
150
160
 
151
- def initialize(bible_obj, debug: false)
161
+ def initialize(bible_obj, tts: nil, debug: false)
152
162
 
153
163
  @debug = debug
154
164
 
155
165
  if bible_obj then
156
166
  @doc = bible_obj.to_doc
157
167
  @verses = @doc.root.xpath('chapter/verse')
168
+ @check = EnglishSpellcheck.new debug: false #verbose: false
169
+
170
+ if tts then
171
+ @cerevoice = Cerecvoice2019.new accountid: tts[:account],
172
+ password: tts[:password], voice: tts[:voice] || 'Stuart'
173
+ end
174
+
175
+ end
176
+
177
+
178
+ end
179
+
180
+ # Automatically select verses; verses are selected based upon word
181
+ # frequency
182
+ #
183
+ def asr()
184
+
185
+ r = assoc_r()
186
+ e = r[1].root.element('chapter/verse')
187
+ verse_no = e.attributes[:no]
188
+ chptr_no = e.parent.attributes[:no]
189
+
190
+ # we are backtracking the results to hopefully find a new branch to explore
191
+ trail = r.first
192
+
193
+ a, trail2 = mine_words(r, trail, level: 2)
194
+ a << [chptr_no, verse_no, e.text, 1]
195
+
196
+ h = a.group_by(&:first)
197
+ a2 = h.sort_by {|x, _| x.to_i}
198
+ a3 = a2.map do |chapter, verses|
199
+ [chapter, verses.sort_by {|_,x| x.to_i}]
158
200
  end
201
+
202
+ [a3, trail2]
203
+ end
204
+
205
+ # find the words associated with a given keyword
206
+ #
207
+ def assoc(keyword, doc=@doc)
208
+ doc2 = self.search keyword, doc
209
+ [self.words(doc: doc2), doc2]
210
+ end
211
+
212
+ def assoc_r(keyword=words().keys.first, doc=@doc, list=[], results=[])
213
+
214
+ result = assoc(keyword, doc)
215
+ results << result
216
+ h, doc2 = result
217
+ new_keyword = h.keys.find {|x| not list.include? x}
218
+ return [list, doc2, results] unless new_keyword
219
+
220
+ list << new_keyword
221
+ assoc_r(new_keyword, doc2, list, results)
159
222
 
160
223
  end
161
224
 
162
- def wordtally()
225
+ def custom_words(doc: @doc, level: 3)
226
+
227
+ a = wordtally(level: level, doc: doc).keys.map do |word|
228
+ puts 'word: ' + word.inspect if @debug
229
+ [word, @check.spelling(word, verbose: false)]
230
+ end
163
231
 
164
- return unless @doc
165
- Yawc.new(@doc.root.plaintext).to_h
232
+ a2 = a.select do |w, x|
233
+ if x.is_a? String then
234
+ w != x
235
+ elsif x.is_a? Array
236
+ not (w == x.join or w == x.first)
237
+ else
238
+ true
239
+ end
240
+ end
241
+ a2.map(&:first)
242
+
243
+ end
244
+
245
+ def wordtally(level: 2, doc: @doc)
246
+
247
+ return unless doc
248
+ Yawc.new(doc.root.plaintext, level: level).to_h
166
249
 
167
250
  end
168
251
 
169
252
  alias words wordtally
170
253
 
171
254
  def read(chapter, verse)
172
- @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")
255
+
256
+ if verse.is_a? Integer then
257
+ [@doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{verse}']")]
258
+ elsif verse.is_a? Array
259
+ verse.map do |x|
260
+ @doc.root.element("chapter[@no='#{chapter}']/verse[@no='#{x}']")
261
+ end
262
+ end
173
263
  end
174
264
 
175
265
 
176
- def search(keyword)
266
+ def search(keyword, doc=@doc)
177
267
 
178
- a = @verses.select {|x| x.text =~ /#{keyword}/}
268
+ verses = doc.root.xpath('chapter/verse')
269
+ a = verses.select {|x| x.text =~ /#{keyword}/i}
179
270
 
180
271
  a2 = a.map.with_index do |verse, i|
181
272
  txt = verse.text
@@ -185,9 +276,10 @@ module BibleTools
185
276
  n = 0
186
277
  chapter = verse.parent.attributes[:no]
187
278
 
188
- until txt.rstrip[-1] == '.' or n > 2 do
279
+ until txt.rstrip[/[\.\?!]\W*$/] or n > 2 do
189
280
  n +=1
190
- nverse = read(chapter, verse.attributes[:no].to_i + n)
281
+ nverse = read(chapter, verse.attributes[:no].to_i + n)[0]
282
+ puts 'nverse: ' + nverse&.text.inspect if @debug
191
283
  txt += nverse.text
192
284
  end
193
285
 
@@ -199,11 +291,69 @@ module BibleTools
199
291
  [verse.parent.attributes[:no], verse.attributes[:no], txt]
200
292
  end
201
293
 
202
- h1 = a2.group_by(&:first)
294
+ # group by chapter
295
+ #
296
+ h = a2.group_by(&:first)
297
+
298
+ doc = Rexle.new('<book/>')
299
+
300
+ h.each do |key, value|
301
+
302
+ echapter = Rexle::Element.new('chapter', attributes: {no: key})
303
+
304
+ value.each do |_, verse_no, verse|
305
+ everse = Rexle::Element.new('verse', attributes: {no: verse_no}).add_text verse
306
+ echapter.add everse
307
+ end
308
+
309
+ doc.root.add echapter
310
+
311
+ end
312
+
313
+ return doc
314
+
203
315
  end
204
316
 
317
+ def tts(chapter, verse)
318
+
319
+ verses = read(chapter, verse).map(&:text).join(' ')
320
+ @cerevoice.tts verses, out: 'tts.ogg'
321
+
322
+ end
323
+
324
+ private
325
+
326
+ def mine_words(r, trail, verses=[], level: 0)
327
+
328
+ i = 0
329
+ found = []
330
+ (i-=1; found = r.last[i][0].keys - trail) until found.any?
331
+ r2 = r.last[i][0].keys - trail
332
+ a3 = r2.map do |x|
333
+ _, doc_verse = assoc_r x, r.last[i][1]
334
+ chaptr_no = doc_verse.root.element('chapter').attributes[:no]
335
+ everse = doc_verse.root.element('chapter/verse')
336
+ verse_no = everse.attributes[:no]
337
+ [chaptr_no, verse_no, everse.text, level]
338
+ end.uniq
339
+
340
+ #a3b = a3.group_by {|x| x[0]}.map(&:last).flatten(1).sort_by {|x| x[0].to_i}
341
+ #a3b.length
342
+
343
+ puts 'verses: ' + verses.inspect if @debug
344
+ verses.concat a3
345
+ verses.uniq!
346
+ trail.concat r2
347
+ puts 'trail.length: ' + trail.length.inspect if @debug
348
+ puts 'verses.length: ' + verses.length.inspect if @debug
349
+
350
+ if verses.length < 30 or trail.length < 100
351
+ mine_words(r, trail, verses, level: level+1)
352
+ else
353
+ [verses, trail]
354
+ end
355
+ end
205
356
 
206
357
  end
207
358
 
208
359
  end
209
-
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bibletools
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - James Robertson
@@ -36,7 +36,7 @@ cert_chain:
36
36
  hIlyeyH9vsaJLam1TYsSqh3KUBmRCIvc3XELg2MAlggTrHuCjBPdYXqjIOFvO6HZ
37
37
  Z1XDnMf4RhcEPF4AU3Q+Fefsw4qKRWH3YUw=
38
38
  -----END CERTIFICATE-----
39
- date: 2022-11-01 00:00:00.000000000 Z
39
+ date: 2022-11-05 00:00:00.000000000 Z
40
40
  dependencies:
41
41
  - !ruby/object:Gem::Dependency
42
42
  name: nokorexi
@@ -78,6 +78,46 @@ dependencies:
78
78
  - - ">="
79
79
  - !ruby/object:Gem::Version
80
80
  version: 0.3.0
81
+ - !ruby/object:Gem::Dependency
82
+ name: cerecvoice2019
83
+ requirement: !ruby/object:Gem::Requirement
84
+ requirements:
85
+ - - "~>"
86
+ - !ruby/object:Gem::Version
87
+ version: '1.0'
88
+ - - ">="
89
+ - !ruby/object:Gem::Version
90
+ version: 1.0.1
91
+ type: :runtime
92
+ prerelease: false
93
+ version_requirements: !ruby/object:Gem::Requirement
94
+ requirements:
95
+ - - "~>"
96
+ - !ruby/object:Gem::Version
97
+ version: '1.0'
98
+ - - ">="
99
+ - !ruby/object:Gem::Version
100
+ version: 1.0.1
101
+ - !ruby/object:Gem::Dependency
102
+ name: english_spellchecker
103
+ requirement: !ruby/object:Gem::Requirement
104
+ requirements:
105
+ - - "~>"
106
+ - !ruby/object:Gem::Version
107
+ version: '0.3'
108
+ - - ">="
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.1
111
+ type: :runtime
112
+ prerelease: false
113
+ version_requirements: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '0.3'
118
+ - - ">="
119
+ - !ruby/object:Gem::Version
120
+ version: 0.3.1
81
121
  description:
82
122
  email: digital.robertson@gmail.com
83
123
  executables: []
metadata.gz.sig CHANGED
Binary file