linkture 2.5.6__py3-none-any.whl → 2.6.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
linkture/__init__.py CHANGED
@@ -0,0 +1,3 @@
1
+ #!/usr/bin/env python
2
+
3
+ from .linkture import *
linkture/__main__.py CHANGED
@@ -26,591 +26,12 @@
26
26
  SOFTWARE.
27
27
  """
28
28
 
29
- __version__ = 'v2.5.6'
30
-
31
-
32
- import argparse, json, regex, sqlite3
33
- import pandas as pd
34
-
29
+ import argparse
30
+ from .linkture import available_languages, __app__, __version__, Scriptures
35
31
  from ast import literal_eval
36
- from pathlib import Path
37
- from unidecode import unidecode
38
-
39
-
40
- available_languages = ('Cebuano', 'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Hungarian', 'Italian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese', 'Russian', 'Spanish', 'Tagalog', 'Ukrainian')
41
- non_latin = ('Chinese', 'Greek', 'Japanese', 'Korean', 'Russian', 'Ukrainian')
42
-
43
-
44
- class Scriptures():
45
-
46
- def __init__(self, language='English', translate=None, form=None, separator=' ', upper=False, verbose=False):
47
- self._verbose = verbose
48
- self._separator = separator
49
- if language not in available_languages:
50
- raise ValueError("Indicated source language is not an option!")
51
- if translate:
52
- if translate not in available_languages:
53
- raise ValueError("Indicated translation language is not an option!")
54
- else:
55
- translate = language
56
- if language in non_latin:
57
- self._nl = True
58
- else:
59
- self._nl = False
60
- self._rewrite = bool((language != translate) or form)
61
- self._upper = upper
62
- if form == "full":
63
- form = 3
64
- elif form == "standard":
65
- form = 4
66
- elif form == "official":
67
- form = 5
68
- else:
69
- form = 3
70
- self._src_book_names = {}
71
- path = Path(__file__).resolve().parent
72
-
73
- self._tr_book_names = ['Bible']
74
- con = sqlite3.connect(path / 'res/resources.db')
75
- cur = con.cursor()
76
- for rec in cur.execute(f"SELECT * FROM Books WHERE Language = '{translate}';").fetchall():
77
- if self._upper:
78
- tr = rec[form].upper()
79
- else:
80
- tr = rec[form]
81
- self._tr_book_names.insert(rec[2], tr)
82
- for rec in cur.execute(f"SELECT * FROM Books WHERE Language = '{language}';").fetchall():
83
- for i in range(3,6):
84
- item = rec[i]
85
- if not self._nl:
86
- item = unidecode(item)
87
- normalized = regex.sub(r'\p{P}|\p{Z}', '', item.upper())
88
- self._src_book_names[normalized] = rec[2]
89
- with open(path / 'res/custom.json', 'r', encoding='UTF-8') as json_file:
90
- b = json.load(json_file)
91
- if language in b.keys():
92
- for row in b[language]:
93
- names = row[1].split(', ')
94
- for item in names:
95
- if not self._nl:
96
- item = unidecode(item)
97
- normalized = regex.sub(r'\p{P}|\p{Z}', '', item.upper())
98
- self._src_book_names[normalized] = row[0]
99
- self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
100
- self._verses = pd.read_sql_query("SELECT * FROM Verses;", con)
101
- self._chapters = pd.read_sql_query("SELECT * FROM Chapters;", con)
102
- cur.close()
103
- con.close()
104
- self._reported = []
105
- self._encoded = {}
106
- self._linked = {}
107
-
108
- # Scripture reference parser:
109
- self._first_pass = regex.compile(r"""(
110
- {{.*?}} |
111
-
112
- (?:[1-5] (?:\p{Z} |
113
- \.\p{Z}? |
114
- \p{Pd} |
115
- \p{L}{1,2} (?:\p{Z} |
116
- \.\p{Z}? |
117
- \p{Pd}))? |
118
- [IV]{1,3} (?:\p{Z} |
119
- \.\p{Z}? |
120
- \p{Pd}) )?
121
- (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
122
- (?:\d+\p{Z}?[:,\.\p{Pd};]\p{Z}?)*
123
- (?<=[\p{L},:\p{Pd}]\p{Z} |
124
- [\p{L},:\p{Pd}] |
125
- \.)\d+
126
- (?![,\p{Pd}\p{L}]) |
127
-
128
- (?:[1-5] (?:\p{Z} |
129
- \.\p{Z}? |
130
- \p{Pd} |
131
- \p{L}{1,2} (?:\p{Z} |
132
- \.\p{Z}? |
133
- \p{Pd}))? |
134
- [IV]{1,3} (?:\p{Z} |
135
- \.\p{Z}? |
136
- \p{Pd}) )
137
- (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]*\p{L}
138
- )""", flags=regex.VERBOSE | regex.IGNORECASE)
139
-
140
- self._second_pass = regex.compile(r"""(
141
- (?![^{]*}) # ignore already marked
142
- \p{L}[\p{L}\p{Pd}\.]+\p{Z}?
143
- (?:\d+\p{Z}?[:,\p{Pd};]\p{Z}?)*\d+
144
- (?![,\p{Pd}\p{L}])
145
- )""", flags=regex.VERBOSE)
146
-
147
- self._bk_ref = regex.compile(r"""
148
- ((?:[1-5]\p{L}{0,2} |
149
- [IV]{1,3} )?
150
- [\p{Pd}\.]?[\p{L}\p{Pd}\.\p{Z}]{2,})(.*)
151
- """, flags=regex.VERBOSE | regex.IGNORECASE)
152
-
153
- self._tagged = regex.compile(r'({{.*?}})')
154
-
155
- self._cv_cv = regex.compile(r'(\d+):(\d+)-(\d+):(\d+)')
156
- self._v_cv = regex.compile(r'(\d+)-(\d+):(\d+)')
157
- self._cv_v = regex.compile(r'(\d+):(\d+)-(\d+)')
158
- self._cv = regex.compile(r'(\d+):(\d+)')
159
- self._ddd = regex.compile(r'(\d+),(\d+),(\d+)')
160
- self._dd_d = regex.compile(r'(\d+),(\d+)-(\d+)')
161
- self._d_dd = regex.compile(r'(\d+)-(\d+),(\d+)')
162
- self._d_d = regex.compile(r'(\d+)-(\d+)(?!:)')
163
- self._dd = regex.compile(r'(\d+),(\d+)')
164
- self._d = regex.compile(r'(\d+)')
165
-
166
- self._chunk = regex.compile(r'([^,;\p{Z}]+.*)')
167
- self._sep = regex.compile(r'(?<!;)\s')
168
-
169
- def _error_report(self, scripture, message):
170
- if self._verbose and (scripture not in self._reported):
171
- print(f'** "{scripture}" - {message}')
172
- self._reported.append(scripture)
173
-
174
- def _scripture_parts(self, scripture):
175
-
176
- def check_book(bk_name):
177
- if not self._nl:
178
- bk_name = unidecode(bk_name) # NOTE: this converts Génesis to Genesis and English recognizes it !! Feature :-)
179
- bk_name = regex.sub(r'\p{P}|\p{Z}', '', bk_name.upper())
180
- if bk_name not in self._src_book_names:
181
- return None, 0
182
- else:
183
- bk_num = self._src_book_names[bk_name]
184
- return self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter.isnull()), ['Book', 'Last']].values[0]
185
-
186
- reduced = regex.sub(r'\p{Z}', '', scripture)
187
- reduced = regex.sub(r'\p{Pd}', '-', reduced)
188
- result = self._bk_ref.search(reduced)
189
- if result:
190
- bk_name, rest = result.group(1).strip(), result.group(2).strip()
191
- bk_num, last = check_book(bk_name)
192
- if bk_num:
193
- tr_name = self._tr_book_names[bk_num]
194
- return tr_name, rest.replace('.', ':'), bk_num, last # for period notation cases (Gen 1.1)
195
- return None, None, None, 0
196
-
197
- def _locate_scriptures(self, text):
198
-
199
- def r(match):
200
- scripture = match.group(1)
201
- if regex.match(r'{{.*}}', scripture):
202
- tag = True
203
- scripture = scripture.strip('}{')
204
- else:
205
- tag = False
206
- if scripture in self._encoded.keys():
207
- return '{{' + scripture +'}}'
208
- _, rest, bk_num, last = self._scripture_parts(scripture)
209
- if bk_num:
210
- code = self._code_scripture(scripture, bk_num, rest, last) # validation performed
211
- if code:
212
- self._encoded[scripture] = code
213
- return '{{' + scripture +'}}'
214
- if tag:
215
- return '»»|' + scripture +'|««' # So as not to lose {{ }} on unrecognized pre-tagged scriptures (other language, etc.)
216
- else:
217
- return scripture
218
-
219
- self._reported = []
220
- text = regex.sub(self._first_pass, r, text)
221
- return regex.sub(self._second_pass, r, text)
222
-
223
-
224
- def list_scriptures(self, text):
225
- lst = []
226
- text = self._locate_scriptures(text)
227
- for scripture in regex.findall(self._tagged, text):
228
- script = scripture.strip('}{')
229
- if self._rewrite:
230
- script = self.decode_scriptures(self._encoded[script])[0]
231
- if self._upper:
232
- script = script.upper()
233
- lst.append(script)
234
- return lst
235
-
236
- def tag_scriptures(self, text):
237
- return self.rewrite_scriptures(text, True)
238
-
239
- def rewrite_scriptures(self, text, tag=False):
240
-
241
- def r(match):
242
- script = match.group(1).strip('}{')
243
- if self._rewrite:
244
- script = self.decode_scriptures(self._encoded[script])[0]
245
- if self._upper:
246
- script = script.upper()
247
- if tag:
248
- return '{{'+script+'}}'
249
- else:
250
- return script
251
-
252
- text = self._locate_scriptures(text)
253
- return regex.sub(self._tagged, r, text).replace('»»|', '{{').replace('|««', '}}')
254
-
255
-
256
- def _code_scripture(self, scripture, bk_num, rest, last):
257
-
258
- def reform_series(txt): # rewrite comma-separated consecutive sequences as (1, 2, 3) as ranges (1-3)
259
- for result in self._d_dd.finditer(txt, overlapped=True):
260
- end = result.group(3)
261
- mid = result.group(2)
262
- start = result.group(1)
263
- if int(end) - int(mid) == 1:
264
- txt = regex.sub(result.group(), f"{start}-{end}", txt)
265
- for result in self._ddd.finditer(txt, overlapped=True):
266
- end = result.group(3)
267
- start = result.group(1)
268
- if int(end) - int(start) == 2:
269
- txt = regex.sub(result.group(), f"{start}-{end}", txt)
270
- for result in self._ddd.finditer(txt, overlapped=True):
271
- end = result.group(3)
272
- start = result.group(1)
273
- if int(end) - int(start) == 2:
274
- txt = regex.sub(result.group(), f"{start}-{end}", txt)
275
- for result in self._dd.finditer(txt, overlapped=True):
276
- end = result.group(2)
277
- start = result.group(1)
278
- if int(end) - int(start) == 1:
279
- txt = regex.sub(result.group(), f"{start}-{end}", txt)
280
- return txt
281
-
282
- def validate(b, ch, vs):
283
- c = int(ch)
284
- v = int(vs)
285
- if not (0 < b <= 66): # book out of range
286
- return None
287
- if not (0 < c <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter.isnull()), ['Last']].values[0]): # chapter out of range
288
- return None
289
- if not (0 < v <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter == c), ['Last']].values[0]): # verse out of range
290
- return None
291
- return True
292
-
293
- def code_verses(chunk, book, multi):
294
- b = str(book).zfill(2)
295
32
 
296
- result = self._cv_cv.search(chunk)
297
- if result:
298
- c = result.group(1)
299
- v = result.group(2)
300
- if not validate(book, c, v):
301
- return None, 0
302
- ch1 = c.zfill(3)
303
- v1 = v.zfill(3)
304
33
 
305
- c = result.group(3)
306
- v = result.group(4)
307
- if not validate(book, c, v):
308
- return None, 0
309
- ch2 = c.zfill(3)
310
- v2 = v.zfill(3)
311
- return (b+ch1+v1, b+ch2+v2), ch2
312
-
313
- result = self._cv_v.search(chunk)
314
- if result:
315
- c = result.group(1)
316
- v = result.group(2)
317
- if not validate(book, c, v):
318
- return None, 0
319
- ch1 = c.zfill(3)
320
- v1 = v.zfill(3)
321
-
322
- v = result.group(3)
323
- if not validate(book, c, v):
324
- return None, 0
325
- v2 = v.zfill(3)
326
- return (b+ch1+v1, b+ch1+v2), ch1
327
-
328
- result = self._v_cv.search(chunk)
329
- if result:
330
- c = str(ch)
331
- v = result.group(1)
332
- if not validate(book, c, v):
333
- return None, 0
334
- ch1 = c
335
- v1 = v.zfill(3)
336
-
337
- c = result.group(2)
338
- v = result.group(3)
339
- if not validate(book, c, v):
340
- return None, 0
341
- ch2 = c.zfill(3)
342
- v2 = v.zfill(3)
343
- return (b+ch1+v1, b+ch2+v2), ch2
344
-
345
- result = self._cv.search(chunk)
346
- if result:
347
- c = result.group(1)
348
- v = result.group(2)
349
- if not validate(book, c, v):
350
- return None, 0
351
- ch1 = c.zfill(3)
352
- v1 = v.zfill(3)
353
- return (b+ch1+v1, b+ch1+v1), ch1
354
-
355
- result = self._d_d.search(chunk)
356
- if result:
357
- if multi:
358
- c = result.group(1)
359
- v = 1
360
- if not validate(book, c, v):
361
- return None, 0
362
- ch1 = c.zfill(3)
363
- v1 = '001'
364
-
365
- c = result.group(2)
366
- if not validate(book, c, v):
367
- return None, 0
368
- ch2 = c.zfill(3)
369
- v2 = str(self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == int(ch2)), ['Last']].values[0][0]).zfill(3)
370
- return (b+ch1+v1, b+ch2+v2), None
371
- else:
372
- c = 1
373
- v = result.group(1)
374
- if not validate(book, c, v):
375
- return None, 0
376
- ch1 = '001'
377
- v1 = v.zfill(3)
378
-
379
- v = result.group(2)
380
- if not validate(book, c, v):
381
- return None, 0
382
- ch2 = ch1
383
- v2 = v.zfill(3)
384
- return (b+ch1+v1, b+ch2+v2), ch2
385
-
386
- result = self._d.search(chunk)
387
- if result:
388
- if multi:
389
- c = result.group(1)
390
- v = 1
391
- if not validate(book, c, v):
392
- return None, 0
393
- ch1 = c.zfill(3)
394
- v1 = '001'
395
- v2 = str(self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == int(ch1)), ['Last']].values[0][0]).zfill(3)
396
- return (b+ch1+v1, b+ch1+v2), None
397
- else:
398
- c = 1
399
- v = result.group(1)
400
- if not validate(book, c, v):
401
- return None, 0
402
- ch1 = '001'
403
- v1 = v.zfill(3)
404
- return (b+ch1+v1, b+ch1+v1), None
405
-
406
- return None, None
407
-
408
- lst = []
409
- if rest == '': # whole book
410
- v = self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter == last), ['Last']].values[0][0]
411
- if last == 1:
412
- rest = f'1-{v}'
413
- else:
414
- rest = f'1:1-{last}:{v}'
415
- else:
416
- rest = reform_series(rest)
417
- for chunk in rest.split(';'):
418
- ch = None
419
- for bit in chunk.split(','):
420
- if ch:
421
- tup, ch = code_verses(f"{ch}:{bit}", bk_num, last>1)
422
- else:
423
- tup, ch = code_verses(bit, bk_num, last>1)
424
- if not tup:
425
- self._error_report(scripture, f'"{bit.strip()}" OUT OF RANGE')
426
- return None
427
- lst.append(tup)
428
- return lst
429
-
430
- def code_scriptures(self, text):
431
- text = self._locate_scriptures(text)
432
- lst = []
433
- for scripture in regex.findall(self._tagged, text):
434
- bcv_ranges = self._encoded[scripture.strip('}{')]
435
- for bcv_range in bcv_ranges:
436
- lst.append(bcv_range)
437
- return lst
438
-
439
-
440
- def _decode_scripture(self, bcv_range, book='', chap=0, sep=';'):
441
- if not bcv_range:
442
- return None, '', 0, False
443
- start, end = bcv_range
444
- sb = int(start[:2])
445
- sc = int(start[2:5])
446
- sv = int(start[5:])
447
- eb = int(end[:2])
448
- ec = int(end[2:5])
449
- ev = int(end[5:])
450
-
451
- if not (sb == eb):
452
- return None, '', 0, False
453
- if not ((0 < sb <= 66) & (sb == eb)): # book out of range
454
- return None, '', 0, False
455
- lc = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter.isnull()), ['Last']].values[0][0]
456
- if not (0 < sc <= ec <= lc): # chapter(s) out of range
457
- return None, '', 0, False
458
- se = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == sc), ['Last']].values[0][0]
459
- le = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == ec), ['Last']].values[0][0]
460
- if not ((0 < sv <= se) & (0 < ev <= le)): # verse(s) out of range
461
- return None, '', 0, False
462
- bk_name = self._tr_book_names[sb]
463
- if book == bk_name:
464
- cont = True
465
- else:
466
- cont = False
467
- book = bk_name
468
- c = ec - sc + 1
469
- v = ev - sv + 1
470
- if lc == 1:
471
- if cont:
472
- bk_name = ','
473
- if v == le:
474
- scripture = f"{bk_name.strip(',')}"
475
- elif v == 1:
476
- scripture = f"{bk_name} {sv}"
477
- elif v == 2:
478
- scripture = f"{bk_name} {sv}, {ev}"
479
- else:
480
- scripture = f"{bk_name} {sv}‑{ev}"
481
- sep = ';'
482
- else:
483
- ch = f"{sc}:"
484
- if v == le:
485
- if cont:
486
- bk_name = sep
487
- if c == lc:
488
- scripture = f"{bk_name.strip(',')}"
489
- elif c == 1:
490
- scripture = f"{bk_name} {sc}"
491
- elif c == 2:
492
- scripture = f"{bk_name} {sc}, {ec}"
493
- else:
494
- scripture = f"{bk_name} {sc}‑{ec}"
495
- sep = ','
496
- elif c == 1:
497
- if cont:
498
- if sc == chap:
499
- bk_name = ''
500
- ch = ', '
501
- else:
502
- bk_name = ';'
503
- if v == 1:
504
- scripture = f"{bk_name} {ch}{sv}"
505
- elif v == 2:
506
- scripture = f"{bk_name} {ch}{sv}, {ev}"
507
- else:
508
- scripture = f"{bk_name} {ch}{sv}‑{ev}"
509
- sep = ';'
510
- else:
511
- if cont and (sc == chap):
512
- bk_name = ''
513
- ch = ', '
514
- scripture = f"{bk_name} {ch}{sv}‑{ec}:{ev}"
515
- sep = ';'
516
- chap = ec
517
- if self._separator != ' ':
518
- scripture = regex.sub(self._sep, self._separator, scripture)
519
- return scripture.strip(), book, chap, cont, sep
520
-
521
- def decode_scriptures(self, bcv_ranges=[]):
522
- scriptures = []
523
- bk = ''
524
- ch = 0
525
- sep = ';'
526
- for bcv_range in bcv_ranges:
527
- scripture, bk, ch, cont, sep = self._decode_scripture(bcv_range, bk, ch, sep)
528
- if scripture:
529
- if cont:
530
- scriptures[-1] = scriptures[-1] + scripture
531
- else:
532
- scriptures.append(scripture)
533
- return scriptures
534
-
535
-
536
- def link_scriptures(self, text, prefix='<a href=', suffix='>'): # NOTE: this always rewrites (full by default) - what if one wants to leave as is??
537
-
538
- def convert_range(bcv_range):
539
- if not bcv_range:
540
- return None, None
541
- start, end = bcv_range
542
- sb = int(start[:2])
543
- sc = int(start[2:5])
544
- sv = int(start[5:])
545
- eb = int(end[:2])
546
- ec = int(end[2:5])
547
- ev = int(end[5:])
548
- if start == end:
549
- return f"{sb}:{sc}:{sv}"
550
- else:
551
- return f"{sb}:{sc}:{sv}-{eb}:{ec}:{ev}"
552
-
553
- def r1(match):
554
-
555
- def r2(match):
556
- return f'{prefix}{lnk}{suffix}{match.group(1)}</a>'
557
-
558
- scripture = match.group(1).strip('}{')
559
- if scripture in self._linked.keys():
560
- return self._linked[scripture]
561
- output = ''
562
- bk = ''
563
- ch = 0
564
- sep = ';'
565
- for bcv_range in self._encoded[scripture]:
566
- scrip, bk, ch, _, sep = self._decode_scripture(bcv_range, bk, ch, sep)
567
- lnk = convert_range(bcv_range)
568
- output += regex.sub(self._chunk, r2, scrip)
569
- self._linked[scripture] = output.strip(' ;,')
570
- if self._upper:
571
- output = output.upper()
572
- return output.strip(' ;,')
573
-
574
- text = self._locate_scriptures(text)
575
- return regex.sub(self._tagged, r1, text).replace('»»|', '{{').replace('|««', '}}')
576
-
577
-
578
- def serial_chapter_number(self, bcv):
579
- try:
580
- return int(self._chapters.loc[(self._chapters['Book'] == int(bcv[0:2])) & (self._chapters['Chapter'] == int(bcv[2:5]))].values[0][0])
581
- except:
582
- self._error_report(bcv, 'OUT OF RANGE')
583
- return None
584
-
585
- def serial_verse_number(self, bcv):
586
- try:
587
- return int(self._verses.loc[(self._verses['Book'] == int(bcv[0:2])) & (self._verses['Chapter'] == int(bcv[2:5])) & (self._verses['Verse'] == int(bcv[5:]))].values[0][0])
588
- except:
589
- self._error_report(bcv, 'OUT OF RANGE')
590
- return None
591
-
592
- def code_chapter(self, chapter):
593
- try:
594
- book, chapter = self._chapters[self._chapters['ChapterId'] == int(chapter)].values[0][1:]
595
- last = self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == chapter), ['Last']].values[0][0]
596
- bcv = str(book).zfill(2) + str(chapter).zfill(3)
597
- return f"('{bcv}001', '{bcv}{str(last).zfill(3)}')"
598
- except:
599
- self._error_report(chapter, 'OUT OF RANGE')
600
- return None
601
-
602
- def code_verse(self, verse):
603
- bcv = ''
604
- try:
605
- for i in self._verses[self._verses['VerseId'] == int(verse)].values[0][1:]:
606
- bcv += str(i).zfill(3)
607
- return f"('{bcv[1:]}', '{bcv[1:]}')"
608
- except:
609
- self._error_report(verse, 'OUT OF RANGE')
610
- return None
611
-
612
-
613
- def _main(args):
34
+ def main(args):
614
35
 
615
36
  def switchboard(text):
616
37
  if args['cc']:
@@ -671,43 +92,42 @@ def _main(args):
671
92
  else:
672
93
  print(txt)
673
94
 
674
- if __name__ == "__main__":
675
- PROJECT_PATH = Path(__file__).resolve().parent
676
- APP = 'linkture' # Path(__file__).stem
677
- parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")
678
-
679
- parser.add_argument('-v', action='version', version=f"{APP} {__version__}", help='show version and exit')
680
- parser.add_argument('-q', action='store_true', help="don't show errors")
681
-
682
- function_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
683
- mode = function_group.add_mutually_exclusive_group()
684
- mode.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
685
- mode.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
686
- parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')
687
-
688
- parser.add_argument('--language', default='English', choices=available_languages, help='indicate source language for book names (English if unspecified)')
689
- parser.add_argument('--translate', choices=available_languages, help='indicate output language for book names (same as source if unspecified)')
690
- parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
691
- parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')
692
- format_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
693
- formats = format_group.add_mutually_exclusive_group()
694
- formats.add_argument('--full', action='store_true', help='output as full name - default (eg., "Genesis")')
695
- formats.add_argument('--official', action='store_true', help='output as official abbreviation (eg., "Ge")')
696
- formats.add_argument('--standard', action='store_true', help='output as standard abbreviation (eg., "Gen.")')
697
-
698
- type_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
699
- tpe = type_group.add_mutually_exclusive_group(required=False)
700
- tpe.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
701
- tpe.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
702
- tpe.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
703
- tpe.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
704
- tpe.add_argument('-x', action='store_true', help='extract list of scripture references')
705
95
 
706
- aux_group = parser.add_argument_group('auxiliary functions')
707
- aux = aux_group.add_mutually_exclusive_group(required=False)
708
- aux.add_argument('-sc', metavar=('BCV'), help='return the serial number (1-1189) of the chapter with code "BCV" ("bbcccvvv")')
709
- aux.add_argument('-sv', metavar=('BCV'), help='return the serial number (1-31091) of the verse with code "BCV" ("bbcccvvv")')
710
- aux.add_argument('-cc', metavar=('chapter'), help='return the BCV range for serial chapter number "chapter" (integer value)')
711
- aux.add_argument('-cv', metavar=('verse'), help='return the BCV code for serial verse number "verse" (integer value)')
712
- args = parser.parse_args()
713
- _main(vars(args))
96
+ parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")
97
+
98
+ parser.add_argument('-v', action='version', version=f"{__app__} {__version__}", help='show version and exit')
99
+ parser.add_argument('-q', action='store_true', help="don't show errors")
100
+
101
+ function_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
102
+ mode = function_group.add_mutually_exclusive_group()
103
+ mode.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
104
+ mode.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
105
+ parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')
106
+
107
+ parser.add_argument('--language', default='English', choices=available_languages, help='indicate source language for book names (English if unspecified)')
108
+ parser.add_argument('--translate', choices=available_languages, help='indicate output language for book names (same as source if unspecified)')
109
+ parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
110
+ parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')
111
+ format_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
112
+ formats = format_group.add_mutually_exclusive_group()
113
+ formats.add_argument('--full', action='store_true', help='output as full name - default (eg., "Genesis")')
114
+ formats.add_argument('--official', action='store_true', help='output as official abbreviation (eg., "Ge")')
115
+ formats.add_argument('--standard', action='store_true', help='output as standard abbreviation (eg., "Gen.")')
116
+
117
+ type_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
118
+ tpe = type_group.add_mutually_exclusive_group(required=False)
119
+ tpe.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
120
+ tpe.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
121
+ tpe.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
122
+ tpe.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
123
+ tpe.add_argument('-x', action='store_true', help='extract list of scripture references')
124
+
125
+ aux_group = parser.add_argument_group('auxiliary functions')
126
+ aux = aux_group.add_mutually_exclusive_group(required=False)
127
+ aux.add_argument('-sc', metavar=('BCV'), help='return the serial number (1-1189) of the chapter with code "BCV" ("bbcccvvv")')
128
+ aux.add_argument('-sv', metavar=('BCV'), help='return the serial number (1-31091) of the verse with code "BCV" ("bbcccvvv")')
129
+ aux.add_argument('-cc', metavar=('chapter'), help='return the BCV range for serial chapter number "chapter" (integer value)')
130
+ aux.add_argument('-cv', metavar=('verse'), help='return the BCV code for serial verse number "verse" (integer value)')
131
+
132
+ args = parser.parse_args()
133
+ main(vars(args))
linkture/linkture.py ADDED
@@ -0,0 +1,607 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ File: linkture
5
+
6
+ Description: Parse and process Bible scripture references
7
+
8
+ MIT License: Copyright (c) 2024 Eryk J.
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ """
28
+
29
+ __app__ = 'linkture'
30
+ __version__ = 'v2.6.0'
31
+
32
+
33
+ import json, regex, sqlite3
34
+ import pandas as pd
35
+ from unidecode import unidecode
36
+
37
+
38
# Languages accepted for the `language` and `translate` arguments of Scriptures();
# any other value makes the constructor raise ValueError.
available_languages = ('Cebuano', 'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Hungarian', 'Italian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese', 'Russian', 'Spanish', 'Tagalog', 'Ukrainian')
# Source languages whose book names are NOT passed through unidecode() before
# being normalized for lookup (see the `_nl` flag set in Scriptures.__init__).
non_latin = ('Chinese', 'Greek', 'Japanese', 'Korean', 'Russian', 'Ukrainian')
40
+
41
+
42
+ class Scriptures():
43
+
44
def __init__(self, language='English', translate=None, form=None, separator=' ', upper=False, verbose=False):
    """
    Load the book-name tables and verse ranges and compile the parsers.

    Parameters:
        language:  source language of the references; must be listed in
                   `available_languages`.
        translate: output language (same restriction); defaults to `language`.
        form:      "full", "standard" or "official" book-name form for the
                   output; any other value (including None) means "full".
        separator: replacement for whitespace inside decoded references
                   (applied only when it differs from a single space).
        upper:     if true, output book names are upper-cased.
        verbose:   if true, problems are printed by `_error_report`.

    Raises:
        ValueError: if `language` or `translate` is not an available language.
    """
    # Local import: only needed here, to locate the resources bundled with the package.
    from pathlib import Path

    self._verbose = verbose
    self._separator = separator
    if language not in available_languages:
        raise ValueError("Indicated source language is not an option!")
    if translate:
        if translate not in available_languages:
            raise ValueError("Indicated translation language is not an option!")
    else:
        translate = language
    self._nl = language in non_latin
    # References are rewritten when translating or when a form was requested.
    self._rewrite = bool((language != translate) or form)
    self._upper = upper
    # Column index of the requested name form in a Books record.
    form = {"full": 3, "standard": 4, "official": 5}.get(form, 3)

    def normalize(name):
        # Lookup key: transliterated (Latin-script languages only), upper-cased,
        # with punctuation and separators removed.
        if not self._nl:
            name = unidecode(name)
        return regex.sub(r'\p{P}|\p{Z}', '', name.upper())

    # Resolve resources relative to this module rather than the current
    # working directory, so the class also works when imported from elsewhere.
    res = Path(__file__).resolve().parent / 'res'

    self._src_book_names = {}
    self._tr_book_names = ['Bible']
    con = sqlite3.connect(str(res / 'resources.db'))
    try:
        cur = con.cursor()
        books_query = "SELECT * FROM Books WHERE Language = ?;"
        for rec in cur.execute(books_query, (translate,)).fetchall():
            tr = rec[form].upper() if self._upper else rec[form]
            self._tr_book_names.insert(rec[2], tr)  # rec[2] is the book number
        for rec in cur.execute(books_query, (language,)).fetchall():
            for i in range(3, 6):  # every name form is accepted on input
                self._src_book_names[normalize(rec[i])] = rec[2]
        with open(res / 'custom.json', 'r', encoding='UTF-8') as json_file:
            custom = json.load(json_file)
        # Each custom row is [book number, "name1, name2, ..."].
        for row in custom.get(language, ()):
            for item in row[1].split(', '):
                self._src_book_names[normalize(item)] = row[0]
        self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
        self._verses = pd.read_sql_query("SELECT * FROM Verses;", con)
        self._chapters = pd.read_sql_query("SELECT * FROM Chapters;", con)
        cur.close()
    finally:
        # Always release the database, even if loading failed part-way.
        con.close()
    self._reported = []
    self._encoded = {}
    self._linked = {}

    # Scripture reference parser:
    self._first_pass = regex.compile(r"""(
        {{.*?}} |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )?
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\.\p{Pd};]\p{Z}?)*
        (?<=[\p{L},:\p{Pd}]\p{Z} |
            [\p{L},:\p{Pd}] |
            \.)\d+
        (?![,\p{Pd}\p{L}]) |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]*\p{L}
        )""", flags=regex.VERBOSE | regex.IGNORECASE)

    self._second_pass = regex.compile(r"""(
        (?![^{]*}) # ignore already marked
        \p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\p{Pd};]\p{Z}?)*\d+
        (?![,\p{Pd}\p{L}])
        )""", flags=regex.VERBOSE)

    # Splits a reference into (book name, remainder).
    self._bk_ref = regex.compile(r"""
        ((?:[1-5]\p{L}{0,2} |
            [IV]{1,3} )?
        [\p{Pd}\.]?[\p{L}\p{Pd}\.\p{Z}]{2,})(.*)
        """, flags=regex.VERBOSE | regex.IGNORECASE)

    self._tagged = regex.compile(r'({{.*?}})')

    # Chapter/verse shapes (c = chapter, v = verse, d = bare number).
    self._cv_cv = regex.compile(r'(\d+):(\d+)-(\d+):(\d+)')
    self._v_cv = regex.compile(r'(\d+)-(\d+):(\d+)')
    self._cv_v = regex.compile(r'(\d+):(\d+)-(\d+)')
    self._cv = regex.compile(r'(\d+):(\d+)')
    self._ddd = regex.compile(r'(\d+),(\d+),(\d+)')
    self._dd_d = regex.compile(r'(\d+),(\d+)-(\d+)')
    self._d_dd = regex.compile(r'(\d+)-(\d+),(\d+)')
    self._d_d = regex.compile(r'(\d+)-(\d+)(?!:)')
    self._dd = regex.compile(r'(\d+),(\d+)')
    self._d = regex.compile(r'(\d+)')

    self._chunk = regex.compile(r'([^,;\p{Z}]+.*)')
    self._sep = regex.compile(r'(?<!;)\s')
165
+
166
def _error_report(self, scripture, message):
    """Print `message` for `scripture` once per run, and only in verbose mode."""
    if not self._verbose or scripture in self._reported:
        return
    print(f'** "{scripture}" - {message}')
    # Remember it so the same reference is not reported again.
    self._reported.append(scripture)
170
+
171
def _scripture_parts(self, scripture):
    """
    Split a reference into its book and chapter/verse parts.

    Returns (translated book name, remainder, book number, last chapter),
    or (None, None, None, 0) when no known book name is found.
    """

    def check_book(bk_name):
        # Build the same normalized key that the constructor stored.
        key = bk_name
        if not self._nl:
            key = unidecode(key)  # NOTE: this converts Génesis to Genesis and English recognizes it !! Feature :-)
        key = regex.sub(r'\p{P}|\p{Z}', '', key.upper())
        if key in self._src_book_names:
            number = self._src_book_names[key]
            # The book-level row (no chapter) holds the book's last chapter number.
            book_rows = (self._ranges.Book == number) & (self._ranges.Chapter.isnull())
            return self._ranges.loc[book_rows, ['Book', 'Last']].values[0]
        return None, 0

    not_found = (None, None, None, 0)
    compact = regex.sub(r'\p{Z}', '', scripture)
    compact = regex.sub(r'\p{Pd}', '-', compact)  # any dash becomes a plain hyphen
    found = self._bk_ref.search(compact)
    if not found:
        return not_found
    bk_name = found.group(1).strip()
    rest = found.group(2).strip()
    bk_num, last = check_book(bk_name)
    if not bk_num:
        return not_found
    # for period notation cases (Gen 1.1)
    return self._tr_book_names[bk_num], rest.replace('.', ':'), bk_num, last
193
+
194
def _locate_scriptures(self, text):
    """
    Wrap every recognized reference in `text` with {{ }} and cache its code.

    Pre-tagged items that cannot be recognized are temporarily marked with
    »»| ... |«« so that callers can restore their braces afterwards.
    """

    def r(match):
        found = match.group(1)
        pre_tagged = bool(regex.match(r'{{.*}}', found))
        if pre_tagged:
            found = found.strip('}{')
        wrapped = '{{' + found + '}}'
        if found in self._encoded.keys():
            return wrapped
        _, rest, bk_num, last = self._scripture_parts(found)
        if bk_num:
            code = self._code_scripture(found, bk_num, rest, last)  # validation performed
            if code:
                self._encoded[found] = code
                return wrapped
        if pre_tagged:
            # So as not to lose {{ }} on unrecognized pre-tagged scriptures (other language, etc.)
            return '»»|' + found + '|««'
        return found

    self._reported = []
    first = regex.sub(self._first_pass, r, text)
    return regex.sub(self._second_pass, r, first)
219
+
220
+
221
def list_scriptures(self, text):
    """Return the references found in `text`, rewritten if so configured."""

    def render(tagged):
        script = tagged.strip('}{')
        if self._rewrite:
            script = self.decode_scriptures(self._encoded[script])[0]
        return script.upper() if self._upper else script

    located = self._locate_scriptures(text)
    return [render(tagged) for tagged in regex.findall(self._tagged, located)]
232
+
233
def tag_scriptures(self, text):
    """Return `text` with every recognized reference wrapped in {{ }}."""
    return self.rewrite_scriptures(text, tag=True)
235
+
236
def rewrite_scriptures(self, text, tag=False):
    """
    Return `text` with its references rewritten in the configured form.

    With `tag` set, each reference stays wrapped in {{ }}; otherwise the
    braces are dropped. Unrecognized pre-tagged items get their braces back.
    """

    def r(match):
        script = match.group(1).strip('}{')
        if self._rewrite:
            script = self.decode_scriptures(self._encoded[script])[0]
        if self._upper:
            script = script.upper()
        return '{{' + script + '}}' if tag else script

    located = self._locate_scriptures(text)
    rewritten = regex.sub(self._tagged, r, located)
    # Restore the braces of pre-tagged items that were not recognized.
    return rewritten.replace('»»|', '{{').replace('|««', '}}')
251
+
252
+
253
def _code_scripture(self, scripture, bk_num, rest, last):
    """
    Encode the chapter/verse part of a reference as BCV ranges.

    Parameters:
        scripture: the reference as found in the text (used for error reports).
        bk_num:    book number.
        rest:      chapter/verse part with spaces removed ('' means whole book).
        last:      number of the last chapter of the book.

    Returns a list of (start, end) tuples of "bbcccvvv" strings, or None
    (after reporting the problem) when any part cannot be validated.
    """

    def reform_series(txt):  # rewrite comma-separated consecutive sequences as (1, 2, 3) as ranges (1-3)
        for result in self._d_dd.finditer(txt, overlapped=True):
            start, mid, end = result.groups()
            if int(end) - int(mid) == 1:
                txt = regex.sub(result.group(), f"{start}-{end}", txt)
        for _ in range(2):  # two passes, so longer runs are collapsed too
            for result in self._ddd.finditer(txt, overlapped=True):
                start, end = result.group(1), result.group(3)
                if int(end) - int(start) == 2:
                    txt = regex.sub(result.group(), f"{start}-{end}", txt)
        for result in self._dd.finditer(txt, overlapped=True):
            start, end = result.groups()
            if int(end) - int(start) == 1:
                txt = regex.sub(result.group(), f"{start}-{end}", txt)
        return txt

    def last_of(chapter_filter):
        # 'Last' value of this book's row selected by `chapter_filter`.
        rows = (self._ranges.Book == bk_num) & chapter_filter
        return self._ranges.loc[rows, ['Last']].values[0][0]

    def point(ch, vs):
        # Zero-padded (chapter, verse) strings, or None when out of range.
        c, v = int(ch), int(vs)
        if not (0 < bk_num <= 66):  # book out of range
            return None
        if not (0 < c <= last_of(self._ranges.Chapter.isnull())):  # chapter out of range
            return None
        if not (0 < v <= last_of(self._ranges.Chapter == c)):  # verse out of range
            return None
        # Padding the parsed integers keeps the code 8 digits long even if
        # the input carried extra leading zeros.
        return str(c).zfill(3), str(v).zfill(3)

    def code_verses(chunk, multi, current):
        # Returns ((start, end), chapter-context) for one piece of the
        # reference; (None, 0) when out of range, (None, None) when the
        # piece has no recognizable shape.
        b = str(bk_num).zfill(2)

        def span(first, second):
            return b + first[0] + first[1], b + second[0] + second[1]

        result = self._cv_cv.search(chunk)
        if result:
            first = point(result.group(1), result.group(2))
            second = point(result.group(3), result.group(4)) if first else None
            if not second:
                return None, 0
            return span(first, second), second[0]

        result = self._cv_v.search(chunk)
        if result:
            first = point(result.group(1), result.group(2))
            second = point(result.group(1), result.group(3)) if first else None
            if not second:
                return None, 0
            return span(first, second), first[0]

        result = self._v_cv.search(chunk)
        if result:
            if not current:
                # "v-c:v" without a preceding chapter cannot be resolved;
                # treat it as invalid instead of crashing on int('None').
                return None, 0
            first = point(current, result.group(1))
            second = point(result.group(2), result.group(3)) if first else None
            if not second:
                return None, 0
            return span(first, second), second[0]

        result = self._cv.search(chunk)
        if result:
            first = point(result.group(1), result.group(2))
            if not first:
                return None, 0
            return span(first, first), first[0]

        result = self._d_d.search(chunk)
        if result:
            if multi:  # the numbers are chapters: whole chapters c1 to c2
                first = point(result.group(1), 1)
                second = point(result.group(2), 1) if first else None
                if not second:
                    return None, 0
                v2 = str(last_of(self._ranges.Chapter == int(second[0]))).zfill(3)
                return (b + first[0] + '001', b + second[0] + v2), None
            # single-chapter book: the numbers are verses of chapter 1
            first = point(1, result.group(1))
            second = point(1, result.group(2)) if first else None
            if not second:
                return None, 0
            return span(first, second), '001'

        result = self._d.search(chunk)
        if result:
            if multi:  # a whole chapter
                first = point(result.group(1), 1)
                if not first:
                    return None, 0
                v2 = str(last_of(self._ranges.Chapter == int(first[0]))).zfill(3)
                return (b + first[0] + '001', b + first[0] + v2), None
            first = point(1, result.group(1))  # a verse of a single-chapter book
            if not first:
                return None, 0
            return span(first, first), None

        return None, None

    lst = []
    if rest == '':  # whole book
        v = last_of(self._ranges.Chapter == last)
        rest = f'1-{v}' if last == 1 else f'1:1-{last}:{v}'
    else:
        rest = reform_series(rest)
    multi = last > 1
    for chunk in rest.split(';'):
        ch = None
        for bit in chunk.split(','):
            # A bit inherits the chapter of the previous one unless it
            # starts with its own "chapter:verse" (e.g. "1:1,3:3").
            if ch and not self._cv.match(bit.strip()):
                tup, ch = code_verses(f"{ch}:{bit}", multi, ch)
            else:
                tup, ch = code_verses(bit, multi, ch)
            if not tup:
                self._error_report(scripture, f'"{bit.strip()}" OUT OF RANGE')
                return None
            lst.append(tup)
    return lst
426
+
427
def code_scriptures(self, text):
    """Return the BCV ranges of all references found in `text`, as one flat list."""
    located = self._locate_scriptures(text)
    lst = []
    for tagged in regex.findall(self._tagged, located):
        # Each reference may have been encoded as several ranges.
        lst.extend(self._encoded[tagged.strip('}{')])
    return lst
435
+
436
+
437
def _decode_scripture(self, bcv_range, book='', chap=0, sep=';'):
    """
    Turn one BCV range into text, continuing from the previously decoded range.

    Parameters:
        bcv_range: (start, end) tuple of "bbcccvvv" strings.
        book:      book name returned for the previous range ('' if none).
        chap:      chapter returned for the previous range (0 if none).
        sep:       separator returned for the previous range.

    Returns a 5-tuple (text, book, chapter, continued, separator). `continued`
    is True when the range is in the same book as the previous one, in which
    case `text` is a fragment meant to be appended to the previous text.
    For an empty or invalid range the result is (None, '', 0, False, ';');
    it has the same length as a successful result so callers can always
    unpack five values.
    """
    invalid = (None, '', 0, False, ';')
    if not bcv_range:
        return invalid
    start, end = bcv_range
    sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
    eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])
    if sb != eb or not (0 < sb <= 66):  # range spans books, or book out of range
        return invalid

    def last_of(chapter_filter):
        # 'Last' value of this book's row selected by `chapter_filter`.
        rows = (self._ranges.Book == sb) & chapter_filter
        return self._ranges.loc[rows, ['Last']].values[0][0]

    lc = last_of(self._ranges.Chapter.isnull())  # last chapter of the book
    if not (0 < sc <= ec <= lc):  # chapter(s) out of range
        return invalid
    se = last_of(self._ranges.Chapter == sc)  # last verse of the start chapter
    le = last_of(self._ranges.Chapter == ec)  # last verse of the end chapter
    if not (0 < sv <= se and 0 < ev <= le):  # verse(s) out of range
        return invalid

    bk_name = self._tr_book_names[sb]
    cont = book == bk_name
    book = bk_name
    c = ec - sc + 1  # number of chapters covered
    v = ev - sv + 1  # equals `le` only when verses run from 1 to the last one
    if lc == 1:  # single-chapter book: no chapter number is shown
        if cont:
            bk_name = ','
        if v == le:
            scripture = f"{bk_name.strip(',')}"
        elif v == 1:
            scripture = f"{bk_name} {sv}"
        elif v == 2:
            scripture = f"{bk_name} {sv}, {ev}"
        else:
            scripture = f"{bk_name} {sv}‑{ev}"
        sep = ';'
    else:
        ch = f"{sc}:"
        if v == le:  # whole chapter(s)
            if cont:
                bk_name = sep
            if c == lc:
                scripture = f"{bk_name.strip(',')}"
            elif c == 1:
                scripture = f"{bk_name} {sc}"
            elif c == 2:
                scripture = f"{bk_name} {sc}, {ec}"
            else:
                scripture = f"{bk_name} {sc}‑{ec}"
            sep = ','
        elif c == 1:  # verses within one chapter
            if cont:
                if sc == chap:  # same chapter as before: list the verses only
                    bk_name = ''
                    ch = ', '
                else:
                    bk_name = ';'
            if v == 1:
                scripture = f"{bk_name} {ch}{sv}"
            elif v == 2:
                scripture = f"{bk_name} {ch}{sv}, {ev}"
            else:
                scripture = f"{bk_name} {ch}{sv}‑{ev}"
            sep = ';'
        else:  # range crossing a chapter boundary
            if cont and (sc == chap):
                bk_name = ''
                ch = ', '
            scripture = f"{bk_name} {ch}{sv}‑{ec}:{ev}"
            sep = ';'
    chap = ec
    if self._separator != ' ':
        scripture = regex.sub(self._sep, self._separator, scripture)
    return scripture.strip(), book, chap, cont, sep
517
+
518
def decode_scriptures(self, bcv_ranges=None):
    """
    Decode a sequence of BCV ranges into a list of reference strings.

    Consecutive ranges in the same book are merged into one string.
    Ranges that cannot be decoded are skipped and reset the book/chapter
    context, so the following range starts a new reference.

    Parameters:
        bcv_ranges: iterable of (start, end) tuples of "bbcccvvv" strings;
                    None (the default) is treated as empty.
    """
    scriptures = []
    bk, ch, sep = '', 0, ';'
    for bcv_range in bcv_ranges or ():
        result = self._decode_scripture(bcv_range, bk, ch, sep)
        if result[0] is None:
            # Checked before unpacking so a shorter failure tuple cannot
            # raise on the five-value unpack below.
            bk, ch, sep = '', 0, ';'
            continue
        scripture, bk, ch, cont, sep = result
        if not scripture:
            continue
        if cont and scriptures:
            scriptures[-1] += scripture
        else:
            scriptures.append(scripture)
    return scriptures
531
+
532
+
533
def link_scriptures(self, text, prefix='<a href=', suffix='>'):  # NOTE: this always rewrites (full by default) - what if one wants to leave as is??
    """
    Return `text` with every recognized reference replaced by <a></a> links.

    Each BCV range of a reference becomes
    `{prefix}{b:c:v[-b:c:v]}{suffix}{reference text}</a>`.

    Rendered links are cached per (reference, prefix, suffix), so calling
    again with a different prefix or suffix never returns links built for
    an earlier call, and a repeated reference renders identically each time.
    """

    def convert_range(bcv_range):
        # "b:c:v" for a single verse, "b:c:v-b:c:v" for a range.
        start, end = bcv_range
        sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
        if start == end:
            return f"{sb}:{sc}:{sv}"
        eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])
        return f"{sb}:{sc}:{sv}-{eb}:{ec}:{ev}"

    def r1(match):

        def r2(match):
            return f'{prefix}{lnk}{suffix}{match.group(1)}</a>'

        scripture = match.group(1).strip('}{')
        key = (scripture, prefix, suffix)
        if key in self._linked:
            return self._linked[key]
        output = ''
        bk, ch, sep = '', 0, ';'
        for bcv_range in self._encoded[scripture]:
            if not bcv_range:
                continue
            scrip, bk, ch, _, sep = self._decode_scripture(bcv_range, bk, ch, sep)
            lnk = convert_range(bcv_range)
            output += regex.sub(self._chunk, r2, scrip)
        output = output.strip(' ;,')
        if self._upper:
            # NOTE(review): this upper-cases the link markup (prefix/suffix) as
            # well as the visible text - confirm that this is intended.
            output = output.upper()
        self._linked[key] = output  # cache exactly what is returned
        return output

    text = self._locate_scriptures(text)
    # Restore the braces of pre-tagged items that were not recognized.
    return regex.sub(self._tagged, r1, text).replace('»»|', '{{').replace('|««', '}}')
573
+
574
+
575
def serial_chapter_number(self, bcv):
    """
    Return the serial number of the chapter identified by a "bbcccvvv" code.

    Only the book (first two characters) and chapter (next three) are used.
    Returns None, after reporting the problem, when the code is malformed
    or no such chapter exists.
    """
    try:
        book, chapter = int(bcv[0:2]), int(bcv[2:5])
        chapters = self._chapters
        found = chapters.loc[(chapters['Book'] == book) & (chapters['Chapter'] == chapter)]
        return int(found.values[0][0])  # first column holds the serial number
    except (IndexError, TypeError, ValueError):
        # IndexError: no matching row; TypeError/ValueError: malformed code.
        self._error_report(bcv, 'OUT OF RANGE')
        return None
581
+
582
def serial_verse_number(self, bcv):
    """
    Return the serial number of the verse identified by a "bbcccvvv" code.

    Returns None, after reporting the problem, when the code is malformed
    or no such verse exists.
    """
    try:
        book, chapter, verse = int(bcv[0:2]), int(bcv[2:5]), int(bcv[5:])
        verses = self._verses
        found = verses.loc[(verses['Book'] == book) & (verses['Chapter'] == chapter) & (verses['Verse'] == verse)]
        return int(found.values[0][0])  # first column holds the serial number
    except (IndexError, TypeError, ValueError):
        # IndexError: no matching row; TypeError/ValueError: malformed code.
        self._error_report(bcv, 'OUT OF RANGE')
        return None
588
+
589
def code_chapter(self, chapter):
    """
    Return the BCV range covering the chapter with serial number `chapter`.

    The result is the string "('bbccc001', 'bbcccvvv')", where vvv is the
    chapter's last verse. Returns None, after reporting the problem, when
    `chapter` is not an integer value or no such chapter exists.
    """
    try:
        chapters = self._chapters
        # Columns after the serial number are the book and chapter numbers.
        book, chap = chapters[chapters['ChapterId'] == int(chapter)].values[0][1:]
        last = self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == chap), ['Last']].values[0][0]
        bcv = str(book).zfill(2) + str(chap).zfill(3)
        return f"('{bcv}001', '{bcv}{str(last).zfill(3)}')"
    except (IndexError, TypeError, ValueError):
        # Report the value the caller passed in, not an intermediate one.
        self._error_report(chapter, 'OUT OF RANGE')
        return None
598
+
599
def code_verse(self, verse):
    """
    Return the BCV code of the verse with serial number `verse`.

    The result is the string "('bbcccvvv', 'bbcccvvv')" (a one-verse range).
    Returns None, after reporting the problem, when `verse` is not an
    integer value or no such verse exists.
    """
    try:
        verses = self._verses
        # Columns after the serial number are book, chapter and verse.
        parts = verses[verses['VerseId'] == int(verse)].values[0][1:]
        # Each part is padded to three digits; dropping the first character
        # leaves the two-digit book number.
        bcv = ''.join(str(part).zfill(3) for part in parts)[1:]
        return f"('{bcv}', '{bcv}')"
    except (IndexError, TypeError, ValueError):
        self._error_report(verse, 'OUT OF RANGE')
        return None
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: linkture
3
- Version: 2.5.6
3
+ Version: 2.6.0
4
4
  Summary: PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode
5
5
  Keywords: bible,scriptures,scripture-references,scripture-translation,scripture-parser,scripture-linker
6
6
  Author-Email: "Eryk J." <infiniti@inventati.org>
@@ -23,7 +23,6 @@ Requires-Dist: setuptools>=59.6.0
23
23
  Requires-Dist: argparse>=1.4.0
24
24
  Requires-Dist: regex>=2023.8.8
25
25
  Requires-Dist: unidecode>=1.3.8
26
- Requires-Dist: pathlib>=1.0.1
27
26
  Requires-Dist: pandas==2.2.2
28
27
  Description-Content-Type: text/markdown
29
28
 
@@ -0,0 +1,10 @@
1
+ linkture-2.6.0.dist-info/METADATA,sha256=DpJS7s4hvIkgsgQTM72-WH70NlDF5TteOUAKyyqrs5o,11068
2
+ linkture-2.6.0.dist-info/WHEEL,sha256=7sv5iXvIiTVJSnAxCz2tGBm9DHsb2vPSzeYeT7pvGUY,90
3
+ linkture-2.6.0.dist-info/licenses/LICENSE,sha256=kPqKoVmo3Tx1HgQvqfjBZuYkjT1mZXnQ5R0KBbEeFfs,1064
4
+ linkture/__init__.py,sha256=-CsRDvXLUig8T6RvwkktRP8e8DWrpjlyqBcw26kOv1E,47
5
+ linkture/__main__.py,sha256=4Z6KGvQL4kX42H5P7yDz3xiLf2TS89P-d5IYsxqOlSI,6427
6
+ linkture/linkture.py,sha256=hLFvzqj7dLU-xuFW_rAP1Emc034DKo2VcF72tYvO1rc,23670
7
+ linkture/res/custom.json,sha256=PnCI0N5uBn1ZzEG05V3r8uwrW2uBogCQ_uCQKTHJe4E,1904
8
+ linkture/res/resources.db,sha256=ceXVt21jdJwJCCWsfMPy6aRcHo9yHzyTQk_i22aJZD8,581632
9
+ linkture/res/rss-36.png,sha256=DeZ-xvFxyjeHSNHen3inNFWPm4qHdlNI3MQ1fclu9CQ,1297
10
+ linkture-2.6.0.dist-info/RECORD,,
@@ -1,9 +0,0 @@
1
- linkture-2.5.6.dist-info/METADATA,sha256=9nwgo8weHqXaLaEXL94dox0MYMxLqF1jBMCNrtoDVg4,11098
2
- linkture-2.5.6.dist-info/WHEEL,sha256=7sv5iXvIiTVJSnAxCz2tGBm9DHsb2vPSzeYeT7pvGUY,90
3
- linkture-2.5.6.dist-info/licenses/LICENSE,sha256=kPqKoVmo3Tx1HgQvqfjBZuYkjT1mZXnQ5R0KBbEeFfs,1064
4
- linkture/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- linkture/__main__.py,sha256=ca0nbx9LqDNeNFT2TsJRrgcDVq7WOfl1z_lo8BAjdXg,29101
6
- linkture/res/custom.json,sha256=PnCI0N5uBn1ZzEG05V3r8uwrW2uBogCQ_uCQKTHJe4E,1904
7
- linkture/res/resources.db,sha256=ceXVt21jdJwJCCWsfMPy6aRcHo9yHzyTQk_i22aJZD8,581632
8
- linkture/res/rss-36.png,sha256=DeZ-xvFxyjeHSNHen3inNFWPm4qHdlNI3MQ1fclu9CQ,1297
9
- linkture-2.5.6.dist-info/RECORD,,