linkture 2.5.6__py3-none-any.whl → 2.6.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- linkture/__init__.py +3 -0
- linkture/__main__.py +45 -625
- linkture/linkture.py +607 -0
- {linkture-2.5.6.dist-info → linkture-2.6.1.dist-info}/METADATA +15 -11
- linkture-2.6.1.dist-info/RECORD +10 -0
- linkture-2.5.6.dist-info/RECORD +0 -9
- {linkture-2.5.6.dist-info → linkture-2.6.1.dist-info}/WHEEL +0 -0
- {linkture-2.5.6.dist-info → linkture-2.6.1.dist-info}/licenses/LICENSE +0 -0
linkture/__main__.py
CHANGED
@@ -26,591 +26,12 @@
|
|
26
26
|
SOFTWARE.
|
27
27
|
"""
|
28
28
|
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
import argparse, json, regex, sqlite3
|
33
|
-
import pandas as pd
|
34
|
-
|
29
|
+
import argparse
|
30
|
+
from .linkture import _available_languages, __app__, __version__, Scriptures
|
35
31
|
from ast import literal_eval
|
36
|
-
from pathlib import Path
|
37
|
-
from unidecode import unidecode
|
38
|
-
|
39
|
-
|
40
|
-
available_languages = ('Cebuano', 'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Hungarian', 'Italian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese', 'Russian', 'Spanish', 'Tagalog', 'Ukrainian')
|
41
|
-
non_latin = ('Chinese', 'Greek', 'Japanese', 'Korean', 'Russian', 'Ukrainian')
|
42
|
-
|
43
|
-
|
44
|
-
class Scriptures():
|
45
|
-
|
46
|
-
def __init__(self, language='English', translate=None, form=None, separator=' ', upper=False, verbose=False):
|
47
|
-
self._verbose = verbose
|
48
|
-
self._separator = separator
|
49
|
-
if language not in available_languages:
|
50
|
-
raise ValueError("Indicated source language is not an option!")
|
51
|
-
if translate:
|
52
|
-
if translate not in available_languages:
|
53
|
-
raise ValueError("Indicated translation language is not an option!")
|
54
|
-
else:
|
55
|
-
translate = language
|
56
|
-
if language in non_latin:
|
57
|
-
self._nl = True
|
58
|
-
else:
|
59
|
-
self._nl = False
|
60
|
-
self._rewrite = bool((language != translate) or form)
|
61
|
-
self._upper = upper
|
62
|
-
if form == "full":
|
63
|
-
form = 3
|
64
|
-
elif form == "standard":
|
65
|
-
form = 4
|
66
|
-
elif form == "official":
|
67
|
-
form = 5
|
68
|
-
else:
|
69
|
-
form = 3
|
70
|
-
self._src_book_names = {}
|
71
|
-
path = Path(__file__).resolve().parent
|
72
|
-
|
73
|
-
self._tr_book_names = ['Bible']
|
74
|
-
con = sqlite3.connect(path / 'res/resources.db')
|
75
|
-
cur = con.cursor()
|
76
|
-
for rec in cur.execute(f"SELECT * FROM Books WHERE Language = '{translate}';").fetchall():
|
77
|
-
if self._upper:
|
78
|
-
tr = rec[form].upper()
|
79
|
-
else:
|
80
|
-
tr = rec[form]
|
81
|
-
self._tr_book_names.insert(rec[2], tr)
|
82
|
-
for rec in cur.execute(f"SELECT * FROM Books WHERE Language = '{language}';").fetchall():
|
83
|
-
for i in range(3,6):
|
84
|
-
item = rec[i]
|
85
|
-
if not self._nl:
|
86
|
-
item = unidecode(item)
|
87
|
-
normalized = regex.sub(r'\p{P}|\p{Z}', '', item.upper())
|
88
|
-
self._src_book_names[normalized] = rec[2]
|
89
|
-
with open(path / 'res/custom.json', 'r', encoding='UTF-8') as json_file:
|
90
|
-
b = json.load(json_file)
|
91
|
-
if language in b.keys():
|
92
|
-
for row in b[language]:
|
93
|
-
names = row[1].split(', ')
|
94
|
-
for item in names:
|
95
|
-
if not self._nl:
|
96
|
-
item = unidecode(item)
|
97
|
-
normalized = regex.sub(r'\p{P}|\p{Z}', '', item.upper())
|
98
|
-
self._src_book_names[normalized] = row[0]
|
99
|
-
self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
|
100
|
-
self._verses = pd.read_sql_query("SELECT * FROM Verses;", con)
|
101
|
-
self._chapters = pd.read_sql_query("SELECT * FROM Chapters;", con)
|
102
|
-
cur.close()
|
103
|
-
con.close()
|
104
|
-
self._reported = []
|
105
|
-
self._encoded = {}
|
106
|
-
self._linked = {}
|
107
|
-
|
108
|
-
# Scripture reference parser:
|
109
|
-
self._first_pass = regex.compile(r"""(
|
110
|
-
{{.*?}} |
|
111
|
-
|
112
|
-
(?:[1-5] (?:\p{Z} |
|
113
|
-
\.\p{Z}? |
|
114
|
-
\p{Pd} |
|
115
|
-
\p{L}{1,2} (?:\p{Z} |
|
116
|
-
\.\p{Z}? |
|
117
|
-
\p{Pd}))? |
|
118
|
-
[IV]{1,3} (?:\p{Z} |
|
119
|
-
\.\p{Z}? |
|
120
|
-
\p{Pd}) )?
|
121
|
-
(?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
|
122
|
-
(?:\d+\p{Z}?[:,\.\p{Pd};]\p{Z}?)*
|
123
|
-
(?<=[\p{L},:\p{Pd}]\p{Z} |
|
124
|
-
[\p{L},:\p{Pd}] |
|
125
|
-
\.)\d+
|
126
|
-
(?![,\p{Pd}\p{L}]) |
|
127
|
-
|
128
|
-
(?:[1-5] (?:\p{Z} |
|
129
|
-
\.\p{Z}? |
|
130
|
-
\p{Pd} |
|
131
|
-
\p{L}{1,2} (?:\p{Z} |
|
132
|
-
\.\p{Z}? |
|
133
|
-
\p{Pd}))? |
|
134
|
-
[IV]{1,3} (?:\p{Z} |
|
135
|
-
\.\p{Z}? |
|
136
|
-
\p{Pd}) )
|
137
|
-
(?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]*\p{L}
|
138
|
-
)""", flags=regex.VERBOSE | regex.IGNORECASE)
|
139
|
-
|
140
|
-
self._second_pass = regex.compile(r"""(
|
141
|
-
(?![^{]*}) # ignore already marked
|
142
|
-
\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
|
143
|
-
(?:\d+\p{Z}?[:,\p{Pd};]\p{Z}?)*\d+
|
144
|
-
(?![,\p{Pd}\p{L}])
|
145
|
-
)""", flags=regex.VERBOSE)
|
146
|
-
|
147
|
-
self._bk_ref = regex.compile(r"""
|
148
|
-
((?:[1-5]\p{L}{0,2} |
|
149
|
-
[IV]{1,3} )?
|
150
|
-
[\p{Pd}\.]?[\p{L}\p{Pd}\.\p{Z}]{2,})(.*)
|
151
|
-
""", flags=regex.VERBOSE | regex.IGNORECASE)
|
152
|
-
|
153
|
-
self._tagged = regex.compile(r'({{.*?}})')
|
154
|
-
|
155
|
-
self._cv_cv = regex.compile(r'(\d+):(\d+)-(\d+):(\d+)')
|
156
|
-
self._v_cv = regex.compile(r'(\d+)-(\d+):(\d+)')
|
157
|
-
self._cv_v = regex.compile(r'(\d+):(\d+)-(\d+)')
|
158
|
-
self._cv = regex.compile(r'(\d+):(\d+)')
|
159
|
-
self._ddd = regex.compile(r'(\d+),(\d+),(\d+)')
|
160
|
-
self._dd_d = regex.compile(r'(\d+),(\d+)-(\d+)')
|
161
|
-
self._d_dd = regex.compile(r'(\d+)-(\d+),(\d+)')
|
162
|
-
self._d_d = regex.compile(r'(\d+)-(\d+)(?!:)')
|
163
|
-
self._dd = regex.compile(r'(\d+),(\d+)')
|
164
|
-
self._d = regex.compile(r'(\d+)')
|
165
|
-
|
166
|
-
self._chunk = regex.compile(r'([^,;\p{Z}]+.*)')
|
167
|
-
self._sep = regex.compile(r'(?<!;)\s')
|
168
|
-
|
169
|
-
def _error_report(self, scripture, message):
|
170
|
-
if self._verbose and (scripture not in self._reported):
|
171
|
-
print(f'** "{scripture}" - {message}')
|
172
|
-
self._reported.append(scripture)
|
173
|
-
|
174
|
-
def _scripture_parts(self, scripture):
|
175
|
-
|
176
|
-
def check_book(bk_name):
|
177
|
-
if not self._nl:
|
178
|
-
bk_name = unidecode(bk_name) # NOTE: this converts Génesis to Genesis and English recognizes it !! Feature :-)
|
179
|
-
bk_name = regex.sub(r'\p{P}|\p{Z}', '', bk_name.upper())
|
180
|
-
if bk_name not in self._src_book_names:
|
181
|
-
return None, 0
|
182
|
-
else:
|
183
|
-
bk_num = self._src_book_names[bk_name]
|
184
|
-
return self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter.isnull()), ['Book', 'Last']].values[0]
|
185
|
-
|
186
|
-
reduced = regex.sub(r'\p{Z}', '', scripture)
|
187
|
-
reduced = regex.sub(r'\p{Pd}', '-', reduced)
|
188
|
-
result = self._bk_ref.search(reduced)
|
189
|
-
if result:
|
190
|
-
bk_name, rest = result.group(1).strip(), result.group(2).strip()
|
191
|
-
bk_num, last = check_book(bk_name)
|
192
|
-
if bk_num:
|
193
|
-
tr_name = self._tr_book_names[bk_num]
|
194
|
-
return tr_name, rest.replace('.', ':'), bk_num, last # for period notation cases (Gen 1.1)
|
195
|
-
return None, None, None, 0
|
196
|
-
|
197
|
-
def _locate_scriptures(self, text):
|
198
|
-
|
199
|
-
def r(match):
|
200
|
-
scripture = match.group(1)
|
201
|
-
if regex.match(r'{{.*}}', scripture):
|
202
|
-
tag = True
|
203
|
-
scripture = scripture.strip('}{')
|
204
|
-
else:
|
205
|
-
tag = False
|
206
|
-
if scripture in self._encoded.keys():
|
207
|
-
return '{{' + scripture +'}}'
|
208
|
-
_, rest, bk_num, last = self._scripture_parts(scripture)
|
209
|
-
if bk_num:
|
210
|
-
code = self._code_scripture(scripture, bk_num, rest, last) # validation performed
|
211
|
-
if code:
|
212
|
-
self._encoded[scripture] = code
|
213
|
-
return '{{' + scripture +'}}'
|
214
|
-
if tag:
|
215
|
-
return '»»|' + scripture +'|««' # So as not to lose {{ }} on unrecognized pre-tagged scriptures (other language, etc.)
|
216
|
-
else:
|
217
|
-
return scripture
|
218
|
-
|
219
|
-
self._reported = []
|
220
|
-
text = regex.sub(self._first_pass, r, text)
|
221
|
-
return regex.sub(self._second_pass, r, text)
|
222
|
-
|
223
|
-
|
224
|
-
def list_scriptures(self, text):
|
225
|
-
lst = []
|
226
|
-
text = self._locate_scriptures(text)
|
227
|
-
for scripture in regex.findall(self._tagged, text):
|
228
|
-
script = scripture.strip('}{')
|
229
|
-
if self._rewrite:
|
230
|
-
script = self.decode_scriptures(self._encoded[script])[0]
|
231
|
-
if self._upper:
|
232
|
-
script = script.upper()
|
233
|
-
lst.append(script)
|
234
|
-
return lst
|
235
|
-
|
236
|
-
def tag_scriptures(self, text):
|
237
|
-
return self.rewrite_scriptures(text, True)
|
238
|
-
|
239
|
-
def rewrite_scriptures(self, text, tag=False):
|
240
|
-
|
241
|
-
def r(match):
|
242
|
-
script = match.group(1).strip('}{')
|
243
|
-
if self._rewrite:
|
244
|
-
script = self.decode_scriptures(self._encoded[script])[0]
|
245
|
-
if self._upper:
|
246
|
-
script = script.upper()
|
247
|
-
if tag:
|
248
|
-
return '{{'+script+'}}'
|
249
|
-
else:
|
250
|
-
return script
|
251
|
-
|
252
|
-
text = self._locate_scriptures(text)
|
253
|
-
return regex.sub(self._tagged, r, text).replace('»»|', '{{').replace('|««', '}}')
|
254
|
-
|
255
|
-
|
256
|
-
def _code_scripture(self, scripture, bk_num, rest, last):
|
257
|
-
|
258
|
-
def reform_series(txt): # rewrite comma-separated consecutive sequences as (1, 2, 3) as ranges (1-3)
|
259
|
-
for result in self._d_dd.finditer(txt, overlapped=True):
|
260
|
-
end = result.group(3)
|
261
|
-
mid = result.group(2)
|
262
|
-
start = result.group(1)
|
263
|
-
if int(end) - int(mid) == 1:
|
264
|
-
txt = regex.sub(result.group(), f"{start}-{end}", txt)
|
265
|
-
for result in self._ddd.finditer(txt, overlapped=True):
|
266
|
-
end = result.group(3)
|
267
|
-
start = result.group(1)
|
268
|
-
if int(end) - int(start) == 2:
|
269
|
-
txt = regex.sub(result.group(), f"{start}-{end}", txt)
|
270
|
-
for result in self._ddd.finditer(txt, overlapped=True):
|
271
|
-
end = result.group(3)
|
272
|
-
start = result.group(1)
|
273
|
-
if int(end) - int(start) == 2:
|
274
|
-
txt = regex.sub(result.group(), f"{start}-{end}", txt)
|
275
|
-
for result in self._dd.finditer(txt, overlapped=True):
|
276
|
-
end = result.group(2)
|
277
|
-
start = result.group(1)
|
278
|
-
if int(end) - int(start) == 1:
|
279
|
-
txt = regex.sub(result.group(), f"{start}-{end}", txt)
|
280
|
-
return txt
|
281
|
-
|
282
|
-
def validate(b, ch, vs):
|
283
|
-
c = int(ch)
|
284
|
-
v = int(vs)
|
285
|
-
if not (0 < b <= 66): # book out of range
|
286
|
-
return None
|
287
|
-
if not (0 < c <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter.isnull()), ['Last']].values[0]): # chapter out of range
|
288
|
-
return None
|
289
|
-
if not (0 < v <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter == c), ['Last']].values[0]): # verse out of range
|
290
|
-
return None
|
291
|
-
return True
|
292
|
-
|
293
|
-
def code_verses(chunk, book, multi):
|
294
|
-
b = str(book).zfill(2)
|
295
32
|
|
296
|
-
result = self._cv_cv.search(chunk)
|
297
|
-
if result:
|
298
|
-
c = result.group(1)
|
299
|
-
v = result.group(2)
|
300
|
-
if not validate(book, c, v):
|
301
|
-
return None, 0
|
302
|
-
ch1 = c.zfill(3)
|
303
|
-
v1 = v.zfill(3)
|
304
33
|
|
305
|
-
|
306
|
-
v = result.group(4)
|
307
|
-
if not validate(book, c, v):
|
308
|
-
return None, 0
|
309
|
-
ch2 = c.zfill(3)
|
310
|
-
v2 = v.zfill(3)
|
311
|
-
return (b+ch1+v1, b+ch2+v2), ch2
|
312
|
-
|
313
|
-
result = self._cv_v.search(chunk)
|
314
|
-
if result:
|
315
|
-
c = result.group(1)
|
316
|
-
v = result.group(2)
|
317
|
-
if not validate(book, c, v):
|
318
|
-
return None, 0
|
319
|
-
ch1 = c.zfill(3)
|
320
|
-
v1 = v.zfill(3)
|
321
|
-
|
322
|
-
v = result.group(3)
|
323
|
-
if not validate(book, c, v):
|
324
|
-
return None, 0
|
325
|
-
v2 = v.zfill(3)
|
326
|
-
return (b+ch1+v1, b+ch1+v2), ch1
|
327
|
-
|
328
|
-
result = self._v_cv.search(chunk)
|
329
|
-
if result:
|
330
|
-
c = str(ch)
|
331
|
-
v = result.group(1)
|
332
|
-
if not validate(book, c, v):
|
333
|
-
return None, 0
|
334
|
-
ch1 = c
|
335
|
-
v1 = v.zfill(3)
|
336
|
-
|
337
|
-
c = result.group(2)
|
338
|
-
v = result.group(3)
|
339
|
-
if not validate(book, c, v):
|
340
|
-
return None, 0
|
341
|
-
ch2 = c.zfill(3)
|
342
|
-
v2 = v.zfill(3)
|
343
|
-
return (b+ch1+v1, b+ch2+v2), ch2
|
344
|
-
|
345
|
-
result = self._cv.search(chunk)
|
346
|
-
if result:
|
347
|
-
c = result.group(1)
|
348
|
-
v = result.group(2)
|
349
|
-
if not validate(book, c, v):
|
350
|
-
return None, 0
|
351
|
-
ch1 = c.zfill(3)
|
352
|
-
v1 = v.zfill(3)
|
353
|
-
return (b+ch1+v1, b+ch1+v1), ch1
|
354
|
-
|
355
|
-
result = self._d_d.search(chunk)
|
356
|
-
if result:
|
357
|
-
if multi:
|
358
|
-
c = result.group(1)
|
359
|
-
v = 1
|
360
|
-
if not validate(book, c, v):
|
361
|
-
return None, 0
|
362
|
-
ch1 = c.zfill(3)
|
363
|
-
v1 = '001'
|
364
|
-
|
365
|
-
c = result.group(2)
|
366
|
-
if not validate(book, c, v):
|
367
|
-
return None, 0
|
368
|
-
ch2 = c.zfill(3)
|
369
|
-
v2 = str(self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == int(ch2)), ['Last']].values[0][0]).zfill(3)
|
370
|
-
return (b+ch1+v1, b+ch2+v2), None
|
371
|
-
else:
|
372
|
-
c = 1
|
373
|
-
v = result.group(1)
|
374
|
-
if not validate(book, c, v):
|
375
|
-
return None, 0
|
376
|
-
ch1 = '001'
|
377
|
-
v1 = v.zfill(3)
|
378
|
-
|
379
|
-
v = result.group(2)
|
380
|
-
if not validate(book, c, v):
|
381
|
-
return None, 0
|
382
|
-
ch2 = ch1
|
383
|
-
v2 = v.zfill(3)
|
384
|
-
return (b+ch1+v1, b+ch2+v2), ch2
|
385
|
-
|
386
|
-
result = self._d.search(chunk)
|
387
|
-
if result:
|
388
|
-
if multi:
|
389
|
-
c = result.group(1)
|
390
|
-
v = 1
|
391
|
-
if not validate(book, c, v):
|
392
|
-
return None, 0
|
393
|
-
ch1 = c.zfill(3)
|
394
|
-
v1 = '001'
|
395
|
-
v2 = str(self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == int(ch1)), ['Last']].values[0][0]).zfill(3)
|
396
|
-
return (b+ch1+v1, b+ch1+v2), None
|
397
|
-
else:
|
398
|
-
c = 1
|
399
|
-
v = result.group(1)
|
400
|
-
if not validate(book, c, v):
|
401
|
-
return None, 0
|
402
|
-
ch1 = '001'
|
403
|
-
v1 = v.zfill(3)
|
404
|
-
return (b+ch1+v1, b+ch1+v1), None
|
405
|
-
|
406
|
-
return None, None
|
407
|
-
|
408
|
-
lst = []
|
409
|
-
if rest == '': # whole book
|
410
|
-
v = self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter == last), ['Last']].values[0][0]
|
411
|
-
if last == 1:
|
412
|
-
rest = f'1-{v}'
|
413
|
-
else:
|
414
|
-
rest = f'1:1-{last}:{v}'
|
415
|
-
else:
|
416
|
-
rest = reform_series(rest)
|
417
|
-
for chunk in rest.split(';'):
|
418
|
-
ch = None
|
419
|
-
for bit in chunk.split(','):
|
420
|
-
if ch:
|
421
|
-
tup, ch = code_verses(f"{ch}:{bit}", bk_num, last>1)
|
422
|
-
else:
|
423
|
-
tup, ch = code_verses(bit, bk_num, last>1)
|
424
|
-
if not tup:
|
425
|
-
self._error_report(scripture, f'"{bit.strip()}" OUT OF RANGE')
|
426
|
-
return None
|
427
|
-
lst.append(tup)
|
428
|
-
return lst
|
429
|
-
|
430
|
-
def code_scriptures(self, text):
|
431
|
-
text = self._locate_scriptures(text)
|
432
|
-
lst = []
|
433
|
-
for scripture in regex.findall(self._tagged, text):
|
434
|
-
bcv_ranges = self._encoded[scripture.strip('}{')]
|
435
|
-
for bcv_range in bcv_ranges:
|
436
|
-
lst.append(bcv_range)
|
437
|
-
return lst
|
438
|
-
|
439
|
-
|
440
|
-
def _decode_scripture(self, bcv_range, book='', chap=0, sep=';'):
|
441
|
-
if not bcv_range:
|
442
|
-
return None, '', 0, False
|
443
|
-
start, end = bcv_range
|
444
|
-
sb = int(start[:2])
|
445
|
-
sc = int(start[2:5])
|
446
|
-
sv = int(start[5:])
|
447
|
-
eb = int(end[:2])
|
448
|
-
ec = int(end[2:5])
|
449
|
-
ev = int(end[5:])
|
450
|
-
|
451
|
-
if not (sb == eb):
|
452
|
-
return None, '', 0, False
|
453
|
-
if not ((0 < sb <= 66) & (sb == eb)): # book out of range
|
454
|
-
return None, '', 0, False
|
455
|
-
lc = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter.isnull()), ['Last']].values[0][0]
|
456
|
-
if not (0 < sc <= ec <= lc): # chapter(s) out of range
|
457
|
-
return None, '', 0, False
|
458
|
-
se = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == sc), ['Last']].values[0][0]
|
459
|
-
le = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == ec), ['Last']].values[0][0]
|
460
|
-
if not ((0 < sv <= se) & (0 < ev <= le)): # verse(s) out of range
|
461
|
-
return None, '', 0, False
|
462
|
-
bk_name = self._tr_book_names[sb]
|
463
|
-
if book == bk_name:
|
464
|
-
cont = True
|
465
|
-
else:
|
466
|
-
cont = False
|
467
|
-
book = bk_name
|
468
|
-
c = ec - sc + 1
|
469
|
-
v = ev - sv + 1
|
470
|
-
if lc == 1:
|
471
|
-
if cont:
|
472
|
-
bk_name = ','
|
473
|
-
if v == le:
|
474
|
-
scripture = f"{bk_name.strip(',')}"
|
475
|
-
elif v == 1:
|
476
|
-
scripture = f"{bk_name} {sv}"
|
477
|
-
elif v == 2:
|
478
|
-
scripture = f"{bk_name} {sv}, {ev}"
|
479
|
-
else:
|
480
|
-
scripture = f"{bk_name} {sv}‑{ev}"
|
481
|
-
sep = ';'
|
482
|
-
else:
|
483
|
-
ch = f"{sc}:"
|
484
|
-
if v == le:
|
485
|
-
if cont:
|
486
|
-
bk_name = sep
|
487
|
-
if c == lc:
|
488
|
-
scripture = f"{bk_name.strip(',')}"
|
489
|
-
elif c == 1:
|
490
|
-
scripture = f"{bk_name} {sc}"
|
491
|
-
elif c == 2:
|
492
|
-
scripture = f"{bk_name} {sc}, {ec}"
|
493
|
-
else:
|
494
|
-
scripture = f"{bk_name} {sc}‑{ec}"
|
495
|
-
sep = ','
|
496
|
-
elif c == 1:
|
497
|
-
if cont:
|
498
|
-
if sc == chap:
|
499
|
-
bk_name = ''
|
500
|
-
ch = ', '
|
501
|
-
else:
|
502
|
-
bk_name = ';'
|
503
|
-
if v == 1:
|
504
|
-
scripture = f"{bk_name} {ch}{sv}"
|
505
|
-
elif v == 2:
|
506
|
-
scripture = f"{bk_name} {ch}{sv}, {ev}"
|
507
|
-
else:
|
508
|
-
scripture = f"{bk_name} {ch}{sv}‑{ev}"
|
509
|
-
sep = ';'
|
510
|
-
else:
|
511
|
-
if cont and (sc == chap):
|
512
|
-
bk_name = ''
|
513
|
-
ch = ', '
|
514
|
-
scripture = f"{bk_name} {ch}{sv}‑{ec}:{ev}"
|
515
|
-
sep = ';'
|
516
|
-
chap = ec
|
517
|
-
if self._separator != ' ':
|
518
|
-
scripture = regex.sub(self._sep, self._separator, scripture)
|
519
|
-
return scripture.strip(), book, chap, cont, sep
|
520
|
-
|
521
|
-
def decode_scriptures(self, bcv_ranges=[]):
|
522
|
-
scriptures = []
|
523
|
-
bk = ''
|
524
|
-
ch = 0
|
525
|
-
sep = ';'
|
526
|
-
for bcv_range in bcv_ranges:
|
527
|
-
scripture, bk, ch, cont, sep = self._decode_scripture(bcv_range, bk, ch, sep)
|
528
|
-
if scripture:
|
529
|
-
if cont:
|
530
|
-
scriptures[-1] = scriptures[-1] + scripture
|
531
|
-
else:
|
532
|
-
scriptures.append(scripture)
|
533
|
-
return scriptures
|
534
|
-
|
535
|
-
|
536
|
-
def link_scriptures(self, text, prefix='<a href=', suffix='>'): # NOTE: this always rewrites (full by default) - what if one wants to leave as is??
|
537
|
-
|
538
|
-
def convert_range(bcv_range):
|
539
|
-
if not bcv_range:
|
540
|
-
return None, None
|
541
|
-
start, end = bcv_range
|
542
|
-
sb = int(start[:2])
|
543
|
-
sc = int(start[2:5])
|
544
|
-
sv = int(start[5:])
|
545
|
-
eb = int(end[:2])
|
546
|
-
ec = int(end[2:5])
|
547
|
-
ev = int(end[5:])
|
548
|
-
if start == end:
|
549
|
-
return f"{sb}:{sc}:{sv}"
|
550
|
-
else:
|
551
|
-
return f"{sb}:{sc}:{sv}-{eb}:{ec}:{ev}"
|
552
|
-
|
553
|
-
def r1(match):
|
554
|
-
|
555
|
-
def r2(match):
|
556
|
-
return f'{prefix}{lnk}{suffix}{match.group(1)}</a>'
|
557
|
-
|
558
|
-
scripture = match.group(1).strip('}{')
|
559
|
-
if scripture in self._linked.keys():
|
560
|
-
return self._linked[scripture]
|
561
|
-
output = ''
|
562
|
-
bk = ''
|
563
|
-
ch = 0
|
564
|
-
sep = ';'
|
565
|
-
for bcv_range in self._encoded[scripture]:
|
566
|
-
scrip, bk, ch, _, sep = self._decode_scripture(bcv_range, bk, ch, sep)
|
567
|
-
lnk = convert_range(bcv_range)
|
568
|
-
output += regex.sub(self._chunk, r2, scrip)
|
569
|
-
self._linked[scripture] = output.strip(' ;,')
|
570
|
-
if self._upper:
|
571
|
-
output = output.upper()
|
572
|
-
return output.strip(' ;,')
|
573
|
-
|
574
|
-
text = self._locate_scriptures(text)
|
575
|
-
return regex.sub(self._tagged, r1, text).replace('»»|', '{{').replace('|««', '}}')
|
576
|
-
|
577
|
-
|
578
|
-
def serial_chapter_number(self, bcv):
|
579
|
-
try:
|
580
|
-
return int(self._chapters.loc[(self._chapters['Book'] == int(bcv[0:2])) & (self._chapters['Chapter'] == int(bcv[2:5]))].values[0][0])
|
581
|
-
except:
|
582
|
-
self._error_report(bcv, 'OUT OF RANGE')
|
583
|
-
return None
|
584
|
-
|
585
|
-
def serial_verse_number(self, bcv):
|
586
|
-
try:
|
587
|
-
return int(self._verses.loc[(self._verses['Book'] == int(bcv[0:2])) & (self._verses['Chapter'] == int(bcv[2:5])) & (self._verses['Verse'] == int(bcv[5:]))].values[0][0])
|
588
|
-
except:
|
589
|
-
self._error_report(bcv, 'OUT OF RANGE')
|
590
|
-
return None
|
591
|
-
|
592
|
-
def code_chapter(self, chapter):
|
593
|
-
try:
|
594
|
-
book, chapter = self._chapters[self._chapters['ChapterId'] == int(chapter)].values[0][1:]
|
595
|
-
last = self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == chapter), ['Last']].values[0][0]
|
596
|
-
bcv = str(book).zfill(2) + str(chapter).zfill(3)
|
597
|
-
return f"('{bcv}001', '{bcv}{str(last).zfill(3)}')"
|
598
|
-
except:
|
599
|
-
self._error_report(chapter, 'OUT OF RANGE')
|
600
|
-
return None
|
601
|
-
|
602
|
-
def code_verse(self, verse):
|
603
|
-
bcv = ''
|
604
|
-
try:
|
605
|
-
for i in self._verses[self._verses['VerseId'] == int(verse)].values[0][1:]:
|
606
|
-
bcv += str(i).zfill(3)
|
607
|
-
return f"('{bcv[1:]}', '{bcv[1:]}')"
|
608
|
-
except:
|
609
|
-
self._error_report(verse, 'OUT OF RANGE')
|
610
|
-
return None
|
611
|
-
|
612
|
-
|
613
|
-
def _main(args):
|
34
|
+
def main(args):
|
614
35
|
|
615
36
|
def switchboard(text):
|
616
37
|
if args['cc']:
|
@@ -622,11 +43,11 @@ def _main(args):
|
|
622
43
|
elif args['sv']:
|
623
44
|
return s.serial_verse_number(args['sv'])
|
624
45
|
if args['l'] is not None:
|
625
|
-
prefix = '<a href='
|
626
|
-
suffix = '>'
|
627
|
-
if len(args['l']) > 1:
|
46
|
+
prefix = '<a href="'
|
47
|
+
suffix = '">'
|
48
|
+
if len(args['l']) > 1 and args['l'][1] != '':
|
628
49
|
suffix = args['l'][1]
|
629
|
-
if len(args['l']) > 0:
|
50
|
+
if len(args['l']) > 0 and args['l'][0] != '':
|
630
51
|
prefix = args['l'][0]
|
631
52
|
return s.link_scriptures(text, prefix, suffix)
|
632
53
|
elif args['c']:
|
@@ -671,43 +92,42 @@ def _main(args):
|
|
671
92
|
else:
|
672
93
|
print(txt)
|
673
94
|
|
674
|
-
if __name__ == "__main__":
|
675
|
-
PROJECT_PATH = Path(__file__).resolve().parent
|
676
|
-
APP = 'linkture' # Path(__file__).stem
|
677
|
-
parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")
|
678
|
-
|
679
|
-
parser.add_argument('-v', action='version', version=f"{APP} {__version__}", help='show version and exit')
|
680
|
-
parser.add_argument('-q', action='store_true', help="don't show errors")
|
681
|
-
|
682
|
-
function_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
|
683
|
-
mode = function_group.add_mutually_exclusive_group()
|
684
|
-
mode.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
|
685
|
-
mode.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
|
686
|
-
parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')
|
687
|
-
|
688
|
-
parser.add_argument('--language', default='English', choices=available_languages, help='indicate source language for book names (English if unspecified)')
|
689
|
-
parser.add_argument('--translate', choices=available_languages, help='indicate output language for book names (same as source if unspecified)')
|
690
|
-
parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
|
691
|
-
parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')
|
692
|
-
format_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
|
693
|
-
formats = format_group.add_mutually_exclusive_group()
|
694
|
-
formats.add_argument('--full', action='store_true', help='output as full name - default (eg., "Genesis")')
|
695
|
-
formats.add_argument('--official', action='store_true', help='output as official abbreviation (eg., "Ge")')
|
696
|
-
formats.add_argument('--standard', action='store_true', help='output as standard abbreviation (eg., "Gen.")')
|
697
|
-
|
698
|
-
type_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
|
699
|
-
tpe = type_group.add_mutually_exclusive_group(required=False)
|
700
|
-
tpe.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
|
701
|
-
tpe.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
|
702
|
-
tpe.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
|
703
|
-
tpe.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
|
704
|
-
tpe.add_argument('-x', action='store_true', help='extract list of scripture references')
|
705
95
|
|
706
|
-
|
707
|
-
|
708
|
-
|
709
|
-
|
710
|
-
|
711
|
-
|
712
|
-
|
713
|
-
|
96
|
+
parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")
|
97
|
+
|
98
|
+
parser.add_argument('-v', action='version', version=f"{__app__} {__version__}", help='show version and exit')
|
99
|
+
parser.add_argument('-q', action='store_true', help="don't show errors")
|
100
|
+
|
101
|
+
function_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
|
102
|
+
mode = function_group.add_mutually_exclusive_group()
|
103
|
+
mode.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
|
104
|
+
mode.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
|
105
|
+
parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')
|
106
|
+
|
107
|
+
parser.add_argument('--language', default='English', choices=_available_languages, help='indicate source language for book names (English if unspecified)')
|
108
|
+
parser.add_argument('--translate', choices=_available_languages, help='indicate output language for book names (same as source if unspecified)')
|
109
|
+
parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
|
110
|
+
parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')
|
111
|
+
format_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
|
112
|
+
formats = format_group.add_mutually_exclusive_group()
|
113
|
+
formats.add_argument('--full', action='store_true', help='output as full name - default (eg., "Genesis")')
|
114
|
+
formats.add_argument('--official', action='store_true', help='output as official abbreviation (eg., "Ge")')
|
115
|
+
formats.add_argument('--standard', action='store_true', help='output as standard abbreviation (eg., "Gen.")')
|
116
|
+
|
117
|
+
type_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
|
118
|
+
tpe = type_group.add_mutually_exclusive_group(required=False)
|
119
|
+
tpe.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
|
120
|
+
tpe.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
|
121
|
+
tpe.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
|
122
|
+
tpe.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
|
123
|
+
tpe.add_argument('-x', action='store_true', help='extract list of scripture references')
|
124
|
+
|
125
|
+
aux_group = parser.add_argument_group('auxiliary functions')
|
126
|
+
aux = aux_group.add_mutually_exclusive_group(required=False)
|
127
|
+
aux.add_argument('-sc', metavar=('BCV'), help='return the serial number (1-1189) of the chapter with code "BCV" ("bbcccvvv")')
|
128
|
+
aux.add_argument('-sv', metavar=('BCV'), help='return the serial number (1-31091) of the verse with code "BCV" ("bbcccvvv")')
|
129
|
+
aux.add_argument('-cc', metavar=('chapter'), help='return the BCV range for serial chapter number "chapter" (integer value)')
|
130
|
+
aux.add_argument('-cv', metavar=('verse'), help='return the BCV code for serial verse number "verse" (integer value)')
|
131
|
+
|
132
|
+
args = parser.parse_args()
|
133
|
+
main(vars(args))
|