linkture 2.5.3__py3-none-any.whl → 2.5.4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,22 +1,28 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: linkture
3
- Version: 2.5.3
3
+ Version: 2.5.4
4
4
  Summary: PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode
5
5
  Keywords: bible,scriptures,scripture-references,scripture-translation,scripture-parser,scripture-linker
6
6
  Author-Email: "Eryk J." <infiniti@inventati.org>
7
7
  License: MIT
8
8
  Classifier: License :: OSI Approved :: MIT License
9
9
  Classifier: Operating System :: OS Independent
10
- Classifier: Programming Language :: Python :: 3
10
+ Classifier: Programming Language :: Python :: 3.9
11
+ Classifier: Programming Language :: Python :: 3.10
12
+ Classifier: Programming Language :: Python :: 3.11
13
+ Classifier: Programming Language :: Python :: 3.12
11
14
  Classifier: Development Status :: 5 - Production/Stable
12
15
  Classifier: Environment :: Console
16
+ Classifier: Topic :: Religion
17
+ Classifier: Topic :: Text Processing :: General
18
+ Classifier: Topic :: Text Processing :: Linguistic
13
19
  Project-URL: Homepage, https://github.com/erykjj/linkture
14
20
  Requires-Python: >=3.9
15
21
  Requires-Dist: setuptools>=59.6.0
16
22
  Requires-Dist: argparse>=1.4.0
17
23
  Requires-Dist: regex>=2023.8.8
18
24
  Requires-Dist: unidecode>=1.3.8
19
- Requires-Dist: pathlib
25
+ Requires-Dist: pathlib>=1.0.1
20
26
  Requires-Dist: pandas==2.2.2
21
27
  Description-Content-Type: text/markdown
22
28
 
@@ -40,7 +46,7 @@ A couple of auxiliary functions provide a verse number lookup (either by BCV ref
40
46
  ____
41
47
  ## Installation
42
48
 
43
- Download [latest source](https://github.com/erykjj/linkture/releases/latest) and `python3 -m pip install linkture-*.tar.gz`.
49
+ `python3 -m pip install linkture`
44
50
 
45
51
  ____
46
52
  ## Command-line usage
@@ -0,0 +1,7 @@
1
+ __init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
2
+ linkture-2.5.4.dist-info/METADATA,sha256=UkFQ5l0tC-iaDwLz1dgfuW4OMSuQLxX1Rc5_kjFv-wA,11015
3
+ linkture-2.5.4.dist-info/WHEEL,sha256=7sv5iXvIiTVJSnAxCz2tGBm9DHsb2vPSzeYeT7pvGUY,90
4
+ linkture-2.5.4.dist-info/licenses/LICENSE,sha256=kPqKoVmo3Tx1HgQvqfjBZuYkjT1mZXnQ5R0KBbEeFfs,1064
5
+ linkture.py,sha256=yz3C51p9MkZ05lO3HKE8UGQKTugcdU4v64S86H-dq1Y,29080
6
+ setup.py,sha256=v9tViRX45y4YBwCxPizWUTnXNu4ZPVTwATbl-9m-bAc,1491
7
+ linkture-2.5.4.dist-info/RECORD,,
linkture.py ADDED
@@ -0,0 +1,713 @@
1
+ #!/usr/bin/env python3
2
+
3
+ """
4
+ File: linkture
5
+
6
+ Description: Parse and process Bible scripture references
7
+
8
+ MIT License: Copyright (c) 2024 Eryk J.
9
+
10
+ Permission is hereby granted, free of charge, to any person obtaining a copy
11
+ of this software and associated documentation files (the "Software"), to deal
12
+ in the Software without restriction, including without limitation the rights
13
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
14
+ copies of the Software, and to permit persons to whom the Software is
15
+ furnished to do so, subject to the following conditions:
16
+
17
+ The above copyright notice and this permission notice shall be included in all
18
+ copies or substantial portions of the Software.
19
+
20
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
21
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
22
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
23
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
24
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
25
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
26
+ SOFTWARE.
27
+ """
28
+
29
+ VERSION = 'v2.5.4'
30
+
31
+
32
+ import argparse, json, regex, sqlite3
33
+ import pandas as pd
34
+
35
+ from ast import literal_eval
36
+ from pathlib import Path
37
+ from unidecode import unidecode
38
+
39
+
40
+ available_languages = ('Cebuano', 'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Hungarian', 'Italian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese', 'Russian', 'Spanish', 'Tagalog', 'Ukrainian')
41
+ non_latin = ('Chinese', 'Greek', 'Japanese', 'Korean', 'Russian', 'Ukrainian')
42
+
43
+
44
+ class Scriptures():
45
+
46
def __init__(self, language='English', translate=None, form=None, separator=' ', upper=False, verbose=False):
    """
    Load book names and verse ranges, and compile the reference parsers.

    language:  source language of the book names found in the text
    translate: output language for book names (source language if not given)
    form:      "full", "standard" or "official"; any other value selects the
               same name column as "full"
    separator: replacement for the whitespace inside rewritten references
    upper:     upper-case the output book names
    verbose:   print a message for references that fail validation

    Reads res/resources.db and res/custom.json from the directory that
    contains this file.

    Raises ValueError if either language is not in available_languages.
    """

    def normalize(name):
        # Reduce a book name to the key form used for lookups: optionally
        # transliterated, upper-cased, with punctuation and separators removed
        if not self._nl:
            name = unidecode(name)
        return regex.sub(r'\p{P}|\p{Z}', '', name.upper())

    self._verbose = verbose
    self._separator = separator
    if language not in available_languages:
        raise ValueError("Indicated source language is not an option!")
    if translate:
        if translate not in available_languages:
            raise ValueError("Indicated translation language is not an option!")
    else:
        translate = language
    self._nl = language in non_latin
    # Rewriting is needed when translating or when an output form was requested
    self._rewrite = bool((language != translate) or form)
    self._upper = upper

    # Column of the Books table holding the requested name form
    if form == "standard":
        column = 4
    elif form == "official":
        column = 5
    else:
        column = 3

    self._src_book_names = {}
    self._tr_book_names = ['Bible']  # index 0 is a placeholder; books are inserted at rec[2]
    path = Path(__file__).resolve().parent

    con = sqlite3.connect(path / 'res/resources.db')
    try:  # make sure the connection is released even if a query fails
        cur = con.cursor()
        # Bound parameter instead of string interpolation
        books_query = "SELECT * FROM Books WHERE Language = ?;"
        for rec in cur.execute(books_query, (translate,)).fetchall():
            tr = rec[column].upper() if self._upper else rec[column]
            self._tr_book_names.insert(rec[2], tr)
        for rec in cur.execute(books_query, (language,)).fetchall():
            for i in range(3, 6):  # the three name forms of each book
                self._src_book_names[normalize(rec[i])] = rec[2]
        cur.close()
        self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
        self._verses = pd.read_sql_query("SELECT * FROM Verses;", con)
        self._chapters = pd.read_sql_query("SELECT * FROM Chapters;", con)
    finally:
        con.close()

    # Additional book-name variants; loaded after the database names, so they
    # win on key collisions (same order as before)
    with open(path / 'res/custom.json', 'r', encoding='UTF-8') as json_file:
        b = json.load(json_file)
    if language in b.keys():
        for row in b[language]:
            for item in row[1].split(', '):
                self._src_book_names[normalize(item)] = row[0]

    self._reported = []
    self._encoded = {}
    self._linked = {}

    # Scripture reference parser:
    self._first_pass = regex.compile(r"""(
        {{.*?}} |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )?
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\.\p{Pd};]\p{Z}?)*
        (?<=[\p{L},:\p{Pd}]\p{Z} |
            [\p{L},:\p{Pd}] |
            \.)\d+
        (?![,\p{Pd}\p{L}]) |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]*\p{L}
        )""", flags=regex.VERBOSE | regex.IGNORECASE)

    self._second_pass = regex.compile(r"""(
        (?![^{]*}) # ignore already marked
        \p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\p{Pd};]\p{Z}?)*\d+
        (?![,\p{Pd}\p{L}])
        )""", flags=regex.VERBOSE)

    self._bk_ref = regex.compile(r"""
        ((?:[1-5]\p{L}{0,2} |
            [IV]{1,3} )?
        [\p{Pd}\.]?[\p{L}\p{Pd}\.\p{Z}]{2,})(.*)
        """, flags=regex.VERBOSE | regex.IGNORECASE)

    self._tagged = regex.compile(r'({{.*?}})')

    # Chapter/verse notations (c = chapter, v = verse, d = bare number)
    self._cv_cv = regex.compile(r'(\d+):(\d+)-(\d+):(\d+)')
    self._v_cv = regex.compile(r'(\d+)-(\d+):(\d+)')
    self._cv_v = regex.compile(r'(\d+):(\d+)-(\d+)')
    self._cv = regex.compile(r'(\d+):(\d+)')
    self._ddd = regex.compile(r'(\d+),(\d+),(\d+)')
    self._dd_d = regex.compile(r'(\d+),(\d+)-(\d+)')
    self._d_dd = regex.compile(r'(\d+)-(\d+),(\d+)')
    self._d_d = regex.compile(r'(\d+)-(\d+)(?!:)')
    self._dd = regex.compile(r'(\d+),(\d+)')
    self._d = regex.compile(r'(\d+)')

    self._chunk = regex.compile(r'([^,;\p{Z}]+.*)')
    self._sep = regex.compile(r'(?<!;)\s')
168
+
169
def _error_report(self, scripture, message):
    """
    Print a problem with a reference, once per reference.

    Nothing is printed (or recorded) unless the instance is verbose.
    """
    if not self._verbose:
        return
    if scripture in self._reported:
        return  # already reported
    print(f'** "{scripture}" - {message}')
    self._reported.append(scripture)
173
+
174
def _scripture_parts(self, scripture):
    """
    Split a reference into its book and chapter/verse parts and identify the book.

    Returns (output book name, chapter/verse part, book number, last chapter
    of the book), or (None, None, None, 0) if no known book name is found.
    """
    unknown = (None, None, None, 0)

    # Drop all separators and normalise every kind of dash to '-'
    compact = regex.sub(r'\p{Pd}', '-', regex.sub(r'\p{Z}', '', scripture))
    found = self._bk_ref.search(compact)
    if not found:
        return unknown
    name = found.group(1).strip()
    rest = found.group(2).strip()

    # Look the book up by its normalised name
    key = name if self._nl else unidecode(name)  # NOTE: this converts Génesis to Genesis and English recognizes it !! Feature :-)
    key = regex.sub(r'\p{P}|\p{Z}', '', key.upper())
    if key not in self._src_book_names:
        return unknown
    number = self._src_book_names[key]
    book_row = (self._ranges.Book == number) & (self._ranges.Chapter.isnull())
    bk_num, last = self._ranges.loc[book_row, ['Book', 'Last']].values[0]
    if not bk_num:
        return unknown

    tr_name = self._tr_book_names[bk_num]
    return tr_name, rest.replace('.', ':'), bk_num, last  # for period notation cases (Gen 1.1)
196
+
197
def _locate_scriptures(self, text):
    """
    Mark every valid scripture reference in text with {{ }}.

    Validated references are stored in self._encoded together with their BCV
    ranges. References that were already tagged in the input but cannot be
    validated are wrapped in »»| |«« instead, so callers can restore the tags.
    """
    opening, closing = '{{', '}}'

    def mark(match):
        candidate = match.group(1)
        pre_tagged = bool(regex.match(r'{{.*}}', candidate))
        if pre_tagged:
            candidate = candidate.strip('}{')
        if candidate in self._encoded:
            return opening + candidate + closing
        _, rest, bk_num, last = self._scripture_parts(candidate)
        if bk_num:
            code = self._code_scripture(candidate, bk_num, rest, last)  # validation performed
            if code:
                self._encoded[candidate] = code
                return opening + candidate + closing
        if pre_tagged:
            # So as not to lose {{ }} on unrecognized pre-tagged scriptures (other language, etc.)
            return '»»|' + candidate + '|««'
        return candidate

    self._reported = []
    first = self._first_pass.sub(mark, text)
    return self._second_pass.sub(mark, first)
222
+
223
+
224
def list_scriptures(self, text):
    """
    Return the list of scripture references found in text.

    References are rewritten and/or upper-cased according to the options the
    instance was created with.
    """
    found = []
    located = self._locate_scriptures(text)
    for tagged in self._tagged.findall(located):
        reference = tagged.strip('}{')
        if self._rewrite:
            reference = self.decode_scriptures(self._encoded[reference])[0]
        found.append(reference.upper() if self._upper else reference)
    return found
235
+
236
def tag_scriptures(self, text):
    """Return text as processed by rewrite_scriptures with tagging enabled."""
    return self.rewrite_scriptures(text, tag=True)
238
+
239
def rewrite_scriptures(self, text, tag=False):
    """
    Return text with its scripture references rewritten.

    References are rewritten and/or upper-cased according to the options the
    instance was created with. With tag=True each recognised reference is
    wrapped in {{ }}.
    """

    def render(match):
        reference = match.group(1).strip('}{')
        if self._rewrite:
            reference = self.decode_scriptures(self._encoded[reference])[0]
        if self._upper:
            reference = reference.upper()
        return '{{' + reference + '}}' if tag else reference

    located = self._locate_scriptures(text)
    rewritten = self._tagged.sub(render, located)
    # Restore the tags of pre-tagged references that were not recognised
    return rewritten.replace('»»|', '{{').replace('|««', '}}')
254
+
255
+
256
def _code_scripture(self, scripture, bk_num, rest, last):
    """
    Validate the chapter/verse part of a reference and encode it as BCV ranges.

    scripture: the reference as found in the text (only used in error reports)
    bk_num:    book number
    rest:      chapter/verse part of the reference; '' stands for the whole book
    last:      number of the last chapter of the book

    Returns a list of (start, end) tuples of 'bbcccvvv' strings, or None
    (after calling _error_report) if any segment fails validation.
    """

    def reform_series(txt):  # rewrite comma-separated consecutive sequences as (1, 2, 3) as ranges (1-3)
        for result in self._d_dd.finditer(txt, overlapped=True):
            end = result.group(3)
            mid = result.group(2)
            start = result.group(1)
            if int(end) - int(mid) == 1:
                txt = regex.sub(result.group(), f"{start}-{end}", txt)
        # This pass is deliberately run twice, exactly as before.
        # NOTE(review): presumably to pick up series that only line up after the first pass - confirm
        for _ in range(2):
            for result in self._ddd.finditer(txt, overlapped=True):
                end = result.group(3)
                start = result.group(1)
                if int(end) - int(start) == 2:
                    txt = regex.sub(result.group(), f"{start}-{end}", txt)
        for result in self._dd.finditer(txt, overlapped=True):
            end = result.group(2)
            start = result.group(1)
            if int(end) - int(start) == 1:
                txt = regex.sub(result.group(), f"{start}-{end}", txt)
        return txt

    def last_of(b, chapter_mask):
        # 'Last' value(s) of the Ranges row(s) of book b selected by chapter_mask
        return self._ranges.loc[(self._ranges.Book == b) & chapter_mask, ['Last']].values

    def validate(b, ch, vs):
        c = int(ch)
        v = int(vs)
        if not (0 < b <= 66):  # book out of range
            return None
        if not (0 < c <= last_of(b, self._ranges.Chapter.isnull())[0]):  # chapter out of range
            return None
        if not (0 < v <= last_of(b, self._ranges.Chapter == c)[0]):  # verse out of range
            return None
        return True

    def code_verses(chunk, book, multi):
        # Returns ((start, end), chapter to carry over to the next
        # comma-separated segment); (None, 0) means the segment is invalid
        b = str(book).zfill(2)

        result = self._cv_cv.search(chunk)  # ch:vs-ch:vs
        if result:
            c1, v1, c2, v2 = result.groups()
            if not (validate(book, c1, v1) and validate(book, c2, v2)):
                return None, 0
            ch1, ch2 = c1.zfill(3), c2.zfill(3)
            return (b+ch1+v1.zfill(3), b+ch2+v2.zfill(3)), ch2

        result = self._cv_v.search(chunk)  # ch:vs-vs
        if result:
            c, v1, v2 = result.groups()
            if not (validate(book, c, v1) and validate(book, c, v2)):
                return None, 0
            ch1 = c.zfill(3)
            return (b+ch1+v1.zfill(3), b+ch1+v2.zfill(3)), ch1

        result = self._v_cv.search(chunk)  # vs-ch:vs (start chapter comes from the enclosing loop)
        if result:
            if not ch:
                # No chapter context (e.g. "1-2:3" right after the book name):
                # previously str(None) was passed to int() and raised ValueError
                return None, 0
            v1, c2, v2 = result.groups()
            if not (validate(book, str(ch), v1) and validate(book, c2, v2)):
                return None, 0
            ch1, ch2 = str(ch).zfill(3), c2.zfill(3)
            return (b+ch1+v1.zfill(3), b+ch2+v2.zfill(3)), ch2

        result = self._cv.search(chunk)  # ch:vs
        if result:
            c, v = result.groups()
            if not validate(book, c, v):
                return None, 0
            ch1 = c.zfill(3)
            v1 = v.zfill(3)
            return (b+ch1+v1, b+ch1+v1), ch1

        result = self._d_d.search(chunk)  # n-n
        if result:
            first, second = result.groups()
            if multi:  # chapter range: first verse of one to last verse of the other
                if not (validate(book, first, 1) and validate(book, second, 1)):
                    return None, 0
                ch1, ch2 = first.zfill(3), second.zfill(3)
                v2 = str(last_of(book, self._ranges.Chapter == int(ch2))[0][0]).zfill(3)
                return (b+ch1+'001', b+ch2+v2), None
            # single-chapter book: verse range within chapter 1
            if not (validate(book, 1, first) and validate(book, 1, second)):
                return None, 0
            ch1 = '001'
            return (b+ch1+first.zfill(3), b+ch1+second.zfill(3)), ch1

        result = self._d.search(chunk)  # n
        if result:
            number = result.group(1)
            if multi:  # whole chapter
                if not validate(book, number, 1):
                    return None, 0
                ch1 = number.zfill(3)
                v2 = str(last_of(book, self._ranges.Chapter == int(ch1))[0][0]).zfill(3)
                return (b+ch1+'001', b+ch1+v2), None
            # single-chapter book: one verse of chapter 1
            if not validate(book, 1, number):
                return None, 0
            v1 = number.zfill(3)
            return (b+'001'+v1, b+'001'+v1), None

        return None, None

    lst = []
    if rest == '':  # whole book
        v = last_of(bk_num, self._ranges.Chapter == last)[0][0]
        if last == 1:
            rest = f'1-{v}'
        else:
            rest = f'1:1-{last}:{v}'
    else:
        rest = reform_series(rest)
    for chunk in rest.split(';'):
        ch = None  # chapter context restarts after every ';'
        for bit in chunk.split(','):
            if ch:
                tup, ch = code_verses(f"{ch}:{bit}", bk_num, last > 1)
            else:
                tup, ch = code_verses(bit, bk_num, last > 1)
            if not tup:
                self._error_report(scripture, f'"{bit.strip()}" OUT OF RANGE')
                return None
            lst.append(tup)
    return lst
429
+
430
def code_scriptures(self, text):
    """Return one flat list with the BCV ranges of every reference found in text."""
    located = self._locate_scriptures(text)
    return [
        bcv_range
        for tagged in self._tagged.findall(located)
        for bcv_range in self._encoded[tagged.strip('}{')]
    ]
438
+
439
+
440
def _decode_scripture(self, bcv_range, book='', chap=0, sep=';'):
    """
    Convert one BCV range into a readable reference (or a continuation of one).

    bcv_range: (start, end) pair of 'bbcccvvv' strings
    book:      book name returned by the previous call ('' to start afresh)
    chap:      chapter returned by the previous call
    sep:       separator returned by the previous call

    Returns a 5-tuple (text, book, chap, cont, sep). cont is True when the
    range is in the same book as the previous one; text is then only the part
    to append to the previous reference.

    An empty, malformed or out-of-range bcv_range yields
    (None, '', 0, False, ';'). Failures used to return only four values,
    which broke callers that unpack five.
    """
    invalid = (None, '', 0, False, ';')
    if not bcv_range:
        return invalid
    try:
        start, end = bcv_range
        sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
        eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])
    except (TypeError, ValueError):  # not a pair of numeric code strings
        return invalid

    def last_of(chapter_mask):
        # 'Last' value of the Ranges row of book sb selected by chapter_mask
        return self._ranges.loc[(self._ranges.Book == sb) & chapter_mask, ['Last']].values[0][0]

    if sb != eb or not (0 < sb <= 66):  # range spans books, or book out of range
        return invalid
    lc = last_of(self._ranges.Chapter.isnull())
    if not (0 < sc <= ec <= lc):  # chapter(s) out of range
        return invalid
    se = last_of(self._ranges.Chapter == sc)
    le = last_of(self._ranges.Chapter == ec)
    if not ((0 < sv <= se) and (0 < ev <= le)):  # verse(s) out of range
        return invalid

    bk_name = self._tr_book_names[sb]
    cont = (book == bk_name)
    if not cont:
        book = bk_name
    c = ec - sc + 1  # number of chapters spanned
    v = ev - sv + 1  # end verse minus start verse, plus one
    if lc == 1:  # single-chapter book: no chapter number is shown
        if cont:
            bk_name = ','
        if v == le:  # the whole book
            scripture = f"{bk_name.strip(',')}"
        elif v == 1:
            scripture = f"{bk_name} {sv}"
        elif v == 2:
            scripture = f"{bk_name} {sv}, {ev}"
        else:
            scripture = f"{bk_name} {sv}‑{ev}"
        sep = ';'
    else:
        ch = f"{sc}:"
        if v == le:  # whole chapter(s): verse numbers are not shown
            if cont:
                bk_name = sep
            if c == lc:  # the whole book
                scripture = f"{bk_name.strip(',')}"
            elif c == 1:
                scripture = f"{bk_name} {sc}"
            elif c == 2:
                scripture = f"{bk_name} {sc}, {ec}"
            else:
                scripture = f"{bk_name} {sc}‑{ec}"
            sep = ','
        elif c == 1:  # verses within one chapter
            if cont:
                if sc == chap:  # same chapter as the previous range: append verses only
                    bk_name = ''
                    ch = ', '
                else:
                    bk_name = ';'
            if v == 1:
                scripture = f"{bk_name} {ch}{sv}"
            elif v == 2:
                scripture = f"{bk_name} {ch}{sv}, {ev}"
            else:
                scripture = f"{bk_name} {ch}{sv}‑{ev}"
            sep = ';'
        else:  # range running from one chapter into another
            if cont and (sc == chap):
                bk_name = ''
                ch = ', '
            scripture = f"{bk_name} {ch}{sv}‑{ec}:{ev}"
            sep = ';'
    chap = ec
    if self._separator != ' ':
        scripture = self._sep.sub(self._separator, scripture)
    return scripture.strip(), book, chap, cont, sep
520
+
521
def decode_scriptures(self, bcv_ranges=None):
    """
    Convert a list of BCV ranges into a list of readable references.

    bcv_ranges: iterable of (start, end) pairs of 'bbcccvvv' strings;
                None or empty gives an empty list

    Consecutive ranges that _decode_scripture marks as continuations are
    merged into one reference. Ranges it rejects are skipped.
    """
    scriptures = []
    bk = ''
    ch = 0
    sep = ';'
    for bcv_range in bcv_ranges or ():
        result = self._decode_scripture(bcv_range, bk, ch, sep)
        # Rejected ranges may come back with only four values (no separator)
        scripture, bk, ch, cont = result[:4]
        sep = result[4] if len(result) > 4 else ';'
        if not scripture:
            continue
        if cont and scriptures:
            scriptures[-1] += scripture
        else:
            scriptures.append(scripture)
    return scriptures
534
+
535
+
536
def link_scriptures(self, text, prefix='<a href=', suffix='>'):  # NOTE: this always rewrites (full by default) - what if one wants to leave as is??
    """
    Return text with every scripture reference turned into <a> links.

    Each BCV range of a reference becomes
    prefix + "b:c:v" or "b:c:v-b:c:v" + suffix + reference text + "</a>".

    Results are cached per (reference, prefix, suffix), and the cached value
    is exactly what was returned. Previously the cache ignored prefix/suffix
    and stored the text before upper-casing, so repeated references could
    come back different from their first occurrence.
    """

    def convert_range(bcv_range):
        # 'bbcccvvv' pair -> "b:c:v" (single verse) or "b:c:v-b:c:v"
        start, end = bcv_range
        sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
        if start == end:
            return f"{sb}:{sc}:{sv}"
        eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])
        return f"{sb}:{sc}:{sv}-{eb}:{ec}:{ev}"

    def r1(match):
        scripture = match.group(1).strip('}{')
        key = (scripture, prefix, suffix)
        if key in self._linked:
            return self._linked[key]
        output = ''
        bk = ''
        ch = 0
        sep = ';'
        for bcv_range in self._encoded[scripture]:
            if not bcv_range:
                continue
            result = self._decode_scripture(bcv_range, bk, ch, sep)
            # Rejected ranges may come back with only four values (no separator)
            scrip, bk, ch = result[:3]
            sep = result[4] if len(result) > 4 else ';'
            if not scrip:
                continue
            lnk = convert_range(bcv_range)
            # Wrap the text in the link, leaving any leading separator outside
            output += self._chunk.sub(lambda m: f'{prefix}{lnk}{suffix}{m.group(1)}</a>', scrip)
        output = output.strip(' ;,')
        if self._upper:
            # NOTE(review): this also upper-cases prefix/suffix (kept as before)
            output = output.upper()
        self._linked[key] = output
        return output

    text = self._locate_scriptures(text)
    # Restore the tags of pre-tagged references that were not recognised
    return self._tagged.sub(r1, text).replace('»»|', '{{').replace('|««', '}}')
576
+
577
+
578
def serial_chapter_number(self, bcv):
    """
    Return the serial number of the chapter given by a 'bbcccvvv' code.

    Only the book and chapter digits are used. Returns None (after calling
    _error_report) if the code is malformed or no such chapter exists.
    """
    try:
        book = int(bcv[0:2])
        chapter = int(bcv[2:5])
        row = self._chapters.loc[(self._chapters['Book'] == book) & (self._chapters['Chapter'] == chapter)]
        return int(row.values[0][0])
    except (IndexError, KeyError, TypeError, ValueError):
        # Only lookup/conversion failures; KeyboardInterrupt etc. propagate
        self._error_report(bcv, 'OUT OF RANGE')
        return None
584
+
585
def serial_verse_number(self, bcv):
    """
    Return the serial number of the verse given by a 'bbcccvvv' code.

    Returns None (after calling _error_report) if the code is malformed or no
    such verse exists.
    """
    try:
        book = int(bcv[0:2])
        chapter = int(bcv[2:5])
        verse = int(bcv[5:])
        verses = self._verses
        row = verses.loc[(verses['Book'] == book) & (verses['Chapter'] == chapter) & (verses['Verse'] == verse)]
        return int(row.values[0][0])
    except (IndexError, KeyError, TypeError, ValueError):
        # Only lookup/conversion failures; KeyboardInterrupt etc. propagate
        self._error_report(bcv, 'OUT OF RANGE')
        return None
591
+
592
def code_chapter(self, chapter):
    """
    Return the BCV range of a chapter given by its serial number.

    The result is the string "('bbccc001', 'bbcccvvv')", where vvv is the
    'Last' value stored for that chapter. Returns None (after calling
    _error_report) if chapter is not a number or no such chapter exists.
    """
    try:
        book, chap = self._chapters[self._chapters['ChapterId'] == int(chapter)].values[0][1:]
        last = self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == chap), ['Last']].values[0][0]
    except (IndexError, KeyError, TypeError, ValueError):
        # Only lookup/conversion failures; KeyboardInterrupt etc. propagate
        self._error_report(chapter, 'OUT OF RANGE')
        return None
    bcv = str(book).zfill(2) + str(chap).zfill(3)
    return f"('{bcv}001', '{bcv}{str(last).zfill(3)}')"
601
+
602
def code_verse(self, verse):
    """
    Return the BCV code of a verse given by its serial number.

    The result is the string "('bbcccvvv', 'bbcccvvv')" (a one-verse range).
    Returns None (after calling _error_report) if verse is not a number or no
    such verse exists.
    """
    try:
        parts = self._verses[self._verses['VerseId'] == int(verse)].values[0][1:]
    except (IndexError, KeyError, TypeError, ValueError):
        # Only lookup/conversion failures; KeyboardInterrupt etc. propagate
        self._error_report(verse, 'OUT OF RANGE')
        return None
    # Each part is padded to three digits; the book needs only two, so the
    # leading digit is dropped
    bcv = ''.join(str(part).zfill(3) for part in parts)[1:]
    return f"('{bcv}', '{bcv}')"
611
+
612
+
613
def _main(args, parser=None):
    """
    Run the command-line interface.

    args:   dict of parsed command-line options (vars() of the argparse namespace)
    parser: the argparse parser, used to print the help text when there is
            nothing to process; if omitted, a module-level `parser` is used
            when one exists

    The result is written to the file given by -o, or printed otherwise.
    Raises SystemExit when in-file and out-file are the same, or when there
    is nothing to process.
    """
    aux_options = ('cc', 'cv', 'sc', 'sv')

    def switchboard(text):
        # Auxiliary functions take their value from the option itself
        aux_functions = (
            ('cc', s.code_chapter),
            ('cv', s.code_verse),
            ('sc', s.serial_chapter_number),
            ('sv', s.serial_verse_number),
        )
        for option, function in aux_functions:
            if args[option]:
                return function(args[option])
        if args['l'] is not None:
            link = args['l']
            prefix = link[0] if len(link) > 0 else '<a href='
            suffix = link[1] if len(link) > 1 else '>'
            return s.link_scriptures(text, prefix, suffix)
        elif args['c']:
            return s.code_scriptures(text)
        elif args['d']:
            return s.decode_scriptures(literal_eval(text))
        elif args['x']:
            return s.list_scriptures(text)
        elif args['t']:
            return s.tag_scriptures(text)
        else:
            return s.rewrite_scriptures(text)

    form = None
    if args['standard']:
        form = 'standard'
    elif args['official']:
        form = 'official'
    elif args['full']:
        form = 'full'

    s = Scriptures(language=args['language'], translate=args['translate'], form=form, separator=args['s'], upper=args['u'], verbose=(not args['q']))

    if args['f']:
        if args['o'] and (args['o'] == args['f']):
            print('Make sure in-file and out-file are different!\n')
            raise SystemExit  # same effect as exit(), without relying on the site module
        with open(args['f'], 'r', encoding='UTF-8') as f:
            txt = f.read()
    else:
        txt = args['r']

    # Auxiliary functions need no -f/-r input (as stated in the help text)
    if txt or any(args[option] for option in aux_options):
        txt = switchboard(txt)
    else:
        if parser is None:
            parser = globals().get('parser')  # set when this file is run as a script
        if parser is not None:
            print(parser.format_help())
        raise SystemExit

    if args['o']:
        with open(args['o'], 'w', encoding='UTF-8') as f:
            f.write(str(txt))
    else:
        print(txt)
673
+
674
if __name__ == "__main__":
    # Command-line entry point: build the option parser, then hand over to _main
    PROJECT_PATH = Path(__file__).resolve().parent
    APP = Path(__file__).stem
    parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")

    parser.add_argument('-v', action='version', version=f"{APP} {VERSION}", help='show version and exit')
    parser.add_argument('-q', action='store_true', help="don't show errors")

    # Input comes from a file or from the command line, never both
    source_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
    source = source_group.add_mutually_exclusive_group()
    source.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
    source.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
    parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')

    parser.add_argument('--language', default='English', choices=available_languages, help='indicate source language for book names (English if unspecified)')
    parser.add_argument('--translate', choices=available_languages, help='indicate output language for book names (same as source if unspecified)')
    parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
    parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')

    # Output form of the book names (at most one)
    form_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
    form_choice = form_group.add_mutually_exclusive_group()
    for flag, description in (
        ('--full', 'output as full name - default (eg., "Genesis")'),
        ('--official', 'output as official abbreviation (eg., "Ge")'),
        ('--standard', 'output as standard abbreviation (eg., "Gen.")'),
    ):
        form_choice.add_argument(flag, action='store_true', help=description)

    # Kind of conversion (at most one)
    conversion_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
    conversion = conversion_group.add_mutually_exclusive_group()
    conversion.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
    conversion.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
    conversion.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
    conversion.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
    conversion.add_argument('-x', action='store_true', help='extract list of scripture references')

    # Auxiliary look-ups (at most one)
    lookup_group = parser.add_argument_group('auxiliary functions')
    lookup = lookup_group.add_mutually_exclusive_group()
    for flag, value_name, description in (
        ('-sc', 'BCV', 'return the serial number (1-1189) of the chapter with code "BCV" ("bbcccvvv")'),
        ('-sv', 'BCV', 'return the serial number (1-31091) of the verse with code "BCV" ("bbcccvvv")'),
        ('-cc', 'chapter', 'return the BCV range for serial chapter number "chapter" (integer value)'),
        ('-cv', 'verse', 'return the BCV code for serial verse number "verse" (integer value)'),
    ):
        lookup.add_argument(flag, metavar=value_name, help=description)

    args = parser.parse_args()
    _main(vars(args))
setup.py ADDED
@@ -0,0 +1,49 @@
1
import setuptools
from pathlib import Path

# The README next to this file is used as the long description. It is read
# explicitly as UTF-8 so the build does not depend on the locale's default
# encoding.
work_dir = Path(__file__).parent
long_description = (work_dir / 'README.md').read_text(encoding='utf-8')


setuptools.setup(

    name='linkture',
    version='2.5.4',
    author='Eryk J.',
    author_email='infiniti@inventati.org',
    url='https://github.com/erykjj/linkture',
    license='MIT',

    description='PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords=[
        'bible', 'scriptures', 'scripture-references', 'scripture-translation',
        'scripture-parser', 'scripture-linker'
    ],

    packages=setuptools.find_packages(),
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Topic :: Religion',
        'Topic :: Text Processing :: General',
        'Topic :: Text Processing :: Linguistic'
    ],
    python_requires='>=3.9',
    # NOTE(review): argparse and pathlib are part of the standard library for
    # every Python version allowed by python_requires - confirm whether these
    # two requirements are still needed
    install_requires=[
        'setuptools>=59.6.0',
        'argparse>=1.4.0',
        'regex>=2023.8.8',
        'unidecode>=1.3.8',
        'pathlib>=1.0.1',
        'pandas==2.2.2'
    ]

)
@@ -1,5 +0,0 @@
1
- linkture-2.5.3.dist-info/METADATA,sha256=GKWPEAFsZyzI30JI8lULmXCM69mORnFWpdHvvv-9hz8,10815
2
- linkture-2.5.3.dist-info/WHEEL,sha256=7sv5iXvIiTVJSnAxCz2tGBm9DHsb2vPSzeYeT7pvGUY,90
3
- linkture-2.5.3.dist-info/licenses/LICENSE,sha256=kPqKoVmo3Tx1HgQvqfjBZuYkjT1mZXnQ5R0KBbEeFfs,1064
4
- linkture/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
5
- linkture-2.5.3.dist-info/RECORD,,
File without changes