linkture 2.5.3__tar.gz → 2.5.4__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {linkture-2.5.3 → linkture-2.5.4}/PKG-INFO +10 -4
- {linkture-2.5.3 → linkture-2.5.4}/README.md +1 -1
- linkture-2.5.4/linkture.py +713 -0
- {linkture-2.5.3 → linkture-2.5.4}/pyproject.toml +9 -3
- linkture-2.5.4/setup.py +49 -0
- linkture-2.5.3/tests/__init__.py +0 -0
- {linkture-2.5.3 → linkture-2.5.4}/LICENSE +0 -0
- {linkture-2.5.3/src/linkture → linkture-2.5.4}/__init__.py +0 -0
@@ -1,22 +1,28 @@
|
|
1
1
|
Metadata-Version: 2.1
|
2
2
|
Name: linkture
|
3
|
-
Version: 2.5.
|
3
|
+
Version: 2.5.4
|
4
4
|
Summary: PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode
|
5
5
|
Keywords: bible,scriptures,scripture-references,scripture-translation,scripture-parser,scripture-linker
|
6
6
|
Author-Email: "Eryk J." <infiniti@inventati.org>
|
7
7
|
License: MIT
|
8
8
|
Classifier: License :: OSI Approved :: MIT License
|
9
9
|
Classifier: Operating System :: OS Independent
|
10
|
-
Classifier: Programming Language :: Python :: 3
|
10
|
+
Classifier: Programming Language :: Python :: 3.9
|
11
|
+
Classifier: Programming Language :: Python :: 3.10
|
12
|
+
Classifier: Programming Language :: Python :: 3.11
|
13
|
+
Classifier: Programming Language :: Python :: 3.12
|
11
14
|
Classifier: Development Status :: 5 - Production/Stable
|
12
15
|
Classifier: Environment :: Console
|
16
|
+
Classifier: Topic :: Religion
|
17
|
+
Classifier: Topic :: Text Processing :: General
|
18
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
13
19
|
Project-URL: Homepage, https://github.com/erykjj/linkture
|
14
20
|
Requires-Python: >=3.9
|
15
21
|
Requires-Dist: setuptools>=59.6.0
|
16
22
|
Requires-Dist: argparse>=1.4.0
|
17
23
|
Requires-Dist: regex>=2023.8.8
|
18
24
|
Requires-Dist: unidecode>=1.3.8
|
19
|
-
Requires-Dist: pathlib
|
25
|
+
Requires-Dist: pathlib>=1.0.1
|
20
26
|
Requires-Dist: pandas==2.2.2
|
21
27
|
Description-Content-Type: text/markdown
|
22
28
|
|
@@ -40,7 +46,7 @@ A couple of auxiliary functions provide a verse number lookup (either by BCV ref
|
|
40
46
|
____
|
41
47
|
## Installation
|
42
48
|
|
43
|
-
|
49
|
+
`python3 -m pip install linkture`
|
44
50
|
|
45
51
|
____
|
46
52
|
## Command-line usage
|
@@ -18,7 +18,7 @@ A couple of auxiliary functions provide a verse number lookup (either by BCV ref
|
|
18
18
|
____
|
19
19
|
## Installation
|
20
20
|
|
21
|
-
|
21
|
+
`python3 -m pip install linkture`
|
22
22
|
|
23
23
|
____
|
24
24
|
## Command-line usage
|
@@ -0,0 +1,713 @@
|
|
1
|
+
#!/usr/bin/env python3
|
2
|
+
|
3
|
+
"""
|
4
|
+
File: linkture
|
5
|
+
|
6
|
+
Description: Parse and process Bible scripture references
|
7
|
+
|
8
|
+
MIT License: Copyright (c) 2024 Eryk J.
|
9
|
+
|
10
|
+
Permission is hereby granted, free of charge, to any person obtaining a copy
|
11
|
+
of this software and associated documentation files (the "Software"), to deal
|
12
|
+
in the Software without restriction, including without limitation the rights
|
13
|
+
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
14
|
+
copies of the Software, and to permit persons to whom the Software is
|
15
|
+
furnished to do so, subject to the following conditions:
|
16
|
+
|
17
|
+
The above copyright notice and this permission notice shall be included in all
|
18
|
+
copies or substantial portions of the Software.
|
19
|
+
|
20
|
+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
21
|
+
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
22
|
+
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
23
|
+
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
24
|
+
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
25
|
+
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
26
|
+
SOFTWARE.
|
27
|
+
"""
|
28
|
+
|
29
|
+
VERSION = 'v2.5.4'
|
30
|
+
|
31
|
+
|
32
|
+
import argparse, json, regex, sqlite3
|
33
|
+
import pandas as pd
|
34
|
+
|
35
|
+
from ast import literal_eval
|
36
|
+
from pathlib import Path
|
37
|
+
from unidecode import unidecode
|
38
|
+
|
39
|
+
|
40
|
+
available_languages = ('Cebuano', 'Chinese', 'Danish', 'Dutch', 'English', 'French', 'German', 'Greek', 'Hungarian', 'Italian', 'Japanese', 'Korean', 'Norwegian', 'Polish', 'Portuguese', 'Russian', 'Spanish', 'Tagalog', 'Ukrainian')
|
41
|
+
non_latin = ('Chinese', 'Greek', 'Japanese', 'Korean', 'Russian', 'Ukrainian')
|
42
|
+
|
43
|
+
|
44
|
+
class Scriptures():
|
45
|
+
|
46
|
+
def __init__(self, language='English', translate=None, form=None, separator=' ', upper=False, verbose=False):
    """Load the book-name tables and compile the reference patterns.

    Args:
        language: Source language of the book names to recognise; must be
            one of ``available_languages``.
        translate: Output language for book names; when falsy the source
            language is used.
        form: ``"full"``, ``"standard"`` or ``"official"``. Any other value
            (including ``None``) selects the same column as ``"full"``.
        separator: Replacement for whitespace inside rewritten references.
        upper: Upper-case the output book names.
        verbose: Print a message for each reference reported as invalid.

    Raises:
        ValueError: If ``language`` or ``translate`` is not available.
    """
    self._verbose = verbose
    self._separator = separator
    if language not in available_languages:
        raise ValueError("Indicated source language is not an option!")
    if translate:
        if translate not in available_languages:
            raise ValueError("Indicated translation language is not an option!")
    else:
        translate = language
    self._nl = language in non_latin
    # Must be computed before ``form`` is turned into a column index below.
    self._rewrite = bool((language != translate) or form)
    self._upper = upper
    # Index of the column of a Books row that holds the chosen name form.
    form = {'full': 3, 'standard': 4, 'official': 5}.get(form, 3)

    def normalize(name):
        """Return the lookup key for a book name."""
        if not self._nl:
            name = unidecode(name)
        return regex.sub(r'\p{P}|\p{Z}', '', name.upper())

    self._src_book_names = {}
    path = Path(__file__).resolve().parent

    self._tr_book_names = ['Bible']
    con = sqlite3.connect(path / 'res/resources.db')
    try:
        cur = con.cursor()
        # Placeholders instead of string formatting keep values out of the SQL text.
        query = "SELECT * FROM Books WHERE Language = ?;"
        for rec in cur.execute(query, (translate,)).fetchall():
            tr = rec[form].upper() if self._upper else rec[form]
            self._tr_book_names.insert(rec[2], tr)
        for rec in cur.execute(query, (language,)).fetchall():
            for i in range(3, 6):
                self._src_book_names[normalize(rec[i])] = rec[2]
        with open(path / 'res/custom.json', 'r', encoding='UTF-8') as json_file:
            b = json.load(json_file)
        if language in b:
            for row in b[language]:
                for item in row[1].split(', '):
                    self._src_book_names[normalize(item)] = row[0]
        self._ranges = pd.read_sql_query("SELECT * FROM Ranges;", con)
        self._verses = pd.read_sql_query("SELECT * FROM Verses;", con)
        self._chapters = pd.read_sql_query("SELECT * FROM Chapters;", con)
        cur.close()
    finally:
        # Release the database even if loading fails part-way.
        con.close()
    self._reported = []
    self._encoded = {}
    self._linked = {}

    # Scripture reference parser:
    self._first_pass = regex.compile(r"""(
        {{.*?}} |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )?
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\.\p{Pd};]\p{Z}?)*
        (?<=[\p{L},:\p{Pd}]\p{Z} |
            [\p{L},:\p{Pd}] |
            \.)\d+
        (?![,\p{Pd}\p{L}]) |

        (?:[1-5] (?:\p{Z} |
                    \.\p{Z}? |
                    \p{Pd} |
                    \p{L}{1,2} (?:\p{Z} |
                                  \.\p{Z}? |
                                  \p{Pd}))? |
           [IV]{1,3} (?:\p{Z} |
                        \.\p{Z}? |
                        \p{Pd}) )
        (?!.*[\p{Pd}\.]{2})\p{L}[\p{L}\p{Pd}\.]*\p{L}
        )""", flags=regex.VERBOSE | regex.IGNORECASE)

    self._second_pass = regex.compile(r"""(
        (?![^{]*}) # ignore already marked
        \p{L}[\p{L}\p{Pd}\.]+\p{Z}?
        (?:\d+\p{Z}?[:,\p{Pd};]\p{Z}?)*\d+
        (?![,\p{Pd}\p{L}])
        )""", flags=regex.VERBOSE)

    self._bk_ref = regex.compile(r"""
        ((?:[1-5]\p{L}{0,2} |
            [IV]{1,3} )?
        [\p{Pd}\.]?[\p{L}\p{Pd}\.\p{Z}]{2,})(.*)
        """, flags=regex.VERBOSE | regex.IGNORECASE)

    self._tagged = regex.compile(r'({{.*?}})')

    self._cv_cv = regex.compile(r'(\d+):(\d+)-(\d+):(\d+)')
    self._v_cv = regex.compile(r'(\d+)-(\d+):(\d+)')
    self._cv_v = regex.compile(r'(\d+):(\d+)-(\d+)')
    self._cv = regex.compile(r'(\d+):(\d+)')
    self._ddd = regex.compile(r'(\d+),(\d+),(\d+)')
    self._dd_d = regex.compile(r'(\d+),(\d+)-(\d+)')
    self._d_dd = regex.compile(r'(\d+)-(\d+),(\d+)')
    self._d_d = regex.compile(r'(\d+)-(\d+)(?!:)')
    self._dd = regex.compile(r'(\d+),(\d+)')
    self._d = regex.compile(r'(\d+)')

    self._chunk = regex.compile(r'([^,;\p{Z}]+.*)')
    self._sep = regex.compile(r'(?<!;)\s')
|
168
|
+
|
169
|
+
def _error_report(self, scripture, message):
    """Print ``message`` for ``scripture`` once, when verbose mode is on.

    Each reported reference is remembered in ``self._reported`` so that it
    is not printed again.
    """
    if not self._verbose or scripture in self._reported:
        return
    print(f'** "{scripture}" - {message}')
    self._reported.append(scripture)
|
173
|
+
|
174
|
+
def _scripture_parts(self, scripture):
    """Split a reference into its book and chapter/verse parts.

    Returns:
        ``(translated_book_name, rest, book_number, last)`` where ``rest``
        is the text after the book name with periods replaced by colons and
        ``last`` is the ``Last`` value of the book's row in the ranges
        table. ``(None, None, None, 0)`` when no known book is found.
    """

    def check_book(bk_name):
        """Return ``(book_number, last)`` for a book name, or ``(None, 0)``."""
        if not self._nl:
            bk_name = unidecode(bk_name)  # NOTE: this converts Génesis to Genesis and English recognizes it !! Feature :-)
        bk_name = regex.sub(r'\p{P}|\p{Z}', '', bk_name.upper())
        if bk_name not in self._src_book_names:
            return None, 0
        bk_num = self._src_book_names[bk_name]
        return self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter.isnull()), ['Book', 'Last']].values[0]

    # Drop all separators and normalise every dash to a plain hyphen.
    reduced = regex.sub(r'\p{Z}', '', scripture)
    reduced = regex.sub(r'\p{Pd}', '-', reduced)
    result = self._bk_ref.search(reduced)
    if result:
        bk_name, rest = result.group(1).strip(), result.group(2).strip()
        bk_num, last = check_book(bk_name)
        if bk_num:
            tr_name = self._tr_book_names[bk_num]
            return tr_name, rest.replace('.', ':'), bk_num, last  # for period notation cases (Gen 1.1)
    return None, None, None, 0
|
196
|
+
|
197
|
+
def _locate_scriptures(self, text):
    """Wrap every recognised reference in ``text`` with ``{{ }}``.

    Recognised references are BCV-encoded and cached in ``self._encoded``.
    Pre-tagged references that are not recognised are temporarily re-marked
    as ``»»|...|««`` so that callers can restore their braces later.
    """

    def r(match):
        scripture = match.group(1)
        # Same test as matching r'{{.*}}' at the start of the candidate.
        tag = bool(self._tagged.match(scripture))
        if tag:
            scripture = scripture.strip('}{')
        if scripture in self._encoded:
            return '{{' + scripture + '}}'
        _, rest, bk_num, last = self._scripture_parts(scripture)
        if bk_num:
            code = self._code_scripture(scripture, bk_num, rest, last)  # validation performed
            if code:
                self._encoded[scripture] = code
                return '{{' + scripture + '}}'
        if tag:
            return '»»|' + scripture + '|««'  # So as not to lose {{ }} on unrecognized pre-tagged scriptures (other language, etc.)
        return scripture

    self._reported = []
    text = self._first_pass.sub(r, text)
    return self._second_pass.sub(r, text)
|
222
|
+
|
223
|
+
|
224
|
+
def list_scriptures(self, text):
    """Return the references found in ``text`` as a list of strings.

    When rewriting is active each reference is replaced by its decoded
    form; when upper-casing is active each entry is upper-cased.
    """
    found = []
    text = self._locate_scriptures(text)
    for tagged in self._tagged.findall(text):
        script = tagged.strip('}{')
        if self._rewrite:
            script = self.decode_scriptures(self._encoded[script])[0]
        if self._upper:
            script = script.upper()
        found.append(script)
    return found
|
235
|
+
|
236
|
+
def tag_scriptures(self, text):
    """Return ``text`` with each recognised reference wrapped in ``{{ }}``."""
    return self.rewrite_scriptures(text, tag=True)
|
238
|
+
|
239
|
+
def rewrite_scriptures(self, text, tag=False):
    """Return ``text`` with its references rewritten in place.

    Args:
        text: Text to process.
        tag: When true, keep each reference wrapped in ``{{ }}``.
    """

    def r(match):
        script = match.group(1).strip('}{')
        if self._rewrite:
            script = self.decode_scriptures(self._encoded[script])[0]
        if self._upper:
            script = script.upper()
        return '{{' + script + '}}' if tag else script

    text = self._locate_scriptures(text)
    # Restore the braces of pre-tagged references that were not recognised.
    return self._tagged.sub(r, text).replace('»»|', '{{').replace('|««', '}}')
|
254
|
+
|
255
|
+
|
256
|
+
def _code_scripture(self, scripture, bk_num, rest, last):
    """Encode the chapter/verse part of a reference as BCV ranges.

    Args:
        scripture: The reference as found (used only for error reports).
        bk_num: Book number.
        rest: Chapter/verse text following the book name ('' = whole book).
        last: ``Last`` value of the book's row in the ranges table; a value
            above 1 makes bare numbers be read as chapters.

    Returns:
        A list of ``(start, end)`` tuples of "bbcccvvv" strings, or ``None``
        when any part fails validation (the failure is reported).
    """

    def reform_series(txt):  # rewrite comma-separated consecutive sequences as (1, 2, 3) as ranges (1-3)
        # The matched text holds only digits, commas and hyphens, so a plain
        # string replacement does what using it as a pattern did.
        for result in self._d_dd.finditer(txt, overlapped=True):
            start, mid, end = result.groups()
            if int(end) - int(mid) == 1:
                txt = txt.replace(result.group(), f"{start}-{end}")
        for _ in range(2):  # the triple pass is deliberately run twice
            for result in self._ddd.finditer(txt, overlapped=True):
                start, end = result.group(1), result.group(3)
                if int(end) - int(start) == 2:
                    txt = txt.replace(result.group(), f"{start}-{end}")
        for result in self._dd.finditer(txt, overlapped=True):
            start, end = result.groups()
            if int(end) - int(start) == 1:
                txt = txt.replace(result.group(), f"{start}-{end}")
        return txt

    def validate(b, ch, vs):
        """Return True when book/chapter/verse exist in the ranges table, else None."""
        c = int(ch)
        v = int(vs)
        if not (0 < b <= 66):  # book out of range
            return None
        if not (0 < c <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter.isnull()), ['Last']].values[0]):  # chapter out of range
            return None
        if not (0 < v <= self._ranges.loc[(self._ranges.Book == b) & (self._ranges.Chapter == c), ['Last']].values[0]):  # verse out of range
            return None
        return True

    def last_verse(book, chapter):
        """Return the zero-padded ``Last`` value for a chapter."""
        return str(self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == int(chapter)), ['Last']].values[0][0]).zfill(3)

    def code_verses(chunk, book, multi):
        """Encode one comma-separated bit.

        Returns ``(range_tuple, chapter_context)``; ``range_tuple`` is
        ``None`` when the bit is invalid or not understood.
        """
        b = str(book).zfill(2)

        result = self._cv_cv.search(chunk)
        if result:
            c1, v1, c2, v2 = result.groups()
            if not validate(book, c1, v1) or not validate(book, c2, v2):
                return None, 0
            ch2 = c2.zfill(3)
            return (b + c1.zfill(3) + v1.zfill(3), b + ch2 + v2.zfill(3)), ch2

        result = self._cv_v.search(chunk)
        if result:
            c, v1, v2 = result.groups()
            if not validate(book, c, v1) or not validate(book, c, v2):
                return None, 0
            ch1 = c.zfill(3)
            return (b + ch1 + v1.zfill(3), b + ch1 + v2.zfill(3)), ch1

        result = self._v_cv.search(chunk)
        if result:
            # "v-c:v" needs the chapter context of the enclosing loop. With
            # none available the bit is rejected; previously str(None) was
            # passed to int() and raised ValueError.
            if ch is None:
                return None, 0
            c = str(ch)
            v = result.group(1)
            if not validate(book, c, v):
                return None, 0
            ch1 = c.zfill(3)
            v1 = v.zfill(3)
            c, v = result.group(2), result.group(3)
            if not validate(book, c, v):
                return None, 0
            ch2 = c.zfill(3)
            return (b + ch1 + v1, b + ch2 + v.zfill(3)), ch2

        result = self._cv.search(chunk)
        if result:
            c, v = result.groups()
            if not validate(book, c, v):
                return None, 0
            code = b + c.zfill(3) + v.zfill(3)
            return (code, code), c.zfill(3)

        result = self._d_d.search(chunk)
        if result:
            first, second = result.groups()
            if multi:  # numbers are chapters: first verse of one to last verse of the other
                if not validate(book, first, 1) or not validate(book, second, 1):
                    return None, 0
                ch1 = first.zfill(3)
                ch2 = second.zfill(3)
                return (b + ch1 + '001', b + ch2 + last_verse(book, ch2)), None
            # numbers are verses of chapter 1
            if not validate(book, 1, first) or not validate(book, 1, second):
                return None, 0
            return (b + '001' + first.zfill(3), b + '001' + second.zfill(3)), '001'

        result = self._d.search(chunk)
        if result:
            number = result.group(1)
            if multi:  # a whole chapter
                if not validate(book, number, 1):
                    return None, 0
                ch1 = number.zfill(3)
                return (b + ch1 + '001', b + ch1 + last_verse(book, ch1)), None
            if not validate(book, 1, number):
                return None, 0
            code = b + '001' + number.zfill(3)
            return (code, code), None

        return None, None

    lst = []
    if rest == '':  # whole book
        v = self._ranges.loc[(self._ranges.Book == bk_num) & (self._ranges.Chapter == last), ['Last']].values[0][0]
        if last == 1:
            rest = f'1-{v}'
        else:
            rest = f'1:1-{last}:{v}'
    else:
        rest = reform_series(rest)
    for chunk in rest.split(';'):
        ch = None  # chapter context; read by code_verses through the closure
        for bit in chunk.split(','):
            if ch:
                tup, ch = code_verses(f"{ch}:{bit}", bk_num, last > 1)
            else:
                tup, ch = code_verses(bit, bk_num, last > 1)
            if not tup:
                self._error_report(scripture, f'"{bit.strip()}" OUT OF RANGE')
                return None
            lst.append(tup)
    return lst
|
429
|
+
|
430
|
+
def code_scriptures(self, text):
    """Return the BCV ranges of every reference found in ``text``.

    The result is one flat list of ``(start, end)`` tuples, in the order
    the references appear.
    """
    text = self._locate_scriptures(text)
    ranges = []
    for tagged in self._tagged.findall(text):
        ranges.extend(self._encoded[tagged.strip('}{')])
    return ranges
|
438
|
+
|
439
|
+
|
440
|
+
def _decode_scripture(self, bcv_range, book='', chap=0, sep=';'):
    """Turn one BCV range into reference text.

    Args:
        bcv_range: ``(start, end)`` tuple of "bbcccvvv" strings.
        book: Book name produced by the previous call ('' at the start).
        chap: Last chapter produced by the previous call.
        sep: Separator state returned by the previous call.

    Returns:
        ``(scripture, book, chap, cont, sep)``. ``cont`` is true when the
        range belongs to the same book as the previous call, in which case
        ``scripture`` is a continuation to append. For an empty or invalid
        range the result is ``(None, '', 0, False, ';')``; it always has
        five items because both callers unpack five values.
    """
    invalid = (None, '', 0, False, ';')
    if not bcv_range:
        return invalid
    start, end = bcv_range
    sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
    eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])

    if sb != eb or not (0 < sb <= 66):  # book out of range
        return invalid
    lc = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter.isnull()), ['Last']].values[0][0]
    if not (0 < sc <= ec <= lc):  # chapter(s) out of range
        return invalid
    se = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == sc), ['Last']].values[0][0]
    le = self._ranges.loc[(self._ranges.Book == sb) & (self._ranges.Chapter == ec), ['Last']].values[0][0]
    if not ((0 < sv <= se) and (0 < ev <= le)):  # verse(s) out of range
        return invalid

    bk_name = self._tr_book_names[sb]
    cont = (book == bk_name)
    book = bk_name
    c = ec - sc + 1  # number of chapters spanned
    v = ev - sv + 1  # verse-number span
    if lc == 1:  # single-chapter book: verses only
        if cont:
            bk_name = ','
        if v == le:
            scripture = f"{bk_name.strip(',')}"
        elif v == 1:
            scripture = f"{bk_name} {sv}"
        elif v == 2:
            scripture = f"{bk_name} {sv}, {ev}"
        else:
            scripture = f"{bk_name} {sv}‑{ev}"
        sep = ';'
    else:
        ch = f"{sc}:"
        if v == le:  # whole chapter(s)
            if cont:
                bk_name = sep
            if c == lc:
                scripture = f"{bk_name.strip(',')}"
            elif c == 1:
                scripture = f"{bk_name} {sc}"
            elif c == 2:
                scripture = f"{bk_name} {sc}, {ec}"
            else:
                scripture = f"{bk_name} {sc}‑{ec}"
            sep = ','
        elif c == 1:  # verse(s) within one chapter
            if cont:
                if sc == chap:
                    bk_name = ''
                    ch = ', '
                else:
                    bk_name = ';'
            if v == 1:
                scripture = f"{bk_name} {ch}{sv}"
            elif v == 2:
                scripture = f"{bk_name} {ch}{sv}, {ev}"
            else:
                scripture = f"{bk_name} {ch}{sv}‑{ev}"
            sep = ';'
        else:  # verses spanning several chapters
            if cont and (sc == chap):
                bk_name = ''
                ch = ', '
            elif cont:
                # Same book, other chapter: use a separator as the
                # single-chapter branch does, rather than repeating the
                # book name directly after the previous text.
                bk_name = ';'
            scripture = f"{bk_name} {ch}{sv}‑{ec}:{ev}"
            sep = ';'
        chap = ec
    if self._separator != ' ':
        scripture = self._sep.sub(self._separator, scripture)
    return scripture.strip(), book, chap, cont, sep
|
520
|
+
|
521
|
+
def decode_scriptures(self, bcv_ranges=None):
    """Decode BCV ranges into a list of reference strings.

    Args:
        bcv_ranges: Iterable of ``(start, end)`` "bbcccvvv" tuples; ``None``
            (the default) is treated as empty.

    Returns:
        One string per run of ranges in the same book; ranges that decode
        to nothing are skipped.
    """
    scriptures = []
    bk = ''
    ch = 0
    sep = ';'
    for bcv_range in bcv_ranges or ():
        scripture, bk, ch, cont, sep = self._decode_scripture(bcv_range, bk, ch, sep)
        if not scripture:
            continue
        if cont and scriptures:
            scriptures[-1] += scripture
        else:
            scriptures.append(scripture)
    return scriptures
|
534
|
+
|
535
|
+
|
536
|
+
def link_scriptures(self, text, prefix='<a href=', suffix='>'):  # NOTE: this always rewrites (full by default) - what if one wants to leave as is??
    """Replace each reference in ``text`` with ``<a>`` link markup.

    Every encoded range becomes ``{prefix}{b:c:v[-b:c:v]}{suffix}text</a>``.

    Args:
        text: Text to process.
        prefix: Markup placed before the link target.
        suffix: Markup placed between the link target and the link text.
    """

    def convert_range(bcv_range):
        """Return "b:c:v" or "b:c:v-b:c:v" for a range (None when empty)."""
        if not bcv_range:
            return None
        start, end = bcv_range
        sb, sc, sv = int(start[:2]), int(start[2:5]), int(start[5:])
        eb, ec, ev = int(end[:2]), int(end[2:5]), int(end[5:])
        if start == end:
            return f"{sb}:{sc}:{sv}"
        return f"{sb}:{sc}:{sv}-{eb}:{ec}:{ev}"

    def r1(match):
        scripture = match.group(1).strip('}{')
        # The markup depends on prefix/suffix, so they are part of the key.
        key = (scripture, prefix, suffix)
        if key in self._linked:
            return self._linked[key]
        output = ''
        bk = ''
        ch = 0
        sep = ';'
        for bcv_range in self._encoded[scripture]:
            scrip, bk, ch, _, sep = self._decode_scripture(bcv_range, bk, ch, sep)
            if not scrip:
                continue
            lnk = convert_range(bcv_range)
            output += self._chunk.sub(lambda m: f'{prefix}{lnk}{suffix}{m.group(1)}</a>', scrip)
        # Book names from self._tr_book_names are already upper-cased when
        # requested. Upper-casing the whole output would also alter the link
        # target and markup, and made the first result differ from the
        # cached one.
        output = output.strip(' ;,')
        self._linked[key] = output
        return output

    text = self._locate_scriptures(text)
    return self._tagged.sub(r1, text).replace('»»|', '{{').replace('|««', '}}')
|
576
|
+
|
577
|
+
|
578
|
+
def serial_chapter_number(self, bcv):
    """Return the serial chapter number for a "bbcccvvv" code.

    The value is taken from the first column of the matching row of the
    chapters table. Returns ``None`` (after reporting) when the code is
    malformed or no row matches.
    """
    try:
        book = int(bcv[0:2])
        chapter = int(bcv[2:5])
        rows = self._chapters.loc[(self._chapters['Book'] == book) & (self._chapters['Chapter'] == chapter)]
        return int(rows.values[0][0])
    except (IndexError, KeyError, TypeError, ValueError):
        self._error_report(bcv, 'OUT OF RANGE')
        return None
|
584
|
+
|
585
|
+
def serial_verse_number(self, bcv):
    """Return the serial verse number for a "bbcccvvv" code.

    The value is taken from the first column of the matching row of the
    verses table. Returns ``None`` (after reporting) when the code is
    malformed or no row matches.
    """
    try:
        book = int(bcv[0:2])
        chapter = int(bcv[2:5])
        verse = int(bcv[5:])
        rows = self._verses.loc[(self._verses['Book'] == book) & (self._verses['Chapter'] == chapter) & (self._verses['Verse'] == verse)]
        return int(rows.values[0][0])
    except (IndexError, KeyError, TypeError, ValueError):
        self._error_report(bcv, 'OUT OF RANGE')
        return None
|
591
|
+
|
592
|
+
def code_chapter(self, chapter):
    """Return the BCV range string for a serial chapter number.

    Args:
        chapter: Serial chapter number (anything ``int()`` accepts).

    Returns:
        A string of the form ``"('bbccc001', 'bbcccvvv')"``, or ``None``
        (after reporting) when the chapter cannot be resolved.
    """
    try:
        row = self._chapters[self._chapters['ChapterId'] == int(chapter)].values[0]
        # Separate names keep the caller's value intact for the error report.
        book, chap = row[1:]
        last = self._ranges.loc[(self._ranges.Book == book) & (self._ranges.Chapter == chap), ['Last']].values[0][0]
    except (IndexError, KeyError, TypeError, ValueError):
        self._error_report(chapter, 'OUT OF RANGE')
        return None
    bcv = str(book).zfill(2) + str(chap).zfill(3)
    return f"('{bcv}001', '{bcv}{str(last).zfill(3)}')"
|
601
|
+
|
602
|
+
def code_verse(self, verse):
    """Return the BCV range string for a serial verse number.

    Args:
        verse: Serial verse number (anything ``int()`` accepts).

    Returns:
        A string of the form ``"('bbcccvvv', 'bbcccvvv')"`` with both
        items equal, or ``None`` (after reporting) when not found.
    """
    try:
        fields = self._verses[self._verses['VerseId'] == int(verse)].values[0][1:]
    except (IndexError, KeyError, TypeError, ValueError):
        self._error_report(verse, 'OUT OF RANGE')
        return None
    # Every field is padded to three digits; the book keeps only two, so the
    # first character is dropped.
    bcv = ''.join(str(i).zfill(3) for i in fields)[1:]
    return f"('{bcv}', '{bcv}')"
|
611
|
+
|
612
|
+
|
613
|
+
def _main(args):
    """Run the command-line interface.

    Args:
        args: Dict of parsed command-line options (``vars()`` of the
            argparse namespace built in the ``__main__`` section).

    NOTE: the help path reads the module-level ``parser``, which only
    exists when the file is run as a script.
    """

    def switchboard(text):
        """Call the ``Scriptures`` method selected by the options."""
        # Auxiliary look-ups take their argument from the option itself.
        if args['cc']:
            return s.code_chapter(args['cc'])
        elif args['cv']:
            return s.code_verse(args['cv'])
        elif args['sc']:
            return s.serial_chapter_number(args['sc'])
        elif args['sv']:
            return s.serial_verse_number(args['sv'])
        if args['l'] is not None:
            prefix = '<a href='
            suffix = '>'
            if len(args['l']) > 1:
                suffix = args['l'][1]
            if len(args['l']) > 0:
                prefix = args['l'][0]
            return s.link_scriptures(text, prefix, suffix)
        elif args['c']:
            return s.code_scriptures(text)
        elif args['d']:
            return s.decode_scriptures(literal_eval(text))
        elif args['x']:
            return s.list_scriptures(text)
        elif args['t']:
            return s.tag_scriptures(text)
        else:
            return s.rewrite_scriptures(text)

    form = None
    if args['standard']:
        form = 'standard'
    elif args['official']:
        form = 'official'
    elif args['full']:
        form = 'full'

    s = Scriptures(language=args['language'], translate=args['translate'], form=form, separator=args['s'], upper=args['u'], verbose=(not args['q']))

    if args['f']:
        if args['o'] and (args['o'] == args['f']):
            print('Make sure in-file and out-file are different!\n')
            raise SystemExit
        with open(args['f'], 'r', encoding='UTF-8') as f:
            txt = f.read()
    else:
        txt = args['r']

    # The auxiliary functions take no -f/-r input (see the argument group
    # description), so they must run even when there is no text.
    aux_requested = any(args[key] for key in ('cc', 'cv', 'sc', 'sv'))
    if txt or aux_requested:
        txt = switchboard(txt)
    else:
        print(parser.format_help())
        raise SystemExit

    if args['o']:
        with open(args['o'], 'w', encoding='UTF-8') as f:
            f.write(str(txt))
    else:
        print(txt)
|
673
|
+
|
674
|
+
if __name__ == "__main__":
    # Command-line entry point: build the argument parser and hand the
    # parsed options to _main() as a plain dict.
    PROJECT_PATH = Path(__file__).resolve().parent
    APP = Path(__file__).stem
    parser = argparse.ArgumentParser(description="PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode. See README for more information")

    parser.add_argument('-v', action='version', version=f"{APP} {VERSION}", help='show version and exit')
    parser.add_argument('-q', action='store_true', help="don't show errors")

    # Input comes from a file or from the command line, never both.
    function_group = parser.add_argument_group('data source (one required - except for auxiliary functions, which only take command-line arguments)', 'choose between terminal or file input:')
    mode = function_group.add_mutually_exclusive_group()
    mode.add_argument('-f', metavar='in-file', help='get input from file (UTF-8)')
    mode.add_argument('-r', metavar='reference', help='process "reference; reference; etc."')
    parser.add_argument('-o', metavar='out-file', help='output file (terminal output if not provided)')

    parser.add_argument('--language', default='English', choices=available_languages, help='indicate source language for book names (English if unspecified)')
    parser.add_argument('--translate', choices=available_languages, help='indicate output language for book names (same as source if unspecified)')
    parser.add_argument('-s', metavar='separator', default=' ', help='segment separator (space by default)')
    parser.add_argument('-u', action='store_true', help='capitalize (upper-case) book names')
    format_group = parser.add_argument_group('output format (optional)', 'if provided, book names will be rewritten accordingly:')
    formats = format_group.add_mutually_exclusive_group()
    formats.add_argument('--full', action='store_true', help='output as full name - default (eg., "Genesis")')
    formats.add_argument('--official', action='store_true', help='output as official abbreviation (eg., "Ge")')
    formats.add_argument('--standard', action='store_true', help='output as standard abbreviation (eg., "Gen.")')

    type_group = parser.add_argument_group('type of conversion', 'if not specified, references are simply rewritten according to chosen (or default) output format:')
    tpe = type_group.add_mutually_exclusive_group(required=False)
    tpe.add_argument('-c', action='store_true', help='encode as BCV-notation ranges')
    tpe.add_argument('-d', action='store_true', help='decode list of BCV-notation ranges')
    tpe.add_argument('-l', nargs='*', metavar=('prefix', 'suffix'), help='create <a></a> links; provide a "prefix" and a "suffix" (or neither for testing)')
    tpe.add_argument('-t', action='store_true', help='tag scriptures with {{ }}')
    tpe.add_argument('-x', action='store_true', help='extract list of scripture references')

    aux_group = parser.add_argument_group('auxiliary functions')
    aux = aux_group.add_mutually_exclusive_group(required=False)
    aux.add_argument('-sc', metavar=('BCV'), help='return the serial number (1-1189) of the chapter with code "BCV" ("bbcccvvv")')
    aux.add_argument('-sv', metavar=('BCV'), help='return the serial number (1-31091) of the verse with code "BCV" ("bbcccvvv")')
    aux.add_argument('-cc', metavar=('chapter'), help='return the BCV range for serial chapter number "chapter" (integer value)')
    aux.add_argument('-cv', metavar=('verse'), help='return the BCV code for serial verse number "verse" (integer value)')
    args = parser.parse_args()
    _main(vars(args))
|
@@ -1,6 +1,6 @@
|
|
1
1
|
[project]
|
2
2
|
name = "linkture"
|
3
|
-
version = "2.5.
|
3
|
+
version = "2.5.4"
|
4
4
|
description = "PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode"
|
5
5
|
authors = [
|
6
6
|
{ name = "Eryk J.", email = "infiniti@inventati.org" },
|
@@ -10,7 +10,7 @@ dependencies = [
|
|
10
10
|
"argparse>=1.4.0",
|
11
11
|
"regex>=2023.8.8",
|
12
12
|
"unidecode>=1.3.8",
|
13
|
-
"pathlib",
|
13
|
+
"pathlib>=1.0.1",
|
14
14
|
"pandas==2.2.2",
|
15
15
|
]
|
16
16
|
requires-python = ">=3.9"
|
@@ -18,9 +18,15 @@ readme = "README.md"
|
|
18
18
|
classifiers = [
|
19
19
|
"License :: OSI Approved :: MIT License",
|
20
20
|
"Operating System :: OS Independent",
|
21
|
-
"Programming Language :: Python :: 3",
|
21
|
+
"Programming Language :: Python :: 3.9",
|
22
|
+
"Programming Language :: Python :: 3.10",
|
23
|
+
"Programming Language :: Python :: 3.11",
|
24
|
+
"Programming Language :: Python :: 3.12",
|
22
25
|
"Development Status :: 5 - Production/Stable",
|
23
26
|
"Environment :: Console",
|
27
|
+
"Topic :: Religion",
|
28
|
+
"Topic :: Text Processing :: General",
|
29
|
+
"Topic :: Text Processing :: Linguistic",
|
24
30
|
]
|
25
31
|
keywords = [
|
26
32
|
"bible",
|
linkture-2.5.4/setup.py
ADDED
@@ -0,0 +1,49 @@
|
|
1
|
+
import setuptools
from pathlib import Path

# Packaging script for linkture; the long description is the README.
work_dir = Path(__file__).parent
# An explicit encoding keeps the build independent of the platform default.
long_description = (work_dir / 'README.md').read_text(encoding='utf-8')


setuptools.setup(

    name='linkture',
    version='2.5.4',
    author='Eryk J.',
    author_email='infiniti@inventati.org',
    url='https://github.com/erykjj/linkture',
    license='MIT',

    description='PARSE and PROCESS BIBLE SCRIPTURE REFERENCES: extract, tag, link, rewrite, translate, BCV-encode and decode',
    long_description=long_description,
    long_description_content_type='text/markdown',
    keywords=[
        'bible', 'scriptures', 'scripture-references', 'scripture-translation',
        'scripture-parser', 'scripture-linker'
    ],

    packages=setuptools.find_packages(),
    classifiers=[
        'License :: OSI Approved :: MIT License',
        'Operating System :: OS Independent',
        'Programming Language :: Python :: 3.9',
        'Programming Language :: Python :: 3.10',
        'Programming Language :: Python :: 3.11',
        'Programming Language :: Python :: 3.12',
        'Development Status :: 5 - Production/Stable',
        'Environment :: Console',
        'Topic :: Religion',
        'Topic :: Text Processing :: General',
        'Topic :: Text Processing :: Linguistic'
    ],
    python_requires='>=3.9',
    # NOTE(review): 'argparse' and 'pathlib' are in the standard library for
    # every Python version allowed by python_requires - confirm that these
    # two requirements are still needed.
    install_requires=[
        'setuptools>=59.6.0',
        'argparse>=1.4.0',
        'regex>=2023.8.8',
        'unidecode>=1.3.8',
        'pathlib>=1.0.1',
        'pandas==2.2.2'
    ]

)
|
linkture-2.5.3/tests/__init__.py
DELETED
File without changes
|
File without changes
|
File without changes
|