skrutable 2.4.0__tar.gz → 2.5.1__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skrutable-2.4.0 → skrutable-2.5.1}/PKG-INFO +1 -1
- skrutable-2.5.1/src/skrutable/__init__.py +1 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/manual.md +25 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/meter_identification.py +178 -22
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/scansion.py +9 -4
- skrutable-2.5.1/src/skrutable/utils.py +20 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable.egg-info/PKG-INFO +1 -1
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable.egg-info/SOURCES.txt +1 -0
- skrutable-2.4.0/src/skrutable/__init__.py +0 -1
- {skrutable-2.4.0 → skrutable-2.5.1}/LICENSE.md +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/README.md +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/setup.cfg +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/setup.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/config.json +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/config.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/generate_scheme_vectors.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/impossible_bigrams.json +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/meter_patterns.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/phonemes.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/run_examples.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/scheme_vectors.json +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/splitting.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/transliteration.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.4.0 → skrutable-2.5.1}/src/skrutable.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.5.1"
|
|
@@ -130,6 +130,31 @@ Key terms:
|
|
|
130
130
|
* *jāti*: four quarters with set patterns of total moraic length
|
|
131
131
|
|
|
132
132
|
|
|
133
|
+
# scan timing profiling
|
|
134
|
+
|
|
135
|
+
`skrutable` includes a built-in profiling system for measuring meter identification performance across a corpus. It is disabled by default (`utils._DEBUG_TIMING = False`); while disabled, the instrumented calls only pass through a thin wrapper that checks the flag, and nothing is timed or recorded.
|
|
136
|
+
|
|
137
|
+
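To use it, set `_DEBUG_TIMING` before importing `MeterIdentifier`. The order matters: `meter_identification` copies the flag's value when it is first imported, so a change made afterwards is not seen by the per-category bookkeeping or by `flush_profiling_report`: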
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import skrutable.utils as _utils
|
|
141
|
+
_utils._DEBUG_TIMING = True
|
|
142
|
+
|
|
143
|
+
from skrutable.meter_identification import MeterIdentifier, flush_profiling_report
|
|
144
|
+
|
|
145
|
+
MI = MeterIdentifier()
|
|
146
|
+
for verse in my_verses:
|
|
147
|
+
    MI.identify_meter(verse, resplit_option="resplit_lite", resplit_keep_midpoint=True, from_scheme="IAST") # e.g.
|
|
148
|
+
|
|
149
|
+
flush_profiling_report() # prints table to stderr, resets counters
|
|
150
|
+
# flush_profiling_report(write_file=True) # also writes profiling_debug.txt
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
The table breaks down wall-clock time per meter category with columns for each scan sub-phase (`clean`, `transl`, `syl`, `wts`, `mor+g`) and each identification type (`anuṣṭ`, `samav`, `jāti`, etc.), plus perfect/imperfect verse counts per category.
|
|
154
|
+
|
|
155
|
+
If using the front end, `make launch-profiling` (or `./launch.sh --scan-profiling`) enables profiling for the server process without touching any source files.
|
|
156
|
+
|
|
157
|
+
|
|
133
158
|
# sandhi and compound splitting
|
|
134
159
|
|
|
135
160
|
`skrutable` provides a wrapper for applying pre-trained splitting models via separate online servers ([my own splitter_server for the 2018 model](https://2018emnlp-sanskrit-splitter-server.duckdns.org/) and https://dharmamitra.org). A working internet connection is required for this functionality. The wrapper preserves original sentence length and punctuation, and it also helps utilize the Dharmamitra ByT5-Sanskrit model's ability to distinguish compounds from inter-word breaks.
|
|
@@ -1,6 +1,7 @@
|
|
|
1
1
|
from skrutable.scansion import Scanner as Sc
|
|
2
2
|
from skrutable import meter_patterns
|
|
3
3
|
from skrutable.config import load_config_dict_from_json_file
|
|
4
|
+
from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
|
|
4
5
|
import re
|
|
5
6
|
from copy import copy
|
|
6
7
|
from dataclasses import dataclass
|
|
@@ -14,6 +15,120 @@ default_resplit_keep_midpoint = config["default_resplit_keep_midpoint"] # e.g.
|
|
|
14
15
|
disable_non_trizwuB_upajAti = config["disable_non_trizwuB_upajAti"] # e.g. True
|
|
15
16
|
meter_scores = config["meter_scores"] # dict
|
|
16
17
|
|
|
18
|
+
_category_totals = {} # { category: { section: float seconds } }, single source of truth
|
|
19
|
+
|
|
20
|
+
|
|
21
|
+
# Meter names that mark a label as ardhasamavṛtta when the label begins with one of them.
_ARDHASAMAVRTTA_NAMES = [
    'aparavaktra',
    'upacitra',
    'puṣpitāgrā',
    'viyoginī',
    'vegavatī',
    'hariṇaplutā',
    'aupacchandasika',
    'ajñātārdhasamavṛtta',
]

# Label prefixes of the named jāti sub-types.
_JATI_SUBCATS = ['āryā', 'gīti', 'upagīti', 'udgīti', 'āryāgīti']


def _meter_label_to_category(label):
    """Map a meter label to the category name used to bucket profiling data.

    The checks run in a fixed order and the first one that matches decides the
    result, so e.g. a label containing 'upajāti' is reported as 'upajāti' even
    though it also contains the substring 'jāti'.

    An empty/None label, or one containing 'adhyavasitam', yields
    'na kiṃcid adhyavasitam'. Any other label that matches none of the checks
    falls through to 'samavṛtta'.
    """
    if not label:
        return 'na kiṃcid adhyavasitam'

    def mentions(*needles):
        # True when at least one of the given substrings occurs in the label.
        return any(needle in label for needle in needles)

    if mentions('adhyavasitam'):
        return 'na kiṃcid adhyavasitam'
    if mentions('anuṣṭubh', 'anustubh'):
        return 'anuṣṭubh'
    if mentions('upajāti'):
        return 'upajāti'
    if label.startswith(tuple(_ARDHASAMAVRTTA_NAMES)) or mentions('ardhasamavṛtta'):
        return 'ardhasamavṛtta'
    if label.startswith('udgatā'):
        return 'viṣamavṛtta'
    if label.startswith(tuple(_JATI_SUBCATS)) or mentions('jāti', 'vaitālīya', 'mātrā'):
        return 'jāti'
    return 'samavṛtta'
|
|
45
|
+
|
|
46
|
+
|
|
47
|
+
def _verse_is_perfect(V):
    """Return the verse's ``is_perfect`` attribute, or False when it has none.

    The stored value is handed back unchanged; only a missing attribute is
    replaced by False.
    """
    try:
        return V.is_perfect
    except AttributeError:
        return False
|
|
50
|
+
|
|
51
|
+
|
|
52
|
+
def flush_profiling_report(write_file=False):
    """Print the accumulated profiling table to stderr, then reset all counters.

    One row is printed per category present in ``_category_totals`` (in a fixed
    category order), followed by a TOTAL row. Each row shows perfect/imperfect
    verse counts, total / scan / identification subtotals, and one column per
    scan sub-phase and per identification type.

    Column widths are derived from the cells that are actually printed
    (including the TOTAL row), so long timings or large counts cannot overflow
    a column, and the separator rule always matches the table width.

    Pass write_file=True to also write the table to profiling_debug.txt alongside the library source.
    Does nothing (and resets nothing) when _DEBUG_TIMING is False or when no
    per-category data has been collected.
    """
    # _DEBUG_TIMING is the name as bound in this module's namespace.
    if not _DEBUG_TIMING or not _category_totals:
        return
    import os
    import sys

    scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
    type_keys = ('anuzwuB', 'samavftta', 'upajAti', 'vizamavftta', 'jAti')
    scan_abbrev = {'scan_clean': 'clean', 'scan_translit': 'transl', 'scan_syllabify': 'syl', 'scan_weights': 'wts', 'scan_morae_gana': 'mor+g'}
    type_abbrev = {
        'anuzwuB': 'anuṣṭ', 'samavftta': 'samav', 'upajAti': 'upajāti', 'vizamavftta': 'vizama', 'jAti': 'jāti',
    }
    cat_order = ['anuṣṭubh', 'samavṛtta', 'upajāti', 'ardhasamavṛtta', 'viṣamavṛtta', 'jāti', 'na kiṃcid adhyavasitam']
    indent = ' '
    gap = '  '  # blank run between column groups and between timing columns

    def secs(value):
        return f'{value:.2f}s'

    def to_cells(label, n_perf, n_impf, scan_times, type_times):
        # (label, count cells, subtotal cells, scan cells, type cells), all as strings.
        scan_sum = sum(scan_times)
        type_sum = sum(type_times)
        return (
            label,
            [str(n_perf), str(n_impf)],
            [secs(scan_sum + type_sum), secs(scan_sum), secs(type_sum)],
            [secs(t) for t in scan_times],
            [secs(t) for t in type_times],
        )

    # Gather every data row before formatting anything, so widths can be
    # computed from real content instead of assumed from '0.00s'.
    body = []
    total_perfect = 0
    total_imperfect = 0
    total_scan_times = [0.0] * len(scan_keys)
    total_type_times = [0.0] * len(type_keys)
    for cat in cat_order:
        bucket = _category_totals.get(cat)
        if not bucket:
            continue
        scan_times = [bucket.get(k, 0.0) for k in scan_keys]
        type_times = [bucket.get(k, 0.0) for k in type_keys]
        n_perf = bucket.get('_perfect_count', 0)
        n_impf = bucket.get('_count', 0) - n_perf
        total_perfect += n_perf
        total_imperfect += n_impf
        total_scan_times = [acc + t for acc, t in zip(total_scan_times, scan_times)]
        total_type_times = [acc + t for acc, t in zip(total_type_times, type_times)]
        body.append(to_cells(cat, n_perf, n_impf, scan_times, type_times))
    total_row = to_cells('TOTAL', total_perfect, total_imperfect, total_scan_times, total_type_times)

    header = (
        'category',
        ['perf', 'impf'],
        ['total', 'scan∑', 'types∑'],
        [scan_abbrev[k] for k in scan_keys],
        [type_abbrev[k] for k in type_keys],
    )
    table = [header] + body + [total_row]

    # The category column is sized from all known category names so its width
    # does not depend on which categories happen to be present.
    col_cat_w = max(len(c) for c in cat_order + ['category']) + 2
    count_w = max(len(cell) for row in table for cell in row[1]) + 1
    sub_w = max(len(cell) for row in table for cell in row[2]) + 2
    scan_col_ws = [max(map(len, column)) + 1 for column in zip(*(row[3] for row in table))]
    type_col_ws = [max(map(len, column)) + 1 for column in zip(*(row[4] for row in table))]

    def render(label, counts, subtotals, scan_cells, type_cells):
        return (indent + label.ljust(col_cat_w)
                + ''.join(cell.rjust(count_w) for cell in counts)
                + ''.join(cell.rjust(sub_w) for cell in subtotals)
                + gap + gap.join(cell.rjust(w) for cell, w in zip(scan_cells, scan_col_ws))
                + gap + gap.join(cell.rjust(w) for cell, w in zip(type_cells, type_col_ws)))

    hdr = render(*header)
    # Derive the rule from the rendered header rather than re-adding the widths by hand.
    sep = indent + '-' * (len(hdr) - len(indent))

    # The verse count covers every bucket, including any category not listed in cat_order.
    n_verses = sum(b.get('_count', 0) for b in _category_totals.values())
    wiggle_count = _section_totals.get('wiggle_count', 0)
    lines = [f'\n=== {n_verses} verses / {wiggle_count} resplit candidates ===', hdr, sep]
    lines += [render(*row) for row in body]
    lines += [sep, render(*total_row)]
    report = '\n'.join(lines) + '\n'

    if write_file:
        timing_path = os.path.join(os.path.dirname(__file__), 'profiling_debug.txt')
        with open(timing_path, 'w', encoding='utf-8') as _f:
            _f.write(report)
    print(report, file=sys.stderr, flush=True)
    _category_totals.clear()
    _section_totals.clear()
|
|
131
|
+
|
|
17
132
|
|
|
18
133
|
@dataclass
|
|
19
134
|
class Diagnostic:
|
|
@@ -193,7 +308,7 @@ class VerseTester(object):
|
|
|
193
308
|
self.identification_attempt_count = 0
|
|
194
309
|
self._anuzwuB_half_cache = {} # cleared per wiggle_identify run
|
|
195
310
|
|
|
196
|
-
def combine_results(self, Vrs, new_label, new_score):
|
|
311
|
+
def combine_results(self, Vrs, new_label, new_score, new_is_perfect=False):
|
|
197
312
|
old_label = Vrs.meter_label or ''
|
|
198
313
|
old_score = Vrs.identification_score
|
|
199
314
|
|
|
@@ -207,6 +322,7 @@ class VerseTester(object):
|
|
|
207
322
|
# override previous
|
|
208
323
|
Vrs.meter_label = new_label
|
|
209
324
|
Vrs.identification_score = new_score
|
|
325
|
+
Vrs.is_perfect = new_is_perfect
|
|
210
326
|
|
|
211
327
|
elif new_score == old_score:
|
|
212
328
|
# tie, concatenate as old + new
|
|
@@ -326,12 +442,14 @@ class VerseTester(object):
|
|
|
326
442
|
if ardham_eva_result.perfect():
|
|
327
443
|
Vrs.meter_label = f"anuṣṭubh (ardham eva: {ardham_eva_result.perfect_id_label})"
|
|
328
444
|
Vrs.identification_score = meter_scores["anuṣṭubh, half, single half perfect)"]
|
|
445
|
+
Vrs.is_perfect = True
|
|
329
446
|
Vrs.diagnostic = ardham_eva_result
|
|
330
447
|
return ardham_eva_result
|
|
331
448
|
elif ardham_eva_result.imperfect():
|
|
332
449
|
label_str = '; '.join(f"{k}: {v}" for k, v in ardham_eva_result.imperfect_label_sanskrit.items())
|
|
333
450
|
Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
|
|
334
451
|
Vrs.identification_score = meter_scores["anuṣṭubh, half, single half imperfect)"]
|
|
452
|
+
Vrs.is_perfect = False
|
|
335
453
|
Vrs.diagnostic = ardham_eva_result
|
|
336
454
|
return ardham_eva_result
|
|
337
455
|
else:
|
|
@@ -346,6 +464,7 @@ class VerseTester(object):
|
|
|
346
464
|
if pAdas_ab_result.perfect() and pAdas_cd_result.perfect():
|
|
347
465
|
Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {pAdas_cd_result.perfect_id_label})"
|
|
348
466
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves perfect)"]
|
|
467
|
+
Vrs.is_perfect = True
|
|
349
468
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
350
469
|
return pAdas_ab_result
|
|
351
470
|
|
|
@@ -355,12 +474,14 @@ class VerseTester(object):
|
|
|
355
474
|
ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
|
|
356
475
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
|
|
357
476
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
|
|
477
|
+
Vrs.is_perfect = False
|
|
358
478
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
359
479
|
return pAdas_ab_result
|
|
360
480
|
elif pAdas_ab_result.perfect() and pAdas_cd_result.imperfect():
|
|
361
481
|
cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
|
|
362
482
|
Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {cd_str})"
|
|
363
483
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
|
|
484
|
+
Vrs.is_perfect = False
|
|
364
485
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
365
486
|
return pAdas_cd_result
|
|
366
487
|
|
|
@@ -371,6 +492,7 @@ class VerseTester(object):
|
|
|
371
492
|
cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
|
|
372
493
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {cd_str})"
|
|
373
494
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves imperfect)"]
|
|
495
|
+
Vrs.is_perfect = False
|
|
374
496
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
375
497
|
return pAdas_ab_result
|
|
376
498
|
|
|
@@ -380,12 +502,14 @@ class VerseTester(object):
|
|
|
380
502
|
ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
|
|
381
503
|
Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
|
|
382
504
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
|
|
505
|
+
Vrs.is_perfect = False
|
|
383
506
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
384
507
|
return pAdas_cd_result
|
|
385
508
|
elif pAdas_ab_result.perfect() and pAdas_cd_result.length_error():
|
|
386
509
|
cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
|
|
387
510
|
Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: ?? {cd_str})"
|
|
388
511
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
|
|
512
|
+
Vrs.is_perfect = False
|
|
389
513
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
390
514
|
return pAdas_ab_result
|
|
391
515
|
|
|
@@ -396,6 +520,7 @@ class VerseTester(object):
|
|
|
396
520
|
cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
|
|
397
521
|
Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {cd_str})"
|
|
398
522
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
|
|
523
|
+
Vrs.is_perfect = False
|
|
399
524
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
400
525
|
return pAdas_cd_result
|
|
401
526
|
elif pAdas_ab_result.imperfect() and pAdas_cd_result.length_error():
|
|
@@ -403,6 +528,7 @@ class VerseTester(object):
|
|
|
403
528
|
cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
|
|
404
529
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: ?? {cd_str})"
|
|
405
530
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
|
|
531
|
+
Vrs.is_perfect = False
|
|
406
532
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
407
533
|
return pAdas_ab_result
|
|
408
534
|
|
|
@@ -538,7 +664,7 @@ class VerseTester(object):
|
|
|
538
664
|
|
|
539
665
|
# score arbitration: may tie with pre-existing result (e.g., upajāti)
|
|
540
666
|
old_score = Vrs.identification_score
|
|
541
|
-
self.combine_results(Vrs, new_label=meter_label, new_score=score)
|
|
667
|
+
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=imperfect_note is None)
|
|
542
668
|
if score >= old_score:
|
|
543
669
|
Vrs.diagnostic = diagnostic
|
|
544
670
|
|
|
@@ -547,6 +673,7 @@ class VerseTester(object):
|
|
|
547
673
|
def evaluate_ardhasamavftta(self, Vrs):
|
|
548
674
|
# sufficient pAdasamatva already assured, now just evaluate
|
|
549
675
|
Vrs.identification_score = meter_scores["ardhasamavṛtta, perfect"]
|
|
676
|
+
Vrs.is_perfect = True
|
|
550
677
|
|
|
551
678
|
wbp = Vrs.syllable_weights.split('\n') # weights by pāda
|
|
552
679
|
|
|
@@ -575,6 +702,7 @@ class VerseTester(object):
|
|
|
575
702
|
meter_label = "ajñātārdhasamavṛtta" # i.e., might need to add to meter_patterns
|
|
576
703
|
meter_label += ' [%s, %s]' % (odd_g_to_id, even_g_to_id)
|
|
577
704
|
Vrs.identification_score = meter_scores["ardhasamavṛtta, perfect, unknown"]
|
|
705
|
+
Vrs.is_perfect = True # "perfect, unknown" means pattern unknown, not imperfect
|
|
578
706
|
|
|
579
707
|
Vrs.meter_label = meter_label
|
|
580
708
|
Vrs.diagnostic = Diagnostic(perfect_id_label=meter_label)
|
|
@@ -729,7 +857,11 @@ class VerseTester(object):
|
|
|
729
857
|
|
|
730
858
|
# score arbitration: may tie with pre-existing result (e.g., samavṛtta)
|
|
731
859
|
old_score = Vrs.identification_score
|
|
732
|
-
|
|
860
|
+
is_perf = (score in (meter_scores["upajāti, perfect"],
|
|
861
|
+
meter_scores["upajāti, triṣṭubh-jagatī-saṃkara, perfect"],
|
|
862
|
+
meter_scores["upajāti, non-triṣṭubh, perfect"])
|
|
863
|
+
and 'ajñātam' not in overall_meter_label)
|
|
864
|
+
self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=is_perf)
|
|
733
865
|
if score >= old_score:
|
|
734
866
|
Vrs.diagnostic = diagnostic
|
|
735
867
|
|
|
@@ -742,6 +874,7 @@ class VerseTester(object):
|
|
|
742
874
|
for (a, b, c, d) in meter_patterns.vizamavftta_by_4_tuple:
|
|
743
875
|
if (gs_to_id[0],gs_to_id[1],gs_to_id[2],gs_to_id[3]) == (a, b, c, d):
|
|
744
876
|
Vrs.identification_score = meter_scores["viṣamavṛtta, perfect"]
|
|
877
|
+
Vrs.is_perfect = True
|
|
745
878
|
Vrs.meter_label = meter_patterns.vizamavftta_by_4_tuple[(a, b, c, d)]
|
|
746
879
|
Vrs.diagnostic = Diagnostic(perfect_id_label=Vrs.meter_label)
|
|
747
880
|
return True
|
|
@@ -770,7 +903,7 @@ class VerseTester(object):
|
|
|
770
903
|
# test perfect samavṛtta
|
|
771
904
|
if self.pAdasamatva_count == 4:
|
|
772
905
|
# definitely checks out, id_score == 9
|
|
773
|
-
self.evaluate_samavftta(Vrs)
|
|
906
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
774
907
|
return 1 # max score already reached
|
|
775
908
|
|
|
776
909
|
# test perfect ardhasamavftta
|
|
@@ -786,10 +919,10 @@ class VerseTester(object):
|
|
|
786
919
|
|
|
787
920
|
# test perfect single pāda of samavṛtta
|
|
788
921
|
if ( self.pAdasamatva_count == 0 and self.resplit_option == "single_pAda"):
|
|
789
|
-
self.evaluate_samavftta(Vrs)
|
|
922
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
790
923
|
|
|
791
924
|
# test perfect viṣamavṛtta
|
|
792
|
-
if self.pAdasamatva_count == 0 and self.is_vizamavftta(Vrs):
|
|
925
|
+
if self.pAdasamatva_count == 0 and timed('vizamavftta')(self.is_vizamavftta)(Vrs):
|
|
793
926
|
# will give id_score == 9
|
|
794
927
|
# label and score already set in is_vizamavftta if test was successful
|
|
795
928
|
return 1 # max score already reached
|
|
@@ -800,14 +933,14 @@ class VerseTester(object):
|
|
|
800
933
|
unique_sorted_lens.sort()
|
|
801
934
|
if len(unique_sorted_lens) == 1: # all same length
|
|
802
935
|
# will give id_score in [8, 7], may tie with above
|
|
803
|
-
self.evaluate_upajAti(Vrs)
|
|
936
|
+
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
804
937
|
if Vrs.identification_score == 8: return 1 # best score compared to below
|
|
805
938
|
# otherwise, max score not necessarily yet reached, don't return
|
|
806
939
|
|
|
807
940
|
# test imperfect samavftta
|
|
808
941
|
if self.pAdasamatva_count in [2, 3]:
|
|
809
942
|
# will give id_score in [7, 6], may tie with above
|
|
810
|
-
self.evaluate_samavftta(Vrs)
|
|
943
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
811
944
|
# max score not necessarily yet reached, don't return
|
|
812
945
|
|
|
813
946
|
# test imperfect ardhasamavftta? seems hard
|
|
@@ -819,7 +952,7 @@ class VerseTester(object):
|
|
|
819
952
|
unique_sorted_lens == [11, 12]
|
|
820
953
|
): # either not all same length or triṣṭubh-jagatī mix
|
|
821
954
|
# will give id_score in [6, 5, 4], may tie with above
|
|
822
|
-
self.evaluate_upajAti(Vrs)
|
|
955
|
+
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
823
956
|
|
|
824
957
|
# return success
|
|
825
958
|
if Vrs.meter_label != None:
|
|
@@ -889,6 +1022,7 @@ class VerseTester(object):
|
|
|
889
1022
|
suffix = '; '.join(f"ardha {i+1}: {v}" for i, v in enumerate(sa_vals))
|
|
890
1023
|
Vrs.meter_label = jati_label + f" ({suffix})"
|
|
891
1024
|
Vrs.identification_score = likely_score
|
|
1025
|
+
Vrs.is_perfect = False
|
|
892
1026
|
Vrs.diagnostic = Diagnostic(
|
|
893
1027
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
894
1028
|
imperfect_label_english=per_pada_english or None,
|
|
@@ -1064,6 +1198,7 @@ class VerseTester(object):
|
|
|
1064
1198
|
if jati_score >= Vrs.identification_score:
|
|
1065
1199
|
Vrs.meter_label = jati_label + f" ({imperfect_label_sa})"
|
|
1066
1200
|
Vrs.identification_score = jati_score
|
|
1201
|
+
Vrs.is_perfect = False
|
|
1067
1202
|
Vrs.mAtragaNa_abbreviations = mAtragaNa_abbrevs
|
|
1068
1203
|
Vrs.diagnostic = Diagnostic(
|
|
1069
1204
|
imperfect_label_sanskrit=label_sa_by_pada or None,
|
|
@@ -1116,6 +1251,7 @@ class VerseTester(object):
|
|
|
1116
1251
|
if score >= Vrs.identification_score:
|
|
1117
1252
|
Vrs.meter_label = new_label
|
|
1118
1253
|
Vrs.identification_score = score
|
|
1254
|
+
Vrs.is_perfect = score == meter_scores["jāti, perfect"]
|
|
1119
1255
|
Vrs.mAtragaNa_abbreviations = mAtragaNa_abbrevs
|
|
1120
1256
|
Vrs.diagnostic = diagnostic
|
|
1121
1257
|
return 1
|
|
@@ -1156,25 +1292,23 @@ class VerseTester(object):
|
|
|
1156
1292
|
self.identification_attempt_count += 1
|
|
1157
1293
|
|
|
1158
1294
|
# anuzwuB
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
if anuzwuB_diagnostic and Vrs.identification_score == meter_scores["max score"]:
|
|
1295
|
+
success_anuzwuB = timed('anuzwuB')(self.test_as_anuzwuB)(Vrs)
|
|
1296
|
+
if success_anuzwuB and Vrs.identification_score == meter_scores["max score"]:
|
|
1162
1297
|
return 1
|
|
1163
1298
|
|
|
1164
1299
|
# samavftta, upajAti, vizamavftta, ardhasamavftta
|
|
1165
|
-
|
|
1166
1300
|
success_samavftta_etc = self.test_as_samavftta_etc(Vrs)
|
|
1167
|
-
if success_samavftta_etc and Vrs.identification_score >= 8:
|
|
1301
|
+
if success_samavftta_etc and Vrs.identification_score >= 8:
|
|
1302
|
+
return 1
|
|
1168
1303
|
# i.e., if upajāti or anything imperfect, also continue on to check jāti
|
|
1169
1304
|
|
|
1170
1305
|
# problem: how to change above handling for rare case
|
|
1171
1306
|
# where ardhasamavftta is also jAti?
|
|
1172
1307
|
|
|
1173
1308
|
# jāti
|
|
1309
|
+
success_jAti = timed('jAti')(self.test_as_jAti)(Vrs)
|
|
1174
1310
|
|
|
1175
|
-
|
|
1176
|
-
|
|
1177
|
-
if anuzwuB_diagnostic or success_samavftta_etc or success_jAti:
|
|
1311
|
+
if success_anuzwuB or success_samavftta_etc or success_jAti:
|
|
1178
1312
|
return 1
|
|
1179
1313
|
else:
|
|
1180
1314
|
return 0
|
|
@@ -1260,13 +1394,17 @@ class MeterIdentifier(object):
|
|
|
1260
1394
|
|
|
1261
1395
|
temp_V = copy(Vrs)
|
|
1262
1396
|
temp_V.text_syllabified = new_text_syllabified
|
|
1263
|
-
|
|
1397
|
+
|
|
1398
|
+
if _DEBUG_TIMING:
|
|
1399
|
+
_section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + 1
|
|
1400
|
+
|
|
1401
|
+
temp_V.syllable_weights = timed('scan_weights')(S.scan_syllable_weights)(
|
|
1264
1402
|
temp_V.text_syllabified)
|
|
1265
|
-
temp_V.morae_per_line = S.count_morae(
|
|
1403
|
+
temp_V.morae_per_line = timed('scan_morae_gana')(S.count_morae)(
|
|
1266
1404
|
temp_V.syllable_weights)
|
|
1267
|
-
temp_V.gaRa_abbreviations = '
|
|
1268
|
-
|
|
1269
|
-
)
|
|
1405
|
+
temp_V.gaRa_abbreviations = timed('scan_morae_gana')(
|
|
1406
|
+
lambda: '\n'.join([ S.gaRa_abbreviate(line) for line in temp_V.syllable_weights.split('\n') ])
|
|
1407
|
+
)()
|
|
1270
1408
|
|
|
1271
1409
|
success = VrsTster.attempt_identification(temp_V)
|
|
1272
1410
|
|
|
@@ -1343,6 +1481,11 @@ class MeterIdentifier(object):
|
|
|
1343
1481
|
# gets back mostly populated Verse object
|
|
1344
1482
|
V = S.scan(rw_str, from_scheme=from_scheme)
|
|
1345
1483
|
|
|
1484
|
+
if _DEBUG_TIMING:
|
|
1485
|
+
_pre_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
1486
|
+
'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta', 'jAti')
|
|
1487
|
+
_pre = {k: _section_totals.get(k, 0.0) for k in _pre_keys}
|
|
1488
|
+
|
|
1346
1489
|
self.VerseTester = VT = VerseTester()
|
|
1347
1490
|
self.VerseTester.resplit_option = resplit_option
|
|
1348
1491
|
self.VerseTester.resplit_keep_midpoint = resplit_keep_midpoint
|
|
@@ -1423,4 +1566,17 @@ class MeterIdentifier(object):
|
|
|
1423
1566
|
V.meter_label = 'na kiṃcid adhyavasitam'
|
|
1424
1567
|
V.identification_score = meter_scores["none found"]
|
|
1425
1568
|
|
|
1569
|
+
if _DEBUG_TIMING:
|
|
1570
|
+
all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
1571
|
+
'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta', 'jAti')
|
|
1572
|
+
verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in all_keys}
|
|
1573
|
+
verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
|
|
1574
|
+
cat = _meter_label_to_category(V.meter_label)
|
|
1575
|
+
bucket = _category_totals.setdefault(cat, {})
|
|
1576
|
+
for k, v in verse_times.items():
|
|
1577
|
+
bucket[k] = bucket.get(k, 0.0) + v
|
|
1578
|
+
bucket['_count'] = bucket.get('_count', 0) + 1
|
|
1579
|
+
if _verse_is_perfect(V):
|
|
1580
|
+
bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
|
|
1581
|
+
|
|
1426
1582
|
return V
|
|
@@ -3,6 +3,7 @@ from skrutable import scheme_detection
|
|
|
3
3
|
from skrutable import meter_patterns
|
|
4
4
|
from skrutable import phonemes
|
|
5
5
|
from skrutable.config import load_config_dict_from_json_file
|
|
6
|
+
from skrutable.utils import timed
|
|
6
7
|
import re
|
|
7
8
|
|
|
8
9
|
# load config variables
|
|
@@ -146,6 +147,7 @@ class Scanner(object):
|
|
|
146
147
|
self.Transliterator = None # will hold Transliterator object
|
|
147
148
|
|
|
148
149
|
|
|
150
|
+
@timed('scan_clean')
|
|
149
151
|
def clean_input(self, cntnts, scheme_in):
|
|
150
152
|
"""
|
|
151
153
|
Accepts raw text string,
|
|
@@ -174,6 +176,7 @@ class Scanner(object):
|
|
|
174
176
|
|
|
175
177
|
return cntnts
|
|
176
178
|
|
|
179
|
+
@timed('scan_syllabify')
|
|
177
180
|
def syllabify_text(self, txt_SLP):
|
|
178
181
|
"""
|
|
179
182
|
Accepts (newline-separated) multi-line string of SLP text.
|
|
@@ -238,6 +241,7 @@ class Scanner(object):
|
|
|
238
241
|
return text_syllabified
|
|
239
242
|
|
|
240
243
|
|
|
244
|
+
@timed('scan_weights')
|
|
241
245
|
def scan_syllable_weights(self, txt_syl):
|
|
242
246
|
"""
|
|
243
247
|
Accepts (newline-separated) multi-line string of text
|
|
@@ -295,6 +299,7 @@ class Scanner(object):
|
|
|
295
299
|
return syllable_weights
|
|
296
300
|
|
|
297
301
|
|
|
302
|
+
@timed('scan_morae_gana')
|
|
298
303
|
def count_morae(self, syl_wts):
|
|
299
304
|
"""
|
|
300
305
|
Accepts (newline-separated) multi-line string of text
|
|
@@ -370,13 +375,13 @@ class Scanner(object):
|
|
|
370
375
|
T.scheme_out = 'SLP'
|
|
371
376
|
|
|
372
377
|
V.text_cleaned = self.clean_input(V.text_raw, V.original_scheme)
|
|
373
|
-
V.text_SLP = T.transliterate(V.text_cleaned)
|
|
378
|
+
V.text_SLP = timed('scan_translit')(T.transliterate)(V.text_cleaned)
|
|
374
379
|
V.text_syllabified = self.syllabify_text(V.text_SLP)
|
|
375
380
|
V.syllable_weights = self.scan_syllable_weights(V.text_syllabified)
|
|
376
381
|
V.morae_per_line = self.count_morae(V.syllable_weights)
|
|
377
|
-
V.gaRa_abbreviations = '
|
|
378
|
-
|
|
379
|
-
)
|
|
382
|
+
V.gaRa_abbreviations = timed('scan_morae_gana')(
|
|
383
|
+
lambda: '\n'.join([ self.gaRa_abbreviate(line) for line in V.syllable_weights.split('\n') ])
|
|
384
|
+
)()
|
|
380
385
|
|
|
381
386
|
self.Verse = V
|
|
382
387
|
self.Transliterator = T
|
|
@@ -0,0 +1,20 @@
|
|
|
1
|
+
import time
|
|
2
|
+
from functools import wraps
|
|
3
|
+
|
|
4
|
+
_DEBUG_TIMING = False
|
|
5
|
+
|
|
6
|
+
_section_totals = {} # flat dict of all timing buckets: scan sub-keys, id type keys, wiggle, etc.
|
|
7
|
+
|
|
8
|
+
def timed(key):
    """Decorator that accumulates wall time for the wrapped call into _section_totals[key].

    ``key`` names the bucket in ``_section_totals`` that receives the elapsed
    seconds (measured with ``time.perf_counter``). The ``_DEBUG_TIMING`` flag is
    checked on every call of the wrapped function: when it is false the call is
    simply forwarded and nothing is recorded.

    The wrapped function's return value and any exception it raises pass
    through unchanged; time spent in a call that raises is still recorded.
    """
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            if not _DEBUG_TIMING:
                return fn(*args, **kwargs)
            t0 = time.perf_counter()
            try:
                return fn(*args, **kwargs)
            finally:
                # Runs on both normal return and exception, so failing calls
                # are not silently missing from the profile. The elapsed time
                # is computed first and then added, which avoids summing a
                # small running total with a large clock reading.
                _section_totals[key] = _section_totals.get(key, 0.0) + (time.perf_counter() - t0)
        return wrapper
    return decorator
|
|
@@ -19,6 +19,7 @@ src/skrutable/scheme_vectors.json
|
|
|
19
19
|
src/skrutable/scheme_vectors_mbh.py
|
|
20
20
|
src/skrutable/splitting.py
|
|
21
21
|
src/skrutable/transliteration.py
|
|
22
|
+
src/skrutable/utils.py
|
|
22
23
|
src/skrutable/virAma_avoidance.py
|
|
23
24
|
src/skrutable.egg-info/PKG-INFO
|
|
24
25
|
src/skrutable.egg-info/SOURCES.txt
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.4.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|