skrutable 2.7.0__tar.gz → 2.8.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skrutable-2.7.0 → skrutable-2.8.0}/PKG-INFO +1 -1
- skrutable-2.8.0/src/skrutable/__init__.py +1 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/config.json +8 -6
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/meter_identification.py +436 -273
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/meter_patterns.py +2 -2
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/scansion.py +1 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable.egg-info/PKG-INFO +1 -1
- skrutable-2.7.0/src/skrutable/__init__.py +0 -1
- {skrutable-2.7.0 → skrutable-2.8.0}/LICENSE.md +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/README.md +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/setup.cfg +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/setup.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/config.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/generate_scheme_vectors.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/impossible_bigrams.json +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/manual.md +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/phonemes.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/run_examples.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/scheme_vectors.json +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/splitting.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/transliteration.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/utils.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable.egg-info/SOURCES.txt +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.7.0 → skrutable-2.8.0}/src/skrutable.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.8.0"
|
|
@@ -7,7 +7,7 @@
|
|
|
7
7
|
"additional_pAda_separators" : ["\t", ";", ",", "/", "|", "।"],
|
|
8
8
|
"default_resplit_option" : "resplit_lite",
|
|
9
9
|
"default_resplit_keep_midpoint" : false,
|
|
10
|
-
"
|
|
10
|
+
"allow_only_trizwuB_and_jagatI_upajAti" : true,
|
|
11
11
|
"meter_scores" : {
|
|
12
12
|
"max score" : 9,
|
|
13
13
|
"anuṣṭubh, full, both halves perfect)" : 9,
|
|
@@ -30,13 +30,15 @@
|
|
|
30
30
|
"viṣamavṛtta, perfect" : 9,
|
|
31
31
|
"viṣamavṛtta, imperfect" : 7,
|
|
32
32
|
"upajāti, perfect" : 8,
|
|
33
|
-
"upajāti,
|
|
34
|
-
"upajāti,
|
|
35
|
-
"upajāti,
|
|
36
|
-
"upajāti, non-triṣṭubh, imperfect" : 3,
|
|
33
|
+
"upajāti, penalty, jagati" : 1,
|
|
34
|
+
"upajāti, penalty, per missing pāda" : 2,
|
|
35
|
+
"upajāti, penalty, per ajñātam pāda" : 2,
|
|
37
36
|
"jāti, perfect" : 9,
|
|
38
37
|
"jāti, imperfect" : 6,
|
|
39
|
-
"jāti, likely" :
|
|
38
|
+
"jāti, likely" : 4,
|
|
39
|
+
"jāti, penalty, per mora-mismatched pāda" : 1,
|
|
40
|
+
"samavṛtta, penalty, ajñātasamavṛtta" : 2,
|
|
41
|
+
"levenshtein distance penalty" : 1,
|
|
40
42
|
"none found" : 1
|
|
41
43
|
},
|
|
42
44
|
"preserve_punctuation_default" : true,
|
|
@@ -4,6 +4,7 @@ from skrutable.config import load_config_dict_from_json_file
|
|
|
4
4
|
from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
|
|
5
5
|
import re
|
|
6
6
|
import time as _time
|
|
7
|
+
from functools import lru_cache
|
|
7
8
|
from copy import copy
|
|
8
9
|
from concurrent.futures import ProcessPoolExecutor
|
|
9
10
|
from dataclasses import dataclass
|
|
@@ -17,11 +18,25 @@ config = load_config_dict_from_json_file()
|
|
|
17
18
|
scansion_syllable_separator = config["scansion_syllable_separator"] # e.g. " "
|
|
18
19
|
default_resplit_option = config["default_resplit_option"] # e.g. "none"
|
|
19
20
|
default_resplit_keep_midpoint = config["default_resplit_keep_midpoint"] # e.g. True
|
|
20
|
-
|
|
21
|
+
allow_only_trizwuB_and_jagatI_upajAti = config["allow_only_trizwuB_and_jagatI_upajAti"] # e.g. True
|
|
21
22
|
meter_scores = config["meter_scores"] # dict
|
|
22
23
|
|
|
23
24
|
_category_totals = {} # { category: { section: float seconds } }, single source of truth
|
|
24
25
|
|
|
26
|
+
# Profiling categories and labels
|
|
27
|
+
_SCAN_ABBREV = {
|
|
28
|
+
'scan_clean': 'clean', 'scan_translit': 'transl', 'scan_syllabify': 'syl',
|
|
29
|
+
'scan_weights': 'wts', 'scan_morae_gana': 'mor+g',
|
|
30
|
+
}
|
|
31
|
+
_ID_CASCADE_ABBREV = {
|
|
32
|
+
'anuzwuB': 'anuṣṭ', 'ardhatraya': 'anuṣṭ3', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
|
|
33
|
+
'ardhasamavftta_perfect': 'ardha✓', 'vizamavftta': 'vizama',
|
|
34
|
+
'jAti': 'jāti',
|
|
35
|
+
'lev_samavftta': 'lev✗sama', 'lev_upajAti': 'lev✗upaj', 'lev_ardha': 'lev✗ardh', 'lev_vizama': 'lev✗visa',
|
|
36
|
+
}
|
|
37
|
+
_SCAN_KEYS = tuple(_SCAN_ABBREV)
|
|
38
|
+
_ID_CASCADE_KEYS = tuple(_ID_CASCADE_ABBREV)
|
|
39
|
+
_TIMING_KEYS = _SCAN_KEYS + _ID_CASCADE_KEYS
|
|
25
40
|
|
|
26
41
|
_ARDHASAMAVRTTA_NAMES = [
|
|
27
42
|
'aparavaktra', 'upacitra', 'puṣpitāgrā', 'viyoginī', 'vegavatī',
|
|
@@ -65,18 +80,9 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
|
|
|
65
80
|
if not _DEBUG_TIMING or not _category_totals:
|
|
66
81
|
return
|
|
67
82
|
import sys, os
|
|
68
|
-
scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
|
|
69
|
-
type_keys = ('anuzwuB', 'ardhatraya', 'samavftta_etc', 'samavftta', 'upajAti', 'ardhasamavftta_perfect', 'vizamavftta', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama')
|
|
70
|
-
type_abbrev = {
|
|
71
|
-
'anuzwuB': 'anuṣṭ', 'ardhatraya': 'anuṣṭ3', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
|
|
72
|
-
'ardhasamavftta_perfect': 'ardha✓', 'vizamavftta': 'vizama',
|
|
73
|
-
'jAti': 'jāti',
|
|
74
|
-
'lev_samavftta': 'lev✗sama', 'lev_ardha': 'lev✗ardh', 'lev_vizama': 'lev✗visa',
|
|
75
|
-
}
|
|
76
|
-
scan_abbrev = {'scan_clean': 'clean', 'scan_translit': 'transl', 'scan_syllabify': 'syl', 'scan_weights': 'wts', 'scan_morae_gana': 'mor+g'}
|
|
77
83
|
cat_order = ['anuṣṭubh', 'samavṛtta', 'upajāti', 'ardhasamavṛtta', 'viṣamavṛtta', 'jāti', 'na kiṃcid adhyavasitam']
|
|
78
|
-
hdr_scan_abbrevs =
|
|
79
|
-
hdr_type_abbrevs =
|
|
84
|
+
hdr_scan_abbrevs = list(_SCAN_ABBREV.values())
|
|
85
|
+
hdr_type_abbrevs = list(_ID_CASCADE_ABBREV.values())
|
|
80
86
|
val_w = len('0.00s')
|
|
81
87
|
col_cat_w = max(len(c) for c in cat_order + ['category']) + 2
|
|
82
88
|
sub_w = max(len('scan∑'), len('types∑'), len('total'), val_w) + 2
|
|
@@ -106,10 +112,10 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
|
|
|
106
112
|
bucket = _category_totals.get(cat)
|
|
107
113
|
if not bucket:
|
|
108
114
|
continue
|
|
109
|
-
cat_scan = sum(bucket.get(k, 0.0) for k in
|
|
110
|
-
cat_types = sum(bucket.get(k, 0.0) for k in
|
|
111
|
-
scan_vals = [f'{bucket.get(k, 0.0):.2f}s' for k in
|
|
112
|
-
type_vals = [f'{bucket.get(k, 0.0):.2f}s' for k in
|
|
115
|
+
cat_scan = sum(bucket.get(k, 0.0) for k in _SCAN_KEYS)
|
|
116
|
+
cat_types = sum(bucket.get(k, 0.0) for k in _ID_CASCADE_KEYS)
|
|
117
|
+
scan_vals = [f'{bucket.get(k, 0.0):.2f}s' for k in _SCAN_KEYS]
|
|
118
|
+
type_vals = [f'{bucket.get(k, 0.0):.2f}s' for k in _ID_CASCADE_KEYS]
|
|
113
119
|
n_perf = bucket.get('_perfect_count', 0)
|
|
114
120
|
n_impf = bucket.get('_count', 0) - n_perf
|
|
115
121
|
total_perfect += n_perf
|
|
@@ -121,10 +127,10 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
|
|
|
121
127
|
+ f'{cat_types:.2f}s'.rjust(sub_w)
|
|
122
128
|
+ ' ' + fmt_row(scan_vals, type_vals))
|
|
123
129
|
lines.append(sep)
|
|
124
|
-
total_scan = sum(sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order) for k in
|
|
125
|
-
total_types = sum(sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order) for k in
|
|
126
|
-
total_scan_vals = [f'{sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order):.2f}s' for k in
|
|
127
|
-
total_type_vals = [f'{sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order):.2f}s' for k in
|
|
130
|
+
total_scan = sum(sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order) for k in _SCAN_KEYS)
|
|
131
|
+
total_types = sum(sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order) for k in _ID_CASCADE_KEYS)
|
|
132
|
+
total_scan_vals = [f'{sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order):.2f}s' for k in _SCAN_KEYS]
|
|
133
|
+
total_type_vals = [f'{sum(_category_totals.get(c, {}).get(k, 0.0) for c in cat_order):.2f}s' for k in _ID_CASCADE_KEYS]
|
|
128
134
|
lines.append(' ' + 'TOTAL'.ljust(col_cat_w)
|
|
129
135
|
+ str(total_perfect).rjust(count_w) + str(total_imperfect).rjust(count_w)
|
|
130
136
|
+ f'{total_scan + total_types:.2f}s'.rjust(sub_w)
|
|
@@ -156,6 +162,7 @@ class Diagnostic:
|
|
|
156
162
|
problem_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); None if perfect
|
|
157
163
|
notable_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); green-highlighted "interesting/ok" syllables
|
|
158
164
|
notable_label: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); label for the notable feature (same string for skt/eng)
|
|
165
|
+
canonical_gana: Optional[dict] = None # keyed by pada (1–4); canonical gaṇa char string for Levenshtein-attributed length-deviant pādas
|
|
159
166
|
|
|
160
167
|
def perfect(self):
|
|
161
168
|
return self.perfect_id_label is not None
|
|
@@ -182,6 +189,19 @@ _vizamavftta_precomputed = [
|
|
|
182
189
|
for gaRas, label in meter_patterns.vizamavftta_by_4_tuple.items()
|
|
183
190
|
]
|
|
184
191
|
|
|
192
|
+
# Precomputed upajāti candidate patterns by length, used by the deferred Levenshtein pass:
|
|
193
|
+
# (canonical_gaRa_str, canonical_weights_str, meter_name, gaRa_regex_str)
|
|
194
|
+
_upajAti_patterns_by_length = {}
|
|
195
|
+
for _L, _patterns in meter_patterns.samavfttas_by_family_and_gaRa.items():
|
|
196
|
+
if not _patterns:
|
|
197
|
+
continue
|
|
198
|
+
_entries = []
|
|
199
|
+
for _gaRa_pattern, _meter_name in _patterns.items():
|
|
200
|
+
_canonical_gaRa = meter_patterns.choose_heavy_gaRa_pattern(_gaRa_pattern)
|
|
201
|
+
_canonical_weights = _gaRa_str_to_weights(_canonical_gaRa)
|
|
202
|
+
_entries.append((_canonical_gaRa, _canonical_weights, _meter_name, _gaRa_pattern))
|
|
203
|
+
_upajAti_patterns_by_length[_L] = _entries
|
|
204
|
+
|
|
185
205
|
|
|
186
206
|
def _levenshtein_align(observed, canonical):
|
|
187
207
|
"""Return (distance, problem_indices) comparing observed lg string to canonical,
|
|
@@ -232,6 +252,12 @@ def _levenshtein_align(observed, canonical):
|
|
|
232
252
|
return dist, []
|
|
233
253
|
|
|
234
254
|
|
|
255
|
+
@lru_cache(maxsize=None)
def _levenshtein_align_cached(observed, canonical):
    """Memoized front end for _levenshtein_align.

    Takes the same (observed, canonical) pair and returns
    (distance, problem_indices). The problem indices are converted to a
    tuple, which keeps the value held in the cache immutable. Both arguments
    form the cache key and therefore must be hashable.
    """
    alignment = _levenshtein_align(observed, canonical)
    distance, problem_indices = alignment
    return distance, tuple(problem_indices)
|
|
259
|
+
|
|
260
|
+
|
|
235
261
|
def _decompose_into_mAtragaNas(weights_str, gana_6_morae, gana_8_morae):
|
|
236
262
|
"""
|
|
237
263
|
Decomposes an ardha (half-verse) weight string into mātrā-gaṇas.
|
|
@@ -378,8 +404,8 @@ class VerseTester(object):
|
|
|
378
404
|
Most methods take a populated scansion.Verse object as an argument;
|
|
379
405
|
test_as_anuzwuB_half() is an exception.
|
|
380
406
|
|
|
381
|
-
Primary method attempt_identification
|
|
382
|
-
|
|
407
|
+
Primary method attempt_identification embeds results in the Verse object
|
|
408
|
+
and returns 1 if identified, 0 if not.
|
|
383
409
|
"""
|
|
384
410
|
|
|
385
411
|
def __init__(self):
|
|
@@ -392,8 +418,9 @@ class VerseTester(object):
|
|
|
392
418
|
self._ardha_stash = [] # accumulated across wiggle candidates
|
|
393
419
|
self._vizama_stash = [] # accumulated across wiggle candidates
|
|
394
420
|
self._samavftta_has_length_error = False # set during evaluate_samavftta perfect_only pass
|
|
421
|
+
self._upajAti_needs_lev = False # set during evaluate_upajAti forward pass
|
|
395
422
|
|
|
396
|
-
def combine_results(self, Vrs, new_label, new_score, new_is_perfect=False):
|
|
423
|
+
def combine_results(self, Vrs, new_label, new_score, new_is_perfect=False, new_diagnostic=None):
|
|
397
424
|
old_label = Vrs.meter_label or ''
|
|
398
425
|
old_score = Vrs.identification_score
|
|
399
426
|
|
|
@@ -408,9 +435,18 @@ class VerseTester(object):
|
|
|
408
435
|
Vrs.meter_label = new_label
|
|
409
436
|
Vrs.identification_score = new_score
|
|
410
437
|
Vrs.is_perfect = new_is_perfect
|
|
438
|
+
Vrs.alternatives = []
|
|
411
439
|
|
|
412
440
|
elif new_score == old_score:
|
|
413
441
|
# tie, concatenate as old + new
|
|
442
|
+
if Vrs.meter_label is None:
|
|
443
|
+
Vrs.meter_label = new_label
|
|
444
|
+
Vrs.is_perfect = new_is_perfect
|
|
445
|
+
else:
|
|
446
|
+
# stash the first alternative before appending the second
|
|
447
|
+
if not Vrs.alternatives:
|
|
448
|
+
Vrs.alternatives = [{'meter_label': old_label, 'diagnostic': Vrs.diagnostic}]
|
|
449
|
+
Vrs.alternatives.append({'meter_label': new_label, 'diagnostic': new_diagnostic})
|
|
414
450
|
Vrs.meter_label += " atha vā " + new_label
|
|
415
451
|
# do not change score
|
|
416
452
|
|
|
@@ -632,9 +668,9 @@ class VerseTester(object):
|
|
|
632
668
|
|
|
633
669
|
def count_pAdasamatva(self, Vrs):
|
|
634
670
|
"""
|
|
635
|
-
Accepts four-part (newline-separated)
|
|
671
|
+
Accepts Verse object with four-part (newline-separated) syllable_weights.
|
|
636
672
|
Since testing for samavṛtta, ignores final anceps syllable in each part.
|
|
637
|
-
|
|
673
|
+
Sets self.pAdasamatva_count to 0, 2, 3, or 4 (size of best matching group).
|
|
638
674
|
"""
|
|
639
675
|
|
|
640
676
|
self.pAdasamatva_count = 0
|
|
@@ -665,7 +701,7 @@ class VerseTester(object):
|
|
|
665
701
|
|
|
666
702
|
# get index of most frequent pāda type
|
|
667
703
|
wbp_sans_final = [ w[:-1] for w in wbp ] # omit final anceps from consideration
|
|
668
|
-
most_freq_pAda = max( set(wbp_sans_final), key=wbp_sans_final.count )
|
|
704
|
+
most_freq_pAda = max( sorted(set(wbp_sans_final)), key=wbp_sans_final.count )
|
|
669
705
|
i = wbp_sans_final.index(most_freq_pAda)
|
|
670
706
|
|
|
671
707
|
w_to_id = wbp[i] # weights to id, including final anceps
|
|
@@ -702,9 +738,10 @@ class VerseTester(object):
|
|
|
702
738
|
meter_label += " (%s)" % imperfect_note
|
|
703
739
|
score = meter_scores["samavṛtta, quarter, perfect"]
|
|
704
740
|
|
|
705
|
-
# experimental penalty, can later incorporate into config meter_scores
|
|
706
741
|
if "ajñātasamavṛtta" in meter_label:
|
|
707
|
-
score -=
|
|
742
|
+
score -= meter_scores["samavṛtta, penalty, ajñātasamavṛtta"]
|
|
743
|
+
|
|
744
|
+
bare_meter_label = meter_label # forward-pass label before per-pāda length notes
|
|
708
745
|
|
|
709
746
|
# Build per-pāda diagnostic: length errors (Levenshtein), then pattern errors.
|
|
710
747
|
# In perfect_only mode, skip Levenshtein — just register the result and return.
|
|
@@ -718,9 +755,10 @@ class VerseTester(object):
|
|
|
718
755
|
# Defer length-error annotation to the imperfect pass; register result now.
|
|
719
756
|
self._samavftta_has_length_error = True
|
|
720
757
|
old_score = Vrs.identification_score
|
|
721
|
-
|
|
758
|
+
_diag = Diagnostic(perfect_id_label=meter_label)
|
|
759
|
+
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_diagnostic=_diag)
|
|
722
760
|
if score >= old_score:
|
|
723
|
-
Vrs.diagnostic =
|
|
761
|
+
Vrs.diagnostic = _diag
|
|
724
762
|
return
|
|
725
763
|
|
|
726
764
|
for pada_num, w in enumerate(wbp[:4], start=1):
|
|
@@ -770,9 +808,14 @@ class VerseTester(object):
|
|
|
770
808
|
|
|
771
809
|
# score arbitration: may tie with pre-existing result (e.g., upajāti)
|
|
772
810
|
old_score = Vrs.identification_score
|
|
773
|
-
self.
|
|
774
|
-
|
|
811
|
+
if self._samavftta_has_length_error and Vrs.meter_label == bare_meter_label:
|
|
812
|
+
# Replace the forward-pass placeholder with the fully-annotated label.
|
|
813
|
+
Vrs.meter_label = meter_label
|
|
775
814
|
Vrs.diagnostic = diagnostic
|
|
815
|
+
else:
|
|
816
|
+
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=not imperfect_note and not has_any_error, new_diagnostic=diagnostic)
|
|
817
|
+
if score >= old_score:
|
|
818
|
+
Vrs.diagnostic = diagnostic
|
|
776
819
|
|
|
777
820
|
|
|
778
821
|
def evaluate_ardhasamavftta(self, Vrs, perfect_only=False):
|
|
@@ -810,9 +853,10 @@ class VerseTester(object):
|
|
|
810
853
|
):
|
|
811
854
|
score = meter_scores["ardhasamavṛtta, perfect"]
|
|
812
855
|
old_score = Vrs.identification_score
|
|
813
|
-
|
|
856
|
+
_diag = Diagnostic(perfect_id_label=meter_label)
|
|
857
|
+
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=True, new_diagnostic=_diag)
|
|
814
858
|
if score >= old_score:
|
|
815
|
-
Vrs.diagnostic =
|
|
859
|
+
Vrs.diagnostic = _diag
|
|
816
860
|
self._ardha_stash = [] # perfect found; no need for imperfect pass
|
|
817
861
|
return
|
|
818
862
|
# same length but not perfect — stash without distance computation
|
|
@@ -875,137 +919,229 @@ class VerseTester(object):
|
|
|
875
919
|
imperfect_label = best_label + f" ({suffix})"
|
|
876
920
|
|
|
877
921
|
old_score = Vrs.identification_score
|
|
878
|
-
|
|
922
|
+
_diag = Diagnostic(
|
|
923
|
+
perfect_id_label=imperfect_label,
|
|
924
|
+
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
925
|
+
imperfect_label_english=per_pada_english or None,
|
|
926
|
+
problem_syllables=problem_syllables or None,
|
|
927
|
+
)
|
|
928
|
+
self.combine_results(Vrs, new_label=imperfect_label, new_score=score, new_diagnostic=_diag)
|
|
879
929
|
if score >= old_score:
|
|
880
|
-
Vrs.diagnostic =
|
|
881
|
-
|
|
882
|
-
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
883
|
-
imperfect_label_english=per_pada_english or None,
|
|
884
|
-
problem_syllables=problem_syllables or None,
|
|
885
|
-
)
|
|
930
|
+
Vrs.diagnostic = _diag
|
|
931
|
+
|
|
886
932
|
|
|
933
|
+
def _upajAti_match_pada_exact(self, pada_len, gaRa_str):
    """Exact regex attribution for one upajāti pāda against its own length's patterns.

    Args:
        pada_len: syllable count of the pāda; selects which pattern table of
            meter_patterns.samavfttas_by_family_and_gaRa is consulted.
        gaRa_str: gaṇa abbreviation string of the pāda.

    Returns:
        (meter_label, is_ajnata). On a match, meter_label is
        "<meter name> [<pada_len>: <heavy gaṇa pattern>]" and is_ajnata is
        False. If no pattern matches, or no pattern table exists for
        pada_len, meter_label is "ajñātam [<pada_len>: <gaRa_str>]" and
        is_ajnata is True.
    """
    # A length without a pattern table is treated as unidentifiable rather
    # than raising KeyError.
    patterns = meter_patterns.samavfttas_by_family_and_gaRa.get(pada_len) or {}
    for gaRa_pattern, meter_name in patterns.items():
        # re.match takes the pattern string directly; no explicit compile needed.
        if re.match(gaRa_pattern, gaRa_str):
            meter_label = meter_name + ' [%d: %s]' % (
                pada_len,
                meter_patterns.choose_heavy_gaRa_pattern(gaRa_pattern)
            )
            return meter_label, False
    return 'ajñātam [%d: %s]' % (pada_len, gaRa_str), True
|
|
949
|
+
|
|
950
|
+
def _synthesize_upajAti_label(self, meter_labels, wbp_lens, unique_sorted_lens, family_lengths):
|
|
951
|
+
"""Build (overall_meter_label, family, notable_label_dict) from per-pāda meter_labels.
|
|
952
|
+
|
|
953
|
+
overall_meter_label format: "upajāti triṣṭubh: upendravajrā 1,3; vātormī 2; indravajrā 4"
|
|
954
|
+
— subtypes sorted by pāda count desc, then first-occurrence asc; no syllable/gaṇa info.
|
|
955
|
+
notable_label_dict: {pada_num (1-based): bare_name} for all non-ajñātam pādas.
|
|
956
|
+
"""
|
|
957
|
+
# Extract bare subtype name (strip " [len: gaṇas]" suffix).
|
|
958
|
+
def _bare_name(lbl):
|
|
959
|
+
return lbl.split(' [')[0]
|
|
960
|
+
|
|
961
|
+
# Build notable_label_dict and group pāda numbers by bare name.
|
|
962
|
+
notable_label_dict = {}
|
|
963
|
+
name_to_padas = {} # bare_name → [1-based pada nums], in order
|
|
964
|
+
for i, lbl in enumerate(meter_labels):
|
|
965
|
+
pada_num = i + 1
|
|
966
|
+
name = _bare_name(lbl)
|
|
967
|
+
if not name.startswith('ajñātam'):
|
|
968
|
+
notable_label_dict[pada_num] = name
|
|
969
|
+
name_to_padas.setdefault(name, []).append(pada_num)
|
|
970
|
+
|
|
971
|
+
# Sort groups: count desc, then first occurrence asc.
|
|
972
|
+
sorted_groups = sorted(
|
|
973
|
+
name_to_padas.items(),
|
|
974
|
+
key=lambda kv: (-len(kv[1]), kv[1][0])
|
|
975
|
+
)
|
|
976
|
+
combined_parts = [
|
|
977
|
+
'%s %s' % (name, ','.join(str(p) for p in padas))
|
|
978
|
+
for name, padas in sorted_groups
|
|
979
|
+
]
|
|
980
|
+
combined_meter_labels = '; '.join(combined_parts)
|
|
981
|
+
|
|
982
|
+
# Pick family name from family_lengths: prefer 11, then 12, then smallest.
|
|
983
|
+
family_len = 11 if 11 in family_lengths else (12 if 12 in family_lengths else min(family_lengths))
|
|
984
|
+
family = meter_patterns.samavftta_family_names[family_len] if family_len < 27 else 'daṇḍaka'
|
|
985
|
+
if unique_sorted_lens == [11, 12]:
|
|
986
|
+
family = 'triṣṭubh + jagatī'
|
|
987
|
+
|
|
988
|
+
overall_meter_label = 'upajāti %s: %s' % (family, combined_meter_labels)
|
|
989
|
+
return overall_meter_label, family, notable_label_dict
|
|
990
|
+
|
|
991
|
+
def _upajAti_levenshtein_attribute_pada(self, pada_weights, family_lengths):
    """Deferred-pass Levenshtein attribution for one upajāti pāda.

    Compares pada_weights against every precomputed pattern whose length is
    in family_lengths and keeps those whose edit distance does not exceed
    ARDHASAMAVFTTA_EDIT_DISTANCE_THRESHOLD.

    Returns:
        (meter_name, canonical_gaRa, canonical_weights, problem_indices, distance)
        for the closest pattern (first encountered on ties), or None when no
        pattern is within the threshold. When the closest patterns are
        exactly indravajrā and upendravajrā, a joint name and joint gaṇa
        string are returned together with indravajrā's weights and problem
        indices.
    """
    threshold = ARDHASAMAVFTTA_EDIT_DISTANCE_THRESHOLD
    observed_len = len(pada_weights)

    # Collect every within-threshold candidate, preserving encounter order.
    candidates = []  # (distance, meter_name, canonical_gaRa, canonical_weights, problem_indices)
    for candidate_len in family_lengths:
        # Skip lengths whose size difference already exceeds the threshold.
        if abs(observed_len - candidate_len) > threshold:
            continue
        for gaRa, weights, name, _unused_regex in _upajAti_patterns_by_length.get(candidate_len, []):
            distance, problems = _levenshtein_align_cached(pada_weights, weights)
            if distance <= threshold:
                candidates.append((distance, name, gaRa, weights, problems))

    if not candidates:
        return None

    lowest = min(entry[0] for entry in candidates)
    closest = [entry for entry in candidates if entry[0] == lowest]

    # Special case: indravajrā and upendravajrā tied at the best distance are
    # reported jointly.
    if {entry[1] for entry in closest} == {'indravajrā', 'upendravajrā'}:
        indra = next(entry for entry in closest if entry[1] == 'indravajrā')
        upendra = next(entry for entry in closest if entry[1] == 'upendravajrā')
        return (
            'indravajrā / upendravajrā',
            '%s / %s' % (indra[2], upendra[2]),
            indra[3],
            indra[4],
            lowest,
        )

    winner = closest[0]  # first pattern encountered at the minimum distance
    return (winner[1], winner[2], winner[3], winner[4], lowest)
|
|
1026
|
+
|
|
1027
|
+
def evaluate_upajAti(self, Vrs, perfect_only=True):
|
|
889
1028
|
# sufficient length similarity already assured, now just evaluate
|
|
890
1029
|
|
|
891
1030
|
wbp = Vrs.syllable_weights.split('\n') # weights by pāda
|
|
892
|
-
wbp_lens_orig = [
|
|
893
|
-
|
|
894
|
-
|
|
1031
|
+
wbp_lens_orig = [len(line) for line in wbp]
|
|
1032
|
+
n_pAdas = min(len(wbp), 4)
|
|
1033
|
+
wbp = wbp[:4]
|
|
1034
|
+
wbp_lens = wbp_lens_orig[:4]
|
|
1035
|
+
gs_to_id = Vrs.gaRa_abbreviations.split('\n')[:4]
|
|
1036
|
+
missing_pAdas = max(0, 4 - n_pAdas)
|
|
1037
|
+
|
|
1038
|
+
unique_sorted_lens = sorted(set(wbp_lens))
|
|
895
1039
|
|
|
896
|
-
#
|
|
1040
|
+
# Determine family lengths from most-frequent pāda length.
|
|
897
1041
|
# see Karashima 2016 "The Triṣṭubh-Jagatī Verses in the Saddharmapuṇḍarīka"
|
|
898
|
-
|
|
899
|
-
|
|
900
|
-
|
|
901
|
-
#
|
|
902
|
-
|
|
903
|
-
|
|
904
|
-
if
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
if len(weights) != most_freq_pAda_len:
|
|
911
|
-
to_exclude.append(i)
|
|
912
|
-
excluded_indices = list(to_exclude)
|
|
913
|
-
for i in reversed(to_exclude): # delete in descending index order, avoid index errors
|
|
914
|
-
del wbp[i]
|
|
915
|
-
del wbp_lens[i]
|
|
916
|
-
del gs_to_id[i]
|
|
1042
|
+
most_freq_pAda_len = max(sorted(set(wbp_lens)), key=wbp_lens.count)
|
|
1043
|
+
if allow_only_trizwuB_and_jagatI_upajAti and most_freq_pAda_len not in (11, 12):
|
|
1044
|
+
return
|
|
1045
|
+
# family_lengths: the set of lengths to match against. Always includes 11
|
|
1046
|
+
# and/or 12 if present; length-deviant pādas go ajñātam → Lev rescue candidate.
|
|
1047
|
+
family_lengths = set()
|
|
1048
|
+
if 11 in wbp_lens:
|
|
1049
|
+
family_lengths.add(11)
|
|
1050
|
+
if 12 in wbp_lens:
|
|
1051
|
+
family_lengths.add(12)
|
|
1052
|
+
if not family_lengths:
|
|
1053
|
+
family_lengths = {most_freq_pAda_len}
|
|
917
1054
|
|
|
918
1055
|
# Calculate maximum achievable score before doing any pattern work,
|
|
919
1056
|
# and bail early if we can't beat the current best.
|
|
920
1057
|
potential_score = meter_scores["upajāti, perfect"]
|
|
921
|
-
if 11 not in wbp_lens:
|
|
922
|
-
potential_score -=
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
): # not perfect, less than 4 being analyzed
|
|
927
|
-
potential_score -= 2
|
|
928
|
-
if ( potential_score < Vrs.identification_score
|
|
929
|
-
# not going to beat pre-existing result (e.g. 7 from imperfect samavftta)
|
|
930
|
-
) or ( disable_non_trizwuB_upajAti
|
|
931
|
-
and potential_score < meter_scores["upajāti, imperfect"]
|
|
932
|
-
):
|
|
1058
|
+
if 11 not in wbp_lens:
|
|
1059
|
+
potential_score -= meter_scores["upajāti, penalty, jagati"]
|
|
1060
|
+
potential_score -= missing_pAdas * meter_scores["upajāti, penalty, per missing pāda"]
|
|
1061
|
+
if potential_score < Vrs.identification_score:
|
|
1062
|
+
# not going to beat pre-existing result (e.g. 7 from imperfect samavṛtta)
|
|
933
1063
|
return
|
|
934
1064
|
|
|
935
|
-
# Identify each
|
|
1065
|
+
# Identify each pāda individually. Exact match is restricted to family lengths;
|
|
1066
|
+
# length-deviant pādas go straight to ajñātam and become Lev rescue candidates.
|
|
936
1067
|
meter_labels = []
|
|
1068
|
+
any_ajnata = False
|
|
1069
|
+
any_exact = False
|
|
1070
|
+
vikrta_count = 0
|
|
1071
|
+
vikrta_info = {} # pada_index (0-based) → (orig_len, canonical_len, problem_indices, canonical_gaRa, dist)
|
|
937
1072
|
for i, g_to_id in enumerate(gs_to_id):
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
regex = re.compile(gaRa_pattern)
|
|
942
|
-
|
|
943
|
-
if re.match(regex, g_to_id):
|
|
944
|
-
|
|
945
|
-
meter_label = meter_patterns.samavfttas_by_family_and_gaRa[wbp_lens[i]][gaRa_pattern]
|
|
946
|
-
meter_label += ' [%d: %s]' % (
|
|
947
|
-
wbp_lens[i],
|
|
948
|
-
meter_patterns.choose_heavy_gaRa_pattern(gaRa_pattern)
|
|
949
|
-
)
|
|
950
|
-
break
|
|
951
|
-
|
|
1073
|
+
if wbp_lens[i] in family_lengths:
|
|
1074
|
+
meter_label, is_ajnata = self._upajAti_match_pada_exact(wbp_lens[i], g_to_id)
|
|
952
1075
|
else:
|
|
953
|
-
meter_label =
|
|
954
|
-
|
|
955
|
-
|
|
1076
|
+
meter_label = 'ajñātam [%d: %s]' % (wbp_lens[i], g_to_id)
|
|
1077
|
+
is_ajnata = True
|
|
1078
|
+
if is_ajnata:
|
|
1079
|
+
any_ajnata = True
|
|
1080
|
+
if not perfect_only:
|
|
1081
|
+
lev_result = self._upajAti_levenshtein_attribute_pada(wbp[i], family_lengths)
|
|
1082
|
+
if lev_result is not None:
|
|
1083
|
+
meter_name, canonical_gaRa, canonical_weights, problem_indices, dist = lev_result
|
|
1084
|
+
meter_label = '%s [%d: %s]' % (meter_name, len(canonical_weights), canonical_gaRa)
|
|
1085
|
+
vikrta_count += 1
|
|
1086
|
+
vikrta_info[i] = (wbp_lens[i], len(canonical_weights), problem_indices, canonical_gaRa, dist)
|
|
1087
|
+
else:
|
|
1088
|
+
any_exact = True
|
|
956
1089
|
meter_labels.append(meter_label)
|
|
957
1090
|
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
family = meter_patterns.samavftta_family_names[wbp_lens[0]] if wbp_lens[0] < 27 else 'daṇḍaka'
|
|
963
|
-
if (family == "triṣṭubh" and
|
|
964
|
-
unique_meter_labels == ['indravajrā [11: ttjgg]', 'upendravajrā [11: jtjgg]']
|
|
965
|
-
):
|
|
966
|
-
family = '' # clearer not to specify in this case
|
|
967
|
-
|
|
968
|
-
if len(wbp_lens) == 4 and unique_sorted_lens == [11]: # triṣṭubh
|
|
969
|
-
score = meter_scores["upajāti, perfect"]
|
|
970
|
-
elif unique_sorted_lens == [11, 12]:
|
|
971
|
-
score = meter_scores["upajāti, triṣṭubh-jagatī-saṃkara, perfect"]
|
|
972
|
-
family = "triṣṭubh-jagatī-saṃkara?" # overwrite
|
|
973
|
-
elif len(wbp_lens) == 4 and 11 not in unique_sorted_lens:
|
|
974
|
-
score = meter_scores["upajāti, non-triṣṭubh, perfect"]
|
|
975
|
-
elif len(wbp_lens) in [2,3] and wbp_lens.count(11) == len(wbp_lens): # triṣṭubh
|
|
976
|
-
score = meter_scores["upajāti, imperfect"]
|
|
977
|
-
elif len(wbp_lens) in [2,3] and 11 not in wbp_lens:
|
|
978
|
-
score = meter_scores["upajāti, non-triṣṭubh, imperfect"]
|
|
979
|
-
else:
|
|
980
|
-
score = meter_scores["none found"]
|
|
1091
|
+
# forward pass: flag for deferred Levenshtein if any pāda is ajñātam
|
|
1092
|
+
# but only if at least one matched exactly (verse is plausibly upajāti)
|
|
1093
|
+
if perfect_only and any_ajnata and any_exact:
|
|
1094
|
+
self._upajAti_needs_lev = True
|
|
981
1095
|
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
1096
|
+
overall_meter_label, family, notable_label_dict = self._synthesize_upajAti_label(
|
|
1097
|
+
meter_labels, wbp_lens, unique_sorted_lens, family_lengths
|
|
1098
|
+
)
|
|
1099
|
+
|
|
1100
|
+
score = meter_scores["upajāti, perfect"]
|
|
1101
|
+
if 11 not in wbp_lens:
|
|
1102
|
+
score -= meter_scores["upajāti, penalty, jagati"]
|
|
1103
|
+
score -= missing_pAdas * meter_scores["upajāti, penalty, per missing pāda"]
|
|
1104
|
+
ajnatam_count = sum(1 for lbl in meter_labels if lbl.startswith('ajñātam'))
|
|
1105
|
+
# vikṛta-rescued pādas carry the same penalty as ajñātam until calibration
|
|
1106
|
+
# introduces a dedicated vikṛtavṛtta penalty (Step 2)
|
|
1107
|
+
score -= (ajnatam_count + vikrta_count) * meter_scores["upajāti, penalty, per ajñātam pāda"]
|
|
987
1108
|
|
|
988
1109
|
imperfect_note = len(wbp_lens) != 4 and unique_sorted_lens != [11, 12]
|
|
989
|
-
overall_meter_label = "upajāti %s: %s" % (
|
|
990
|
-
family,
|
|
991
|
-
combined_meter_labels
|
|
992
|
-
)
|
|
993
1110
|
|
|
994
|
-
# Build diagnostic
|
|
995
|
-
# to the majority length; included pādas contribute no error entry.
|
|
996
|
-
most_freq_len = wbp_lens[0] if wbp_lens else None
|
|
1111
|
+
# Build diagnostic from per-pāda attribution results.
|
|
997
1112
|
problem_syllables = {}
|
|
998
1113
|
per_pada_sanskrit = {}
|
|
999
1114
|
per_pada_english = {}
|
|
1115
|
+
canonical_gana = {}
|
|
1000
1116
|
for pada_num in range(1, 5):
|
|
1001
|
-
|
|
1002
|
-
if
|
|
1003
|
-
|
|
1117
|
+
i = pada_num - 1
|
|
1118
|
+
lbl = meter_labels[i] if i < len(meter_labels) else None
|
|
1119
|
+
if lbl and lbl.startswith('ajñātam'):
|
|
1120
|
+
orig_len = wbp_lens[i]
|
|
1121
|
+
syls = list(range(orig_len))
|
|
1004
1122
|
problem_syllables[pada_num] = syls
|
|
1005
|
-
|
|
1006
|
-
|
|
1123
|
+
hyper = orig_len > most_freq_pAda_len
|
|
1124
|
+
per_pada_sanskrit[pada_num] = 'adhikākṣarā' if hyper else 'ūnākṣarā'
|
|
1125
|
+
per_pada_english[pada_num] = 'hypermetric' if hyper else 'hypometric'
|
|
1126
|
+
elif i in vikrta_info:
|
|
1127
|
+
orig_len, canonical_len, problem_indices, vikrta_canonical_gaRa, vikrta_dist = vikrta_info[i]
|
|
1128
|
+
if orig_len != canonical_len:
|
|
1129
|
+
# length-deviant vikṛta: flag as hyper/hypometric
|
|
1130
|
+
hyper = orig_len > canonical_len
|
|
1007
1131
|
per_pada_sanskrit[pada_num] = 'adhikākṣarā' if hyper else 'ūnākṣarā'
|
|
1008
1132
|
per_pada_english[pada_num] = 'hypermetric' if hyper else 'hypometric'
|
|
1133
|
+
# Only pinpoint the gap when dist==1; higher distances mean additional weight
|
|
1134
|
+
# mismatches that make the gap position unreliable.
|
|
1135
|
+
if vikrta_dist == 1 and problem_indices:
|
|
1136
|
+
problem_syllables[pada_num] = list(problem_indices)
|
|
1137
|
+
canonical_gana[pada_num] = vikrta_canonical_gaRa
|
|
1138
|
+
else:
|
|
1139
|
+
problem_syllables[pada_num] = list(range(orig_len))
|
|
1140
|
+
elif problem_indices:
|
|
1141
|
+
# same-length vikṛta: flag the specific mismatched positions
|
|
1142
|
+
per_pada_sanskrit[pada_num] = 'vikṛtavṛtta'
|
|
1143
|
+
per_pada_english[pada_num] = 'vikrtavrtta'
|
|
1144
|
+
problem_syllables[pada_num] = list(problem_indices)
|
|
1009
1145
|
|
|
1010
1146
|
# Append per-pāda imperfect notes to label.
|
|
1011
1147
|
length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
|
|
@@ -1013,27 +1149,42 @@ class VerseTester(object):
|
|
|
1013
1149
|
overall_meter_label += " (%s)" % "; ".join(length_notes)
|
|
1014
1150
|
|
|
1015
1151
|
if not per_pada_english and not imperfect_note:
|
|
1016
|
-
diagnostic = Diagnostic(
|
|
1152
|
+
diagnostic = Diagnostic(
|
|
1153
|
+
perfect_id_label=overall_meter_label,
|
|
1154
|
+
notable_label=notable_label_dict or None,
|
|
1155
|
+
)
|
|
1017
1156
|
elif not imperfect_note:
|
|
1018
1157
|
diagnostic = Diagnostic(
|
|
1019
1158
|
perfect_id_label=overall_meter_label,
|
|
1020
1159
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1021
1160
|
imperfect_label_english=per_pada_english or None,
|
|
1022
1161
|
problem_syllables=problem_syllables or None,
|
|
1162
|
+
notable_label=notable_label_dict or None,
|
|
1163
|
+
canonical_gana=canonical_gana or None,
|
|
1023
1164
|
)
|
|
1024
1165
|
else:
|
|
1025
1166
|
diagnostic = Diagnostic(
|
|
1026
1167
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1027
1168
|
imperfect_label_english=per_pada_english or None,
|
|
1028
1169
|
problem_syllables=problem_syllables or None,
|
|
1170
|
+
notable_label=notable_label_dict or None,
|
|
1171
|
+
canonical_gana=canonical_gana or None,
|
|
1029
1172
|
)
|
|
1030
1173
|
|
|
1031
|
-
# score arbitration: may tie with pre-existing result (e.g., samavṛtta)
|
|
1174
|
+
# score arbitration: may tie with pre-existing result (e.g., samavṛtta).
|
|
1175
|
+
# Deferred pass overwrites the forward-pass placeholder directly (same
|
|
1176
|
+
# identification refined, not a new competitor).
|
|
1032
1177
|
old_score = Vrs.identification_score
|
|
1033
1178
|
is_perfect = not imperfect_note and not per_pada_english
|
|
1034
|
-
|
|
1035
|
-
|
|
1179
|
+
if not perfect_only and Vrs.meter_label is not None and Vrs.meter_label.startswith('upajāti'):
|
|
1180
|
+
Vrs.meter_label = overall_meter_label
|
|
1181
|
+
Vrs.identification_score = score
|
|
1182
|
+
Vrs.is_perfect = is_perfect
|
|
1036
1183
|
Vrs.diagnostic = diagnostic
|
|
1184
|
+
else:
|
|
1185
|
+
self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=is_perfect, new_diagnostic=diagnostic)
|
|
1186
|
+
if score >= old_score:
|
|
1187
|
+
Vrs.diagnostic = diagnostic
|
|
1037
1188
|
|
|
1038
1189
|
|
|
1039
1190
|
def is_vizamavftta(self, Vrs, perfect_only=False):
|
|
@@ -1123,82 +1274,17 @@ class VerseTester(object):
|
|
|
1123
1274
|
imperfect_label = best_label + f" ({suffix})"
|
|
1124
1275
|
|
|
1125
1276
|
old_score = Vrs.identification_score
|
|
1126
|
-
|
|
1277
|
+
_diag = Diagnostic(
|
|
1278
|
+
perfect_id_label=imperfect_label,
|
|
1279
|
+
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1280
|
+
imperfect_label_english=per_pada_english or None,
|
|
1281
|
+
problem_syllables=problem_syllables or None,
|
|
1282
|
+
)
|
|
1283
|
+
self.combine_results(Vrs, new_label=imperfect_label, new_score=score, new_diagnostic=_diag)
|
|
1127
1284
|
if score >= old_score:
|
|
1128
|
-
Vrs.diagnostic =
|
|
1129
|
-
perfect_id_label=imperfect_label,
|
|
1130
|
-
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1131
|
-
imperfect_label_english=per_pada_english or None,
|
|
1132
|
-
problem_syllables=problem_syllables or None,
|
|
1133
|
-
)
|
|
1285
|
+
Vrs.diagnostic = _diag
|
|
1134
1286
|
return True
|
|
1135
1287
|
|
|
1136
|
-
def test_as_samavftta_etc(self, Vrs):
|
|
1137
|
-
|
|
1138
|
-
wbp = Vrs.syllable_weights.split('\n') # weights by pāda
|
|
1139
|
-
wbp_lens = [ len(line) for line in wbp ]
|
|
1140
|
-
|
|
1141
|
-
# make sure either full four pādas or one and single-pāda mode
|
|
1142
|
-
if len(wbp) >= 4 or (
|
|
1143
|
-
len(wbp) == 1 and self.resplit_option == "single_pAda"
|
|
1144
|
-
):
|
|
1145
|
-
pass
|
|
1146
|
-
else:
|
|
1147
|
-
return 0
|
|
1148
|
-
|
|
1149
|
-
self.count_pAdasamatva(Vrs) # [0,2,3,4]
|
|
1150
|
-
|
|
1151
|
-
# test in following order to prioritize left-right presentation of ties
|
|
1152
|
-
# ties managed in self.combine_results()
|
|
1153
|
-
|
|
1154
|
-
# test perfect samavṛtta
|
|
1155
|
-
if self.pAdasamatva_count == 4:
|
|
1156
|
-
# definitely checks out, id_score == 9
|
|
1157
|
-
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
1158
|
-
return 1 # max score already reached
|
|
1159
|
-
|
|
1160
|
-
|
|
1161
|
-
|
|
1162
|
-
# test perfect single pāda of samavṛtta
|
|
1163
|
-
if ( self.pAdasamatva_count == 0 and self.resplit_option == "single_pAda"):
|
|
1164
|
-
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
1165
|
-
|
|
1166
|
-
# test perfect viṣamavṛtta (Levenshtein for imperfect deferred to imperfect pass)
|
|
1167
|
-
if self.pAdasamatva_count == 0 and timed('vizamavftta')(self.is_vizamavftta)(Vrs, perfect_only=True):
|
|
1168
|
-
# will give id_score == 9
|
|
1169
|
-
# label and score already set in is_vizamavftta if test was successful
|
|
1170
|
-
return 1 # max score already reached
|
|
1171
|
-
|
|
1172
|
-
# test perfect upajāti
|
|
1173
|
-
|
|
1174
|
-
unique_sorted_lens = list(set(wbp_lens))
|
|
1175
|
-
unique_sorted_lens.sort()
|
|
1176
|
-
if len(unique_sorted_lens) == 1: # all same length
|
|
1177
|
-
# will give id_score in [8, 7], may tie with above
|
|
1178
|
-
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
1179
|
-
if Vrs.identification_score == 8: return 1 # best score compared to below
|
|
1180
|
-
# otherwise, max score not necessarily yet reached, don't return
|
|
1181
|
-
|
|
1182
|
-
# test imperfect samavftta (Levenshtein for length errors deferred to imperfect pass)
|
|
1183
|
-
if self.pAdasamatva_count in [2, 3]:
|
|
1184
|
-
# will give id_score in [7, 6], may tie with above
|
|
1185
|
-
timed('samavftta')(self.evaluate_samavftta)(Vrs, perfect_only=True)
|
|
1186
|
-
# max score not necessarily yet reached, don't return
|
|
1187
|
-
|
|
1188
|
-
# test imperfect upajāti
|
|
1189
|
-
if (
|
|
1190
|
-
len( list(set(wbp_lens)) ) in [2, 3] or
|
|
1191
|
-
unique_sorted_lens == [11, 12]
|
|
1192
|
-
): # either not all same length or triṣṭubh-jagatī mix
|
|
1193
|
-
# will give id_score in [6, 5, 4], may tie with above
|
|
1194
|
-
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
1195
|
-
|
|
1196
|
-
# return success
|
|
1197
|
-
if Vrs.meter_label != None:
|
|
1198
|
-
return 1
|
|
1199
|
-
else:
|
|
1200
|
-
return 0
|
|
1201
|
-
|
|
1202
1288
|
def test_as_jAti(self, Vrs):
|
|
1203
1289
|
"""
|
|
1204
1290
|
Determines whether verse is of jāti (mātrāvṛtta) type.
|
|
@@ -1239,32 +1325,80 @@ class VerseTester(object):
|
|
|
1239
1325
|
close1 = abs(eff1 - std_ardha[0]) <= 1
|
|
1240
1326
|
close2 = abs(eff2 - std_ardha[1]) <= 1
|
|
1241
1327
|
if close1 and close2:
|
|
1242
|
-
jati_label = jAti_name
|
|
1328
|
+
jati_label = jAti_name
|
|
1243
1329
|
likely_score = meter_scores["jāti, likely"]
|
|
1244
1330
|
if likely_score > Vrs.identification_score:
|
|
1245
1331
|
per_pada_sanskrit = {}
|
|
1246
1332
|
per_pada_english = {}
|
|
1247
|
-
# Attribute ardha-level mora error to the
|
|
1333
|
+
# Attribute ardha-level mora error to the even pāda key, but label by ardha.
|
|
1248
1334
|
ardha_morae_pairs = [
|
|
1249
|
-
(m1, std_ardha[0], 2),
|
|
1250
|
-
(m2, std_ardha[1], 4),
|
|
1335
|
+
(m1, std_ardha[0], 1, ardha1_w, 2),
|
|
1336
|
+
(m2, std_ardha[1], 2, ardha2_w, 4),
|
|
1251
1337
|
]
|
|
1252
|
-
for actual, expected, even_pada in ardha_morae_pairs:
|
|
1253
|
-
|
|
1254
|
-
|
|
1255
|
-
|
|
1338
|
+
for actual, expected, ardha_num, ardha_w, even_pada in ardha_morae_pairs:
|
|
1339
|
+
anceps_ok = actual == expected - 1 and ardha_w[-1:] == 'l'
|
|
1340
|
+
if actual != expected and not anceps_ok:
|
|
1341
|
+
hyper = actual > expected
|
|
1342
|
+
per_pada_sanskrit[even_pada] = f"ardha {ardha_num}: " + ('adhikamātrā' if hyper else 'ūnamātrā') + f", {expected}→{actual}"
|
|
1343
|
+
per_pada_english[even_pada] = f"ardha {ardha_num} mora count off from expected {expected}"
|
|
1256
1344
|
# Build meter_label suffix from the per-ardha directions.
|
|
1257
|
-
|
|
1258
|
-
|
|
1259
|
-
|
|
1345
|
+
ardha_labels = [
|
|
1346
|
+
(ardha_num, per_pada_sanskrit[even_pada])
|
|
1347
|
+
for ardha_num, even_pada in [(1, 2), (2, 4)]
|
|
1348
|
+
if even_pada in per_pada_sanskrit
|
|
1349
|
+
]
|
|
1350
|
+
if not ardha_labels:
|
|
1351
|
+
suffix = 'asamīcīnā'
|
|
1352
|
+
else:
|
|
1353
|
+
suffix = '; '.join(v for _, v in ardha_labels)
|
|
1354
|
+
# Decompose all ardhas for gaṇa abbreviations and problem syllable pinpointing.
|
|
1355
|
+
g8_morae = 4 if jAti_name == 'āryāgīti' else 2
|
|
1356
|
+
ardha1_ganas = _decompose_into_mAtragaNas(ardha1_w, g6_ardha1, g8_morae)
|
|
1357
|
+
ardha2_ganas = _decompose_into_mAtragaNas(ardha2_w, g6_ardha2, g8_morae)
|
|
1358
|
+
names = meter_patterns.mAtragaNa_names
|
|
1359
|
+
def _ganas_to_abbrevs(ganas):
|
|
1360
|
+
return ' '.join(names.get(g, g) for g in ganas)
|
|
1361
|
+
def _split_ardha_ganas(ganas, pada_a_syl_count):
|
|
1362
|
+
cur = 0
|
|
1363
|
+
for i, g in enumerate(ganas):
|
|
1364
|
+
if cur >= pada_a_syl_count:
|
|
1365
|
+
return _ganas_to_abbrevs(ganas[:i]), _ganas_to_abbrevs(ganas[i:])
|
|
1366
|
+
cur += len(g)
|
|
1367
|
+
return _ganas_to_abbrevs(ganas), ''
|
|
1368
|
+
if len(w_p) >= 4:
|
|
1369
|
+
p1a, p1b = _split_ardha_ganas(ardha1_ganas, len(w_p[0]))
|
|
1370
|
+
p2a, p2b = _split_ardha_ganas(ardha2_ganas, len(w_p[2]))
|
|
1371
|
+
mAtragaNa_abbrevs = '\n'.join([p1a, p1b, p2a, p2b])
|
|
1260
1372
|
else:
|
|
1261
|
-
|
|
1373
|
+
mAtragaNa_abbrevs = '\n'.join([_ganas_to_abbrevs(ardha1_ganas), _ganas_to_abbrevs(ardha2_ganas)])
|
|
1374
|
+
problem_syllables = {}
|
|
1375
|
+
for actual, expected, ardha_num, ardha_w, even_pada in ardha_morae_pairs:
|
|
1376
|
+
anceps_ok = actual == expected - 1 and ardha_w[-1:] == 'l'
|
|
1377
|
+
if actual == expected or anceps_ok:
|
|
1378
|
+
continue
|
|
1379
|
+
g6 = g6_ardha1 if ardha_num == 1 else g6_ardha2
|
|
1380
|
+
ganas = ardha1_ganas if ardha_num == 1 else ardha2_ganas
|
|
1381
|
+
err = _validate_jAti_gaNas(ganas, g6, jAti_name, ardha_num)
|
|
1382
|
+
if err:
|
|
1383
|
+
_, bad_syls = err
|
|
1384
|
+
# map ardha-level offsets to pāda-level
|
|
1385
|
+
pada_a = ardha_num * 2 - 1
|
|
1386
|
+
pada_b = ardha_num * 2
|
|
1387
|
+
pada_a_len = len(w_p[pada_a - 1]) if len(w_p) >= 4 else 0
|
|
1388
|
+
a_syls = [i for i in bad_syls if i < pada_a_len]
|
|
1389
|
+
b_syls = [i - pada_a_len for i in bad_syls if i >= pada_a_len]
|
|
1390
|
+
if a_syls: problem_syllables[pada_a] = a_syls
|
|
1391
|
+
if b_syls: problem_syllables[pada_b] = b_syls
|
|
1392
|
+
if not a_syls and not b_syls:
|
|
1393
|
+
problem_syllables[pada_b] = bad_syls
|
|
1262
1394
|
Vrs.meter_label = jati_label + f" ({suffix})"
|
|
1263
1395
|
Vrs.identification_score = likely_score
|
|
1264
1396
|
Vrs.is_perfect = False
|
|
1397
|
+
Vrs.mAtragaNa_abbreviations = mAtragaNa_abbrevs
|
|
1265
1398
|
Vrs.diagnostic = Diagnostic(
|
|
1266
1399
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1267
1400
|
imperfect_label_english=per_pada_english or None,
|
|
1401
|
+
problem_syllables=problem_syllables or None,
|
|
1268
1402
|
)
|
|
1269
1403
|
continue
|
|
1270
1404
|
|
|
@@ -1423,7 +1557,7 @@ class VerseTester(object):
|
|
|
1423
1557
|
parts = [s for s in [ardha1_str, ardha2_str] if s]
|
|
1424
1558
|
imperfect_label_sa = '; '.join(parts) if parts else _gana_error_sanskrit((err1 or err2)[0])
|
|
1425
1559
|
|
|
1426
|
-
jati_label = jAti_name
|
|
1560
|
+
jati_label = jAti_name
|
|
1427
1561
|
jati_score = meter_scores["jāti, imperfect"]
|
|
1428
1562
|
# penalise pāda mora mismatches so that resplit attempts with better
|
|
1429
1563
|
# pāda alignment score higher and win arbitration in combine_results
|
|
@@ -1433,7 +1567,7 @@ class VerseTester(object):
|
|
|
1433
1567
|
anceps_ok = (is_ardha_final and actual == expected - 1
|
|
1434
1568
|
and w_p[pi] and w_p[pi][-1] == 'l')
|
|
1435
1569
|
if actual != expected and not anceps_ok:
|
|
1436
|
-
jati_score -=
|
|
1570
|
+
jati_score -= meter_scores["jāti, penalty, per mora-mismatched pāda"]
|
|
1437
1571
|
if jati_score >= Vrs.identification_score:
|
|
1438
1572
|
Vrs.meter_label = jati_label + f" ({imperfect_label_sa})"
|
|
1439
1573
|
Vrs.identification_score = jati_score
|
|
@@ -1447,7 +1581,7 @@ class VerseTester(object):
|
|
|
1447
1581
|
return 1
|
|
1448
1582
|
|
|
1449
1583
|
# Gaṇa rules passed — check whether pāda-level morae also match.
|
|
1450
|
-
jati_label = jAti_name
|
|
1584
|
+
jati_label = jAti_name
|
|
1451
1585
|
def quarters_ok(actual, expected, weights):
|
|
1452
1586
|
if len(actual) < 4 or len(weights) < 4:
|
|
1453
1587
|
return False
|
|
@@ -1504,45 +1638,90 @@ class VerseTester(object):
|
|
|
1504
1638
|
Runs through various possible meter types with respective identification_scores:
|
|
1505
1639
|
zloka
|
|
1506
1640
|
9 two zloka halves, both perfect
|
|
1507
|
-
|
|
1508
|
-
|
|
1641
|
+
7 two zloka halves, one perfect and one imperfect
|
|
1642
|
+
5 two zloka halves, both imperfect
|
|
1509
1643
|
9 one zloka half, perfect
|
|
1510
|
-
(not currently supported: one zloka half, imperfect)
|
|
1511
1644
|
samavftta, upajAti, vizamavftta, ardhasamavftta
|
|
1512
|
-
9 vizamavftta perfect
|
|
1645
|
+
9 vizamavftta perfect
|
|
1513
1646
|
(currently not supported: 5 vizamavftta imperfect)
|
|
1514
|
-
|
|
1515
|
-
|
|
1647
|
+
9 ardhasamavftta perfect
|
|
1648
|
+
7 ardhasamavftta imperfect
|
|
1516
1649
|
9 samavftta perfect
|
|
1517
|
-
8 upajAti perfect
|
|
1650
|
+
8 upajAti perfect (4 pAdas, triṣṭubh/jagatī/mix)
|
|
1518
1651
|
7 samavftta imperfect (2-3 lines match)
|
|
1519
|
-
|
|
1520
|
-
6 upajAti imperfect trizwuB
|
|
1521
|
-
5 upajAti imperfect non-trizwuB
|
|
1652
|
+
6 upajAti imperfect (2-3 pAdas)
|
|
1522
1653
|
jAti
|
|
1523
|
-
|
|
1524
|
-
|
|
1654
|
+
9 jAti perfect
|
|
1655
|
+
6 jAti imperfect
|
|
1656
|
+
4 jAti likely (±1 mora)
|
|
1525
1657
|
|
|
1526
1658
|
Embeds identification results as Verse.meter_label and Verse.identification_score.
|
|
1527
|
-
Returns string corresponding to Verse.meter_label. - currently
|
|
1528
|
-
Returns int result 1 if successul and 0 if not. - planned
|
|
1529
1659
|
"""
|
|
1530
1660
|
|
|
1531
1661
|
self.identification_attempt_count += 1
|
|
1532
1662
|
self._samavftta_has_length_error = False
|
|
1663
|
+
self._upajAti_needs_lev = False
|
|
1533
1664
|
|
|
1534
1665
|
# anuzwuB
|
|
1535
1666
|
success_anuzwuB = timed('anuzwuB')(self.test_as_anuzwuB)(Vrs)
|
|
1536
1667
|
if success_anuzwuB and Vrs.identification_score == meter_scores["max score"]:
|
|
1537
1668
|
return 1
|
|
1538
1669
|
|
|
1539
|
-
#
|
|
1540
|
-
|
|
1541
|
-
|
|
1542
|
-
|
|
1670
|
+
# samavṛtta / upajāti / viṣamavṛtta. The `samavftta_etc` bucket captures
|
|
1671
|
+
# dispatcher overhead (count_pAdasamatva + gate evaluation) by bracketing
|
|
1672
|
+
# the whole block and subtracting the inner timed buckets.
|
|
1673
|
+
_etc_t0 = _time.perf_counter() if _DEBUG_TIMING else None
|
|
1674
|
+
_etc_inner_keys = ('samavftta', 'upajAti', 'vizamavftta')
|
|
1675
|
+
_pre_etc_inner = (
|
|
1676
|
+
{k: _section_totals.get(k, 0.0) for k in _etc_inner_keys}
|
|
1677
|
+
if _DEBUG_TIMING else None
|
|
1678
|
+
)
|
|
1679
|
+
wbp_lens = [len(line) for line in Vrs.syllable_weights.split('\n')]
|
|
1680
|
+
success_samavftta_etc = 0
|
|
1681
|
+
if len(wbp_lens) >= 4 or (len(wbp_lens) == 1 and self.resplit_option == "single_pAda"):
|
|
1682
|
+
self.count_pAdasamatva(Vrs) # populates self.pAdasamatva_count in [0,2,3,4]
|
|
1683
|
+
|
|
1684
|
+
# perfect samavṛtta
|
|
1685
|
+
if self.pAdasamatva_count == 4:
|
|
1686
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
1687
|
+
success_samavftta_etc = 1
|
|
1688
|
+
else:
|
|
1689
|
+
# single-pāda samavṛtta (perfect)
|
|
1690
|
+
if self.pAdasamatva_count == 0 and self.resplit_option == "single_pAda":
|
|
1691
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs)
|
|
1692
|
+
|
|
1693
|
+
# perfect viṣamavṛtta (Levenshtein for imperfect deferred below)
|
|
1694
|
+
if self.pAdasamatva_count == 0 and timed('vizamavftta')(self.is_vizamavftta)(Vrs, perfect_only=True):
|
|
1695
|
+
success_samavftta_etc = 1
|
|
1696
|
+
|
|
1697
|
+
# perfect upajāti: all pādas same length
|
|
1698
|
+
unique_sorted_lens = sorted(set(wbp_lens[:4]))
|
|
1699
|
+
if len(unique_sorted_lens) == 1:
|
|
1700
|
+
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
1701
|
+
if Vrs.identification_score == 8:
|
|
1702
|
+
success_samavftta_etc = 1
|
|
1703
|
+
|
|
1704
|
+
# imperfect samavṛtta (Levenshtein for length errors deferred below)
|
|
1705
|
+
if self.pAdasamatva_count in [2, 3]:
|
|
1706
|
+
timed('samavftta')(self.evaluate_samavftta)(Vrs, perfect_only=True)
|
|
1707
|
+
|
|
1708
|
+
# imperfect upajāti: mixed lengths — after samavṛtta so its score
|
|
1709
|
+
# can trigger the potential_score bail inside evaluate_upajAti
|
|
1710
|
+
if len(unique_sorted_lens) in [2, 3] or unique_sorted_lens == [11, 12]:
|
|
1711
|
+
timed('upajAti')(self.evaluate_upajAti)(Vrs)
|
|
1712
|
+
|
|
1713
|
+
if Vrs.meter_label is not None:
|
|
1714
|
+
success_samavftta_etc = 1
|
|
1715
|
+
|
|
1543
1716
|
if _DEBUG_TIMING:
|
|
1544
|
-
|
|
1545
|
-
|
|
1717
|
+
_etc_elapsed = _time.perf_counter() - _etc_t0
|
|
1718
|
+
_etc_inner_delta = sum(
|
|
1719
|
+
_section_totals.get(k, 0.0) - _pre_etc_inner[k] for k in _etc_inner_keys
|
|
1720
|
+
)
|
|
1721
|
+
_section_totals['samavftta_etc'] = (
|
|
1722
|
+
_section_totals.get('samavftta_etc', 0.0) + _etc_elapsed - _etc_inner_delta
|
|
1723
|
+
)
|
|
1724
|
+
|
|
1546
1725
|
if success_samavftta_etc and Vrs.identification_score >= 8:
|
|
1547
1726
|
return 1
|
|
1548
1727
|
# i.e., if upajāti or anything imperfect, also continue on to check jāti
|
|
@@ -1568,6 +1747,8 @@ class VerseTester(object):
|
|
|
1568
1747
|
# imperfect pass: deferred Levenshtein annotation for samavftta length errors.
|
|
1569
1748
|
if self._samavftta_has_length_error:
|
|
1570
1749
|
timed('lev_samavftta')(self.evaluate_samavftta)(Vrs)
|
|
1750
|
+
if self._upajAti_needs_lev:
|
|
1751
|
+
timed('lev_upajAti')(self.evaluate_upajAti)(Vrs, perfect_only=False)
|
|
1571
1752
|
|
|
1572
1753
|
if success_anuzwuB or success_samavftta_etc or success_jAti or Vrs.identification_score >= meter_scores["ardhasamavṛtta, perfect"]:
|
|
1573
1754
|
return 1
|
|
@@ -1782,12 +1963,9 @@ class MeterIdentifier(object):
|
|
|
1782
1963
|
n_breaks = n_pAdas - 1
|
|
1783
1964
|
total = len(syllable_list)
|
|
1784
1965
|
|
|
1785
|
-
# Seed each break: prefer user-provided positions, fall back to canonical.
|
|
1786
1966
|
canonical_seeds = [pada_len * (i + 1) for i in range(n_breaks)]
|
|
1787
1967
|
seeds = list(user_seeds) if user_seeds else canonical_seeds
|
|
1788
1968
|
|
|
1789
|
-
# For each break, build the list of candidate positions:
|
|
1790
|
-
# either locked to seed (keep_midpoint) or all positions in [seed-tol, seed+tol].
|
|
1791
1969
|
def candidates(break_idx):
|
|
1792
1970
|
seed = seeds[break_idx]
|
|
1793
1971
|
if break_idx in keep_mid_breaks:
|
|
@@ -1826,9 +2004,7 @@ class MeterIdentifier(object):
|
|
|
1826
2004
|
seg_len = pos - prev
|
|
1827
2005
|
if not (pada_len - tol <= seg_len <= pada_len + tol):
|
|
1828
2006
|
continue
|
|
1829
|
-
# check remaining syllables can form valid pādas
|
|
1830
2007
|
remaining = total - pos
|
|
1831
|
-
remaining_breaks = n_breaks - break_idx - 1
|
|
1832
2008
|
remaining_pAdas = n_pAdas - break_idx - 1
|
|
1833
2009
|
min_remaining = remaining_pAdas * (pada_len - tol)
|
|
1834
2010
|
max_remaining = remaining_pAdas * (pada_len + tol)
|
|
@@ -1848,18 +2024,14 @@ class MeterIdentifier(object):
|
|
|
1848
2024
|
pada_len = 8
|
|
1849
2025
|
n_breaks = 5
|
|
1850
2026
|
|
|
1851
|
-
# Derive user seeds from punctuation/newlines when available,
|
|
1852
|
-
# mirroring the seeding logic in wiggle_identify.
|
|
1853
2027
|
user_seeds = None
|
|
1854
2028
|
if len(newline_indices) == n_breaks:
|
|
1855
2029
|
if resplit_option in ('none', 'resplit_lite'):
|
|
1856
|
-
# all breaks provided — seed all five from user positions
|
|
1857
2030
|
user_seeds = [
|
|
1858
2031
|
text_syllabified[:newline_indices[i]].count(scansion_syllable_separator)
|
|
1859
2032
|
for i in range(n_breaks)
|
|
1860
2033
|
]
|
|
1861
2034
|
elif resplit_option == 'resplit_max' and VrsTster.resplit_keep_midpoint:
|
|
1862
|
-
# seed bc (idx 1) and de (idx 3) from user positions, wiggle the rest
|
|
1863
2035
|
canonical = [pada_len * (i + 1) for i in range(n_breaks)]
|
|
1864
2036
|
canonical[1] = text_syllabified[:newline_indices[1]].count(scansion_syllable_separator)
|
|
1865
2037
|
canonical[3] = text_syllabified[:newline_indices[3]].count(scansion_syllable_separator)
|
|
@@ -1936,10 +2108,7 @@ class MeterIdentifier(object):
|
|
|
1936
2108
|
self.Scanner = S = Sc()
|
|
1937
2109
|
|
|
1938
2110
|
if _DEBUG_TIMING:
|
|
1939
|
-
|
|
1940
|
-
'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
|
|
1941
|
-
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
1942
|
-
_pre = {k: _section_totals.get(k, 0.0) for k in _pre_keys}
|
|
2111
|
+
_pre = {k: _section_totals.get(k, 0.0) for k in _TIMING_KEYS}
|
|
1943
2112
|
|
|
1944
2113
|
# gets back mostly populated Verse object
|
|
1945
2114
|
V = S.scan(rw_str, from_scheme=from_scheme)
|
|
@@ -2058,7 +2227,7 @@ class MeterIdentifier(object):
|
|
|
2058
2227
|
best_total_dist = total_dist
|
|
2059
2228
|
best_entry = (_stash_wbp, _label, _odd_can, _even_can, _stash_tsyl, _stash_gaRa, _stash_morae)
|
|
2060
2229
|
if best_entry is not None:
|
|
2061
|
-
ardha_score = meter_scores["ardhasamavṛtta, imperfect"] - (best_total_dist -
|
|
2230
|
+
ardha_score = meter_scores["ardhasamavṛtta, imperfect"] - (best_total_dist - meter_scores["levenshtein distance penalty"])
|
|
2062
2231
|
if ardha_score > best_current_score:
|
|
2063
2232
|
best_stash_wbp, best_label, best_odd_can, best_even_can, best_stash_tsyl, best_stash_gaRa, best_stash_morae = best_entry
|
|
2064
2233
|
problem_syllables = {}
|
|
@@ -2124,7 +2293,7 @@ class MeterIdentifier(object):
|
|
|
2124
2293
|
best_total_dist = total_dist
|
|
2125
2294
|
best_entry = (_wbp, _label, _canonicals, _tsyl, _gaRa, _morae)
|
|
2126
2295
|
if best_entry is not None:
|
|
2127
|
-
vizama_score = meter_scores["viṣamavṛtta, imperfect"] - (best_total_dist -
|
|
2296
|
+
vizama_score = meter_scores["viṣamavṛtta, imperfect"] - (best_total_dist - meter_scores["levenshtein distance penalty"])
|
|
2128
2297
|
if vizama_score > best_current_score:
|
|
2129
2298
|
best_wbp, best_label, best_canonicals, best_tsyl, best_gaRa, best_morae = best_entry
|
|
2130
2299
|
problem_syllables = {}
|
|
@@ -2181,11 +2350,8 @@ class MeterIdentifier(object):
|
|
|
2181
2350
|
V.identification_score = meter_scores["none found"]
|
|
2182
2351
|
|
|
2183
2352
|
if _DEBUG_TIMING:
|
|
2184
|
-
|
|
2185
|
-
|
|
2186
|
-
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
2187
|
-
verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in all_keys}
|
|
2188
|
-
verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
|
|
2353
|
+
verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in _TIMING_KEYS}
|
|
2354
|
+
verse_times['scan'] = sum(verse_times[k] for k in _SCAN_KEYS)
|
|
2189
2355
|
cat = _meter_label_to_category(V.meter_label)
|
|
2190
2356
|
bucket = _category_totals.setdefault(cat, {})
|
|
2191
2357
|
for k, v in verse_times.items():
|
|
@@ -2241,11 +2407,8 @@ def _identify_meter_worker(args):
|
|
|
2241
2407
|
import skrutable.meter_identification as _mi
|
|
2242
2408
|
_mi._DEBUG_TIMING = True
|
|
2243
2409
|
MI = MeterIdentifier()
|
|
2244
|
-
all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
2245
|
-
'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
|
|
2246
|
-
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
2247
2410
|
if debug_timing:
|
|
2248
|
-
pre = {k: _section_totals.get(k, 0.0) for k in
|
|
2411
|
+
pre = {k: _section_totals.get(k, 0.0) for k in _TIMING_KEYS}
|
|
2249
2412
|
pre_wiggle = _section_totals.get('wiggle_count', 0)
|
|
2250
2413
|
V = MI.identify_meter(
|
|
2251
2414
|
rw_str,
|
|
@@ -2254,8 +2417,8 @@ def _identify_meter_worker(args):
|
|
|
2254
2417
|
from_scheme=from_scheme,
|
|
2255
2418
|
)
|
|
2256
2419
|
if debug_timing:
|
|
2257
|
-
verse_times = {k: _section_totals.get(k, 0.0) - pre[k] for k in
|
|
2258
|
-
verse_times['scan'] = sum(verse_times[k] for k in
|
|
2420
|
+
verse_times = {k: _section_totals.get(k, 0.0) - pre[k] for k in _TIMING_KEYS}
|
|
2421
|
+
verse_times['scan'] = sum(verse_times[k] for k in _SCAN_KEYS)
|
|
2259
2422
|
verse_times['wiggle_count'] = _section_totals.get('wiggle_count', 0) - pre_wiggle
|
|
2260
2423
|
cat = _meter_label_to_category(V.meter_label)
|
|
2261
2424
|
return V, verse_times, cat
|
|
@@ -313,8 +313,8 @@ samavfttas_by_family_and_gaRa = {
|
|
|
313
313
|
22: {
|
|
314
314
|
'mmtnnns(g|l)' : 'haṃsī', # also mmggnnnngg
|
|
315
315
|
'tByjsrn(g|l)' : 'aśvadhāṭī',
|
|
316
|
-
'Brnrnrn(g|l)' : 'madraka'
|
|
317
|
-
},
|
|
316
|
+
'Brnrnrn(g|l)' : 'madraka',
|
|
317
|
+
},
|
|
318
318
|
|
|
319
319
|
23: {
|
|
320
320
|
'njBjBjBl(g|l)' : 'adritanayā',
|
|
@@ -40,6 +40,7 @@ class Verse(object):
|
|
|
40
40
|
self.meter_label = None # string
|
|
41
41
|
self.identification_score = 0 # int
|
|
42
42
|
self.diagnostic = None # Diagnostic or dict of Diagnostics, set by meter_identification
|
|
43
|
+
self.alternatives = [] # list of {'meter_label': str, 'diagnostic': ...} for atha-vā ties
|
|
43
44
|
|
|
44
45
|
def summarize(self,
|
|
45
46
|
show_weights=True, show_morae=True, show_gaRas=True, # part_A
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.7.0"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|