skrutable 2.6.3__tar.gz → 2.7.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skrutable-2.6.3 → skrutable-2.7.0}/PKG-INFO +1 -1
- skrutable-2.7.0/src/skrutable/__init__.py +1 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/config.json +7 -3
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/meter_identification.py +243 -46
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/meter_patterns.py +3 -2
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable.egg-info/PKG-INFO +1 -1
- skrutable-2.6.3/src/skrutable/__init__.py +0 -1
- {skrutable-2.6.3 → skrutable-2.7.0}/LICENSE.md +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/README.md +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/setup.cfg +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/setup.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/config.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/generate_scheme_vectors.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/impossible_bigrams.json +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/manual.md +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/phonemes.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/run_examples.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/scansion.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/scheme_vectors.json +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/splitting.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/transliteration.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/utils.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable.egg-info/SOURCES.txt +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.6.3 → skrutable-2.7.0}/src/skrutable.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.7.0"
|
|
@@ -15,10 +15,14 @@
|
|
|
15
15
|
"anuṣṭubh, full, both halves imperfect)": 5,
|
|
16
16
|
"anuṣṭubh, full, one half perfect, one length error)": 6,
|
|
17
17
|
"anuṣṭubh, full, one half imperfect, one length error)": 4,
|
|
18
|
-
"anuṣṭubh,
|
|
19
|
-
"anuṣṭubh,
|
|
18
|
+
"anuṣṭubh, 1 or 3 halves, all halves perfect)" : 9,
|
|
19
|
+
"anuṣṭubh, 1 or 3 halves, one imperfect)": 7,
|
|
20
|
+
"anuṣṭubh, 1 or 3 halves, two imperfect)": 6,
|
|
21
|
+
"anuṣṭubh, 1 or 3 halves, some perfect some length error)": 6,
|
|
22
|
+
"anuṣṭubh, 1 or 3 halves, at least one half imperfect)": 5,
|
|
23
|
+
"anuṣṭubh, 1 or 3 halves, some imperfect some length error)": 4,
|
|
20
24
|
"samavṛtta, perfect" : 9,
|
|
21
|
-
"samavṛtta, imperfect (3)" :
|
|
25
|
+
"samavṛtta, imperfect (3)" : 7,
|
|
22
26
|
"samavṛtta, imperfect (2)" : 5,
|
|
23
27
|
"samavṛtta, quarter, perfect" : 8,
|
|
24
28
|
"ardhasamavṛtta, perfect" : 9,
|
|
@@ -66,9 +66,9 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
|
|
|
66
66
|
return
|
|
67
67
|
import sys, os
|
|
68
68
|
scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
|
|
69
|
-
type_keys = ('anuzwuB', 'samavftta_etc', 'samavftta', 'upajAti', 'ardhasamavftta_perfect', 'vizamavftta', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama')
|
|
69
|
+
type_keys = ('anuzwuB', 'ardhatraya', 'samavftta_etc', 'samavftta', 'upajAti', 'ardhasamavftta_perfect', 'vizamavftta', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama')
|
|
70
70
|
type_abbrev = {
|
|
71
|
-
'anuzwuB': 'anuṣṭ', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
|
|
71
|
+
'anuzwuB': 'anuṣṭ', 'ardhatraya': 'anuṣṭ3', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
|
|
72
72
|
'ardhasamavftta_perfect': 'ardha✓', 'vizamavftta': 'vizama',
|
|
73
73
|
'jAti': 'jāti',
|
|
74
74
|
'lev_samavftta': 'lev✗sama', 'lev_ardha': 'lev✗ardh', 'lev_vizama': 'lev✗visa',
|
|
@@ -91,7 +91,8 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
|
|
|
91
91
|
|
|
92
92
|
n_verses = sum(b.get('_count', 0) for b in _category_totals.values())
|
|
93
93
|
wiggle_count = _section_totals.get('wiggle_count', 0)
|
|
94
|
-
|
|
94
|
+
ardhatraya_gate_count = _section_totals.get('ardhatraya_gate_count', 0)
|
|
95
|
+
lines = [f'\n=== {n_verses} verses / {wiggle_count} resplit candidates / {ardhatraya_gate_count} ardhatraya gate hits ===']
|
|
95
96
|
hdr = (' ' + 'category'.ljust(col_cat_w)
|
|
96
97
|
+ 'perf'.rjust(count_w) + 'impf'.rjust(count_w)
|
|
97
98
|
+ 'total'.rjust(sub_w) + 'scan∑'.rjust(sub_w) + 'types∑'.rjust(sub_w)
|
|
@@ -536,14 +537,14 @@ class VerseTester(object):
|
|
|
536
537
|
return None
|
|
537
538
|
if ardham_eva_result.perfect():
|
|
538
539
|
Vrs.meter_label = f"anuṣṭubh (ardham eva: {ardham_eva_result.perfect_id_label})"
|
|
539
|
-
Vrs.identification_score = meter_scores["anuṣṭubh,
|
|
540
|
+
Vrs.identification_score = meter_scores["anuṣṭubh, 1 or 3 halves, all halves perfect)"]
|
|
540
541
|
Vrs.is_perfect = True
|
|
541
542
|
Vrs.diagnostic = ardham_eva_result
|
|
542
543
|
return ardham_eva_result
|
|
543
544
|
elif ardham_eva_result.imperfect():
|
|
544
545
|
label_str = '; '.join(ardham_eva_result.imperfect_label_sanskrit.values())
|
|
545
546
|
Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
|
|
546
|
-
Vrs.identification_score = meter_scores["anuṣṭubh,
|
|
547
|
+
Vrs.identification_score = meter_scores["anuṣṭubh, 1 or 3 halves, at least one half imperfect)"]
|
|
547
548
|
Vrs.is_perfect = False
|
|
548
549
|
Vrs.diagnostic = ardham_eva_result
|
|
549
550
|
return ardham_eva_result
|
|
@@ -691,12 +692,10 @@ class VerseTester(object):
|
|
|
691
692
|
imperfect_note = None
|
|
692
693
|
|
|
693
694
|
if self.pAdasamatva_count == 3:
|
|
694
|
-
imperfect_note =
|
|
695
|
-
meter_label += " (%s)" % imperfect_note
|
|
695
|
+
imperfect_note = True
|
|
696
696
|
score = meter_scores["samavṛtta, imperfect (3)"]
|
|
697
697
|
elif self.pAdasamatva_count == 2:
|
|
698
|
-
imperfect_note =
|
|
699
|
-
meter_label += " (%s)" % imperfect_note
|
|
698
|
+
imperfect_note = True
|
|
700
699
|
score = meter_scores["samavṛtta, imperfect (2)"]
|
|
701
700
|
elif self.pAdasamatva_count == 0:
|
|
702
701
|
imperfect_note = "1 eva pādaḥ"
|
|
@@ -759,12 +758,10 @@ class VerseTester(object):
|
|
|
759
758
|
problem_syllables=problem_syllables or None,
|
|
760
759
|
)
|
|
761
760
|
else:
|
|
762
|
-
# fewer than 4 matching pādas; append
|
|
763
|
-
length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()
|
|
764
|
-
full_imperfect_str = imperfect_note
|
|
761
|
+
# fewer than 4 matching pādas; append per-pāda notes to the meter_label
|
|
762
|
+
length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
|
|
765
763
|
if length_notes:
|
|
766
|
-
|
|
767
|
-
meter_label = meter_label.replace(f"({imperfect_note})", f"({full_imperfect_str})")
|
|
764
|
+
meter_label += " (%s)" % "; ".join(length_notes)
|
|
768
765
|
diagnostic = Diagnostic(
|
|
769
766
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
770
767
|
imperfect_label_english=per_pada_english or None,
|
|
@@ -773,7 +770,7 @@ class VerseTester(object):
|
|
|
773
770
|
|
|
774
771
|
# score arbitration: may tie with pre-existing result (e.g., upajāti)
|
|
775
772
|
old_score = Vrs.identification_score
|
|
776
|
-
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=imperfect_note
|
|
773
|
+
self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=not imperfect_note and not has_any_error)
|
|
777
774
|
if score >= old_score:
|
|
778
775
|
Vrs.diagnostic = diagnostic
|
|
779
776
|
|
|
@@ -958,14 +955,13 @@ class VerseTester(object):
|
|
|
958
955
|
|
|
959
956
|
meter_labels.append(meter_label)
|
|
960
957
|
|
|
961
|
-
unique_meter_labels =
|
|
958
|
+
unique_meter_labels = sorted(set(meter_labels)) # de-dupe, stable order
|
|
962
959
|
combined_meter_labels = ', '.join(unique_meter_labels)
|
|
963
960
|
|
|
964
961
|
# Assign score based on how complete and homogeneous the match is.
|
|
965
962
|
family = meter_patterns.samavftta_family_names[wbp_lens[0]] if wbp_lens[0] < 27 else 'daṇḍaka'
|
|
966
|
-
unique_meter_labels_copy = unique_meter_labels; unique_meter_labels_copy.sort()
|
|
967
963
|
if (family == "triṣṭubh" and
|
|
968
|
-
|
|
964
|
+
unique_meter_labels == ['indravajrā [11: ttjgg]', 'upendravajrā [11: jtjgg]']
|
|
969
965
|
):
|
|
970
966
|
family = '' # clearer not to specify in this case
|
|
971
967
|
|
|
@@ -989,19 +985,12 @@ class VerseTester(object):
|
|
|
989
985
|
if all(lbl.startswith('ajñātam') for lbl in meter_labels):
|
|
990
986
|
score -= 1
|
|
991
987
|
|
|
992
|
-
imperfect_note =
|
|
988
|
+
imperfect_note = len(wbp_lens) != 4 and unique_sorted_lens != [11, 12]
|
|
993
989
|
overall_meter_label = "upajāti %s: %s" % (
|
|
994
990
|
family,
|
|
995
991
|
combined_meter_labels
|
|
996
992
|
)
|
|
997
993
|
|
|
998
|
-
if (
|
|
999
|
-
len(wbp_lens) != 4 and
|
|
1000
|
-
unique_sorted_lens != [11, 12]
|
|
1001
|
-
): # not perfect and also not triṣṭubh-jagatī-saṃkara
|
|
1002
|
-
imperfect_note = "? %d eva pādāḥ yuktāḥ" % len(wbp_lens)
|
|
1003
|
-
overall_meter_label += " (%s)" % imperfect_note
|
|
1004
|
-
|
|
1005
994
|
# Build diagnostic: excluded pādas are flagged as hyper/hypometric relative
|
|
1006
995
|
# to the majority length; included pādas contribute no error entry.
|
|
1007
996
|
most_freq_len = wbp_lens[0] if wbp_lens else None
|
|
@@ -1018,11 +1007,14 @@ class VerseTester(object):
|
|
|
1018
1007
|
per_pada_sanskrit[pada_num] = 'adhikākṣarā' if hyper else 'ūnākṣarā'
|
|
1019
1008
|
per_pada_english[pada_num] = 'hypermetric' if hyper else 'hypometric'
|
|
1020
1009
|
|
|
1021
|
-
|
|
1022
|
-
|
|
1010
|
+
# Append per-pāda imperfect notes to label.
|
|
1011
|
+
length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
|
|
1012
|
+
if length_notes:
|
|
1013
|
+
overall_meter_label += " (%s)" % "; ".join(length_notes)
|
|
1014
|
+
|
|
1015
|
+
if not per_pada_english and not imperfect_note:
|
|
1023
1016
|
diagnostic = Diagnostic(perfect_id_label=overall_meter_label)
|
|
1024
|
-
elif imperfect_note
|
|
1025
|
-
# all four pādas included but some have length errors
|
|
1017
|
+
elif not imperfect_note:
|
|
1026
1018
|
diagnostic = Diagnostic(
|
|
1027
1019
|
perfect_id_label=overall_meter_label,
|
|
1028
1020
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
@@ -1030,11 +1022,6 @@ class VerseTester(object):
|
|
|
1030
1022
|
problem_syllables=problem_syllables or None,
|
|
1031
1023
|
)
|
|
1032
1024
|
else:
|
|
1033
|
-
# fewer than 4 pādas included; append length notes to the meter_label
|
|
1034
|
-
length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
|
|
1035
|
-
if length_notes:
|
|
1036
|
-
full_imperfect_str = imperfect_note + "; " + "; ".join(length_notes)
|
|
1037
|
-
overall_meter_label = overall_meter_label.replace(f"({imperfect_note})", f"({full_imperfect_str})")
|
|
1038
1025
|
diagnostic = Diagnostic(
|
|
1039
1026
|
imperfect_label_sanskrit=per_pada_sanskrit or None,
|
|
1040
1027
|
imperfect_label_english=per_pada_english or None,
|
|
@@ -1043,7 +1030,8 @@ class VerseTester(object):
|
|
|
1043
1030
|
|
|
1044
1031
|
# score arbitration: may tie with pre-existing result (e.g., samavṛtta)
|
|
1045
1032
|
old_score = Vrs.identification_score
|
|
1046
|
-
|
|
1033
|
+
is_perfect = not imperfect_note and not per_pada_english
|
|
1034
|
+
self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=is_perfect)
|
|
1047
1035
|
if score >= old_score:
|
|
1048
1036
|
Vrs.diagnostic = diagnostic
|
|
1049
1037
|
|
|
@@ -1586,6 +1574,63 @@ class VerseTester(object):
|
|
|
1586
1574
|
else:
|
|
1587
1575
|
return 0
|
|
1588
1576
|
|
|
1577
|
+
def attempt_ardhatraya_identification(self, Vrs):
    """
    Identification for 6-pāda input (3 ardhas = 1.5 anuṣṭubh verses).

    Tests each ardha (pāda pair) independently with test_as_anuzwuB_half,
    then assembles a combined label and score from all three results.

    Vrs: verse object. Vrs.syllable_weights is read (one line per pāda;
        only the first six lines are used). On success meter_label,
        identification_score, is_perfect and diagnostic are set on it;
        diagnostic becomes a dict keyed 'ab' / 'cd' / 'ef' holding the
        three per-ardha results.

    Returns 1 if identified, 0 otherwise (fewer than six weight lines, or
    test_as_anuzwuB_half returned None for any ardha).
    """

    w_p = Vrs.syllable_weights.split('\n')
    if len(w_p) < 6:
        return 0

    # One result per ardha: pādas (1,2), (3,4), (5,6).
    results = [
        self.test_as_anuzwuB_half(w_p[i], w_p[i + 1]) for i in (0, 2, 4)
    ]
    if any(r is None for r in results):
        return 0

    def _ardha_label(r):
        if r.perfect():
            return r.perfect_id_label
        # imperfect_label_sanskrit can be None (Diagnostic objects in this
        # file are built with `... or None`), so fall back to an empty
        # mapping instead of raising AttributeError on `.values()`.
        return '; '.join((r.imperfect_label_sanskrit or {}).values())

    l1, l2, l3 = (_ardha_label(r) for r in results)
    Vrs.meter_label = f"anuṣṭubh (1,2: {l1}; 3,4: {l2}; 5,6: {l3})"

    n_perfect = sum(1 for r in results if r.perfect())
    n_length_error = sum(1 for r in results if r.length_error())
    # Whatever is neither perfect nor a length error counts as imperfect.
    n_imperfect = 3 - n_perfect - n_length_error

    if n_length_error == 0:
        if n_imperfect == 0:
            score_key = "anuṣṭubh, 1 or 3 halves, all halves perfect)"
        elif n_imperfect == 3:
            score_key = "anuṣṭubh, 1 or 3 halves, at least one half imperfect)"
        elif n_imperfect == 2:
            score_key = "anuṣṭubh, 1 or 3 halves, two imperfect)"
        else:
            score_key = "anuṣṭubh, 1 or 3 halves, one imperfect)"
    elif n_imperfect > 0 and n_perfect == 0:
        score_key = "anuṣṭubh, 1 or 3 halves, some imperfect some length error)"
    else:
        # Covers perfect + length error, and the fully mixed case.
        # NOTE(review): three length errors (no perfect, no imperfect half)
        # also land here and are scored as "some perfect" — confirm intended.
        score_key = "anuṣṭubh, 1 or 3 halves, some perfect some length error)"

    Vrs.identification_score = meter_scores[score_key]
    # Only the all-perfect, no-length-error outcome counts as perfect.
    Vrs.is_perfect = (n_length_error == 0 and n_imperfect == 0)

    Vrs.diagnostic = dict(zip(('ab', 'cd', 'ef'), results))
    return 1
|
|
1633
|
+
|
|
1589
1634
|
|
|
1590
1635
|
class MeterIdentifier(object):
|
|
1591
1636
|
"""
|
|
@@ -1611,6 +1656,8 @@ class MeterIdentifier(object):
|
|
|
1611
1656
|
"""
|
|
1612
1657
|
|
|
1613
1658
|
iter_list = [start_pos]
|
|
1659
|
+
if resplit_option == 'none':
|
|
1660
|
+
return iter_list
|
|
1614
1661
|
if resplit_option == 'resplit_max':
|
|
1615
1662
|
distance_multiplier = 0.50 # wiggle as far as 50% of part_len
|
|
1616
1663
|
elif resplit_option == 'resplit_lite':
|
|
@@ -1696,6 +1743,141 @@ class MeterIdentifier(object):
|
|
|
1696
1743
|
return Verses_found
|
|
1697
1744
|
|
|
1698
1745
|
|
|
1746
|
+
def resplit_Verse_ardhatraya(self, syllable_list, ab_br, bc_br, cd_br, de_br, ef_br):
    """
    Rebuild a six-pāda syllabified text from a flat syllable list.

    The five *_br arguments are slice indices into syllable_list marking
    where each following pāda starts. A copy of the list is passed to
    _fix_conjunct_pada_boundaries together with the ab, cd and ef breaks
    (bc and de are not passed); the six slices of that copy are then each
    joined with scansion_syllable_separator and the pādas joined with
    newlines.
    """
    working = list(syllable_list)
    _fix_conjunct_pada_boundaries(working, [ab_br, cd_br, ef_br])

    cuts = (ab_br, bc_br, cd_br, de_br, ef_br)
    # None as first start / last stop reproduces the open-ended slices.
    starts = (None,) + cuts
    stops = cuts + (None,)
    return '\n'.join(
        scansion_syllable_separator.join(working[lo:hi])
        for lo, hi in zip(starts, stops)
    )
|
|
1757
|
+
|
|
1758
|
+
def constrained_resplit_identify(self, Vrs, syllable_list, VrsTster,
                                 n_pAdas, pada_len, resplit_option,
                                 resplit_func, attempt_func,
                                 keep_mid_breaks=None, user_seeds=None):
    """
    Constrained resplit enumerator for known-structure meters.

    Rather than wiggling break positions freely, generates only splits where
    every pāda length falls within [pada_len - tol, pada_len + tol], where
    tol = 1 for resplit_lite/none, 2 for any other option (resplit_max).
    Each surviving split is rescanned on a shallow copy of Vrs and passed to
    attempt_func; the search stops as soon as a successful copy reaches
    meter_scores["max score"].

    Vrs: verse object used as the template for every candidate copy
    syllable_list: flat list of syllables for the whole verse
    VrsTster: not read by this method
    n_pAdas: number of pādas expected (e.g. 6 for ardhatraya, 4 for samavṛtta)
    pada_len: canonical pāda length in syllables
    resplit_option: resplit mode string; used here only to choose tol
    resplit_func: callable(syllable_list, *break_positions) → text_syllabified
    attempt_func: callable(Vrs) → 0 or 1
    keep_mid_breaks: set of 0-indexed break indices to lock to seed position
        (e.g. {1, 3} for ardhatraya bc/de when resplit_keep_midpoint)
    user_seeds: list of break positions derived from user-provided punctuation/newlines;
        overrides canonical pada_len-based seeds where provided. A list
        shorter than the number of breaks, or None entries, leave the
        remaining breaks on their canonical seed.

    Returns a list for MeterIdentifier.Verses_found.
    """
    tol = 1 if resplit_option in ('none', 'resplit_lite') else 2
    keep_mid_breaks = keep_mid_breaks or set()
    n_breaks = n_pAdas - 1
    total = len(syllable_list)
    min_len, max_len = pada_len - tol, pada_len + tol

    # Seed each break: start from canonical positions, then overlay whatever
    # the user provided. Overlaying per break (instead of swapping the whole
    # list) keeps seeds[break_idx] valid even if user_seeds is too short.
    seeds = [pada_len * (i + 1) for i in range(n_breaks)]
    if user_seeds:
        for i, pos in enumerate(list(user_seeds)[:n_breaks]):
            if pos is not None:
                seeds[i] = pos

    # For each break, build the list of candidate positions:
    # either locked to seed (keep_midpoint) or all positions in [seed-tol, seed+tol].
    def candidates(break_idx):
        seed = seeds[break_idx]
        if break_idx in keep_mid_breaks:
            return [seed]
        return list(range(seed - tol, seed + tol + 1))

    S = Sc()
    Verses_found = []

    def _evaluate(chosen):
        # Rescan one complete split and test it; True signals early exit.
        try:
            new_text_syllabified = resplit_func(syllable_list, *chosen)
            temp_V = copy(Vrs)
            temp_V.text_syllabified = new_text_syllabified
            if _DEBUG_TIMING:
                _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + 1
            temp_V.syllable_weights = timed('scan_weights')(S.scan_syllable_weights)(
                temp_V.text_syllabified)
            temp_V.morae_per_line = timed('scan_morae_gana')(S.count_morae)(
                temp_V.syllable_weights)
            temp_V.gaRa_abbreviations = timed('scan_morae_gana')(
                lambda: '\n'.join([S.gaRa_abbreviate(line) for line in temp_V.syllable_weights.split('\n')])
            )()
            success = attempt_func(temp_V)
            if success:
                Verses_found.append(temp_V)
                if temp_V.identification_score == meter_scores["max score"]:
                    return True  # signal early exit
        except IndexError:
            # Best-effort: a candidate split that raises IndexError while
            # being rebuilt, rescanned or tested is skipped, not fatal.
            pass
        return False

    def _recurse(break_idx, chosen):
        if break_idx == n_breaks:
            return _evaluate(chosen)

        prev = chosen[-1] if chosen else 0
        # Bounds on what must be left over after this break; they depend
        # only on break_idx, so compute them once per level.
        remaining_pAdas = n_pAdas - break_idx - 1
        min_remaining = remaining_pAdas * min_len
        max_remaining = remaining_pAdas * max_len

        for pos in candidates(break_idx):
            if not (min_len <= pos - prev <= max_len):
                continue
            # check remaining syllables can form valid pādas
            if not (min_remaining <= total - pos <= max_remaining):
                continue
            if _recurse(break_idx + 1, chosen + [pos]):
                return True  # propagate early exit
        return False

    _recurse(0, [])
    return Verses_found
|
|
1843
|
+
|
|
1844
|
+
def wiggle_identify_ardhatraya(self, Vrs, syllable_list, VrsTster,
                               newline_indices, text_syllabified):
    """
    Constrained resplit for 6-pāda (3-ardha) anuṣṭubh.

    Works out seed break positions (from the user's newlines when exactly
    five are present, otherwise left to the canonical 8-syllable spacing)
    and delegates the search to constrained_resplit_identify with
    resplit_Verse_ardhatraya as the splitter and
    VrsTster.attempt_ardhatraya_identification as the tester.

    newline_indices: character offsets into text_syllabified of
        user-provided pāda breaks
    text_syllabified: syllabified text those offsets refer to

    Returns whatever constrained_resplit_identify returns.
    """
    pada_len = 8
    n_breaks = 5
    resplit_option = VrsTster.resplit_option

    def syllables_before(break_no):
        # Separator count up to the break's offset, used as the break's
        # position in the syllable list.
        prefix = text_syllabified[:newline_indices[break_no]]
        return prefix.count(scansion_syllable_separator)

    # Derive user seeds from punctuation/newlines when available.
    user_seeds = None
    if len(newline_indices) == n_breaks:
        if resplit_option in ('none', 'resplit_lite'):
            # all breaks provided — seed all five from user positions
            user_seeds = [syllables_before(i) for i in range(n_breaks)]
        elif resplit_option == 'resplit_max' and VrsTster.resplit_keep_midpoint:
            # seed bc (idx 1) and de (idx 3) from user positions, wiggle the rest
            user_seeds = [pada_len * k for k in range(1, n_breaks + 1)]
            for mid in (1, 3):
                user_seeds[mid] = syllables_before(mid)

    # NOTE(review): the bc/de lock applies whenever resplit_keep_midpoint is
    # set, including when no user seeds were derived above (the breaks are
    # then pinned to the canonical positions) — confirm this is intended.
    if VrsTster.resplit_keep_midpoint:
        locked = {1, 3}
    else:
        locked = set()

    return self.constrained_resplit_identify(
        Vrs, syllable_list, VrsTster,
        n_pAdas=6, pada_len=pada_len,
        resplit_option=resplit_option,
        resplit_func=self.resplit_Verse_ardhatraya,
        attempt_func=VrsTster.attempt_ardhatraya_identification,
        keep_mid_breaks=locked,
        user_seeds=user_seeds,
    )
|
|
1879
|
+
|
|
1880
|
+
|
|
1699
1881
|
def find_meter(self, rw_str, from_scheme=None):
|
|
1700
1882
|
|
|
1701
1883
|
self.Scanner = S = Sc()
|
|
@@ -1755,7 +1937,7 @@ class MeterIdentifier(object):
|
|
|
1755
1937
|
|
|
1756
1938
|
if _DEBUG_TIMING:
|
|
1757
1939
|
_pre_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
1758
|
-
'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
|
|
1940
|
+
'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
|
|
1759
1941
|
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
1760
1942
|
_pre = {k: _section_totals.get(k, 0.0) for k in _pre_keys}
|
|
1761
1943
|
|
|
@@ -1766,7 +1948,7 @@ class MeterIdentifier(object):
|
|
|
1766
1948
|
self.VerseTester.resplit_option = resplit_option
|
|
1767
1949
|
self.VerseTester.resplit_keep_midpoint = resplit_keep_midpoint
|
|
1768
1950
|
|
|
1769
|
-
if resplit_option
|
|
1951
|
+
if resplit_option == 'single_pAda' or V.text_cleaned == '':
|
|
1770
1952
|
# No resplitting: test the verse exactly as scanned.
|
|
1771
1953
|
VT._ardha_stash = []
|
|
1772
1954
|
VT._vizama_stash = []
|
|
@@ -1778,7 +1960,7 @@ class MeterIdentifier(object):
|
|
|
1778
1960
|
if VT._vizama_stash and meter_scores["viṣamavṛtta, imperfect"] > V.identification_score:
|
|
1779
1961
|
timed('lev_vizama')(VT.is_vizamavftta)(V)
|
|
1780
1962
|
|
|
1781
|
-
elif resplit_option in ['resplit_max', 'resplit_lite']:
|
|
1963
|
+
elif resplit_option in ['none', 'resplit_max', 'resplit_lite']:
|
|
1782
1964
|
|
|
1783
1965
|
# Capture any user-provided pāda breaks (newlines surviving scansion cleaning).
|
|
1784
1966
|
newline_indices = [
|
|
@@ -1804,14 +1986,14 @@ class MeterIdentifier(object):
|
|
|
1804
1986
|
)
|
|
1805
1987
|
|
|
1806
1988
|
if len(newline_indices) == 3:
|
|
1807
|
-
if resplit_option
|
|
1989
|
+
if resplit_option in ('none', 'resplit_lite'):
|
|
1808
1990
|
# all three breaks provided — override all three
|
|
1809
1991
|
pAda_brs['ab'], pAda_brs['bc'], pAda_brs['cd'] = (
|
|
1810
1992
|
V.text_syllabified[:newline_indices[i]].count(
|
|
1811
1993
|
scansion_syllable_separator
|
|
1812
1994
|
) for i in [0, 1, 2]
|
|
1813
1995
|
)
|
|
1814
|
-
elif
|
|
1996
|
+
elif (
|
|
1815
1997
|
resplit_option == 'resplit_max' and
|
|
1816
1998
|
self.VerseTester.resplit_keep_midpoint
|
|
1817
1999
|
):
|
|
@@ -1820,15 +2002,15 @@ class MeterIdentifier(object):
|
|
|
1820
2002
|
scansion_syllable_separator)
|
|
1821
2003
|
|
|
1822
2004
|
elif len(newline_indices) == 1:
|
|
1823
|
-
if
|
|
1824
|
-
resplit_option
|
|
2005
|
+
if (
|
|
2006
|
+
resplit_option in ('none', 'resplit_lite')
|
|
1825
2007
|
) or (
|
|
1826
2008
|
resplit_option == 'resplit_max' and
|
|
1827
2009
|
self.VerseTester.resplit_keep_midpoint
|
|
1828
2010
|
):
|
|
1829
2011
|
# single break provided — treat as bc, wiggle the rest
|
|
1830
2012
|
pAda_brs['bc'] = V.text_syllabified[:newline_indices[0]].count(
|
|
1831
|
-
|
|
2013
|
+
scansion_syllable_separator)
|
|
1832
2014
|
|
|
1833
2015
|
else:
|
|
1834
2016
|
# unusable number of user-provided pāda breaks — use length-based seeds
|
|
@@ -1840,6 +2022,21 @@ class MeterIdentifier(object):
|
|
|
1840
2022
|
pAda_brs, quarter_len
|
|
1841
2023
|
)
|
|
1842
2024
|
|
|
2025
|
+
# --- ardhatraya pass (6-pāda / 3-ardha anuṣṭubh) ---
|
|
2026
|
+
best_4pAda_score = (
|
|
2027
|
+
max(v.identification_score for v in self.Verses_found)
|
|
2028
|
+
if self.Verses_found else 0
|
|
2029
|
+
)
|
|
2030
|
+
_ardhatraya_gate = best_4pAda_score < meter_scores["max score"] and 44 <= total_syll_count <= 52
|
|
2031
|
+
if _DEBUG_TIMING:
|
|
2032
|
+
_section_totals['ardhatraya_gate_count'] = _section_totals.get('ardhatraya_gate_count', 0) + (1 if _ardhatraya_gate else 0)
|
|
2033
|
+
if _ardhatraya_gate:
|
|
2034
|
+
ardhatraya_found = timed('ardhatraya')(self.wiggle_identify_ardhatraya)(
|
|
2035
|
+
V, syllable_list, VT,
|
|
2036
|
+
newline_indices, V.text_syllabified
|
|
2037
|
+
)
|
|
2038
|
+
self.Verses_found.extend(ardhatraya_found)
|
|
2039
|
+
|
|
1843
2040
|
# Post-wiggle: deferred imperfect ardhasamavṛtta pass over accumulated stash.
|
|
1844
2041
|
_lev_ardha_t0 = _time.perf_counter() if _DEBUG_TIMING else None
|
|
1845
2042
|
ardha_stash = VT._ardha_stash
|
|
@@ -1985,7 +2182,7 @@ class MeterIdentifier(object):
|
|
|
1985
2182
|
|
|
1986
2183
|
if _DEBUG_TIMING:
|
|
1987
2184
|
all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
1988
|
-
'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
|
|
2185
|
+
'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
|
|
1989
2186
|
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
1990
2187
|
verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in all_keys}
|
|
1991
2188
|
verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
|
|
@@ -2045,7 +2242,7 @@ def _identify_meter_worker(args):
|
|
|
2045
2242
|
_mi._DEBUG_TIMING = True
|
|
2046
2243
|
MI = MeterIdentifier()
|
|
2047
2244
|
all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
|
|
2048
|
-
'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
|
|
2245
|
+
'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
|
|
2049
2246
|
'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
|
|
2050
2247
|
if debug_timing:
|
|
2051
2248
|
pre = {k: _section_totals.get(k, 0.0) for k in all_keys}
|
|
@@ -238,7 +238,7 @@ samavfttas_by_family_and_gaRa = {
|
|
|
238
238
|
'jtj(r|B)' : 'vaṃśastha',
|
|
239
239
|
'mmy(y|j)' : 'vaiśvadevī',
|
|
240
240
|
'rrr(r|B)' : 'sragviṇī',
|
|
241
|
-
|
|
241
|
+
'nnm(y|j)' : 'paṭuvṛtta',
|
|
242
242
|
},
|
|
243
243
|
|
|
244
244
|
13: {
|
|
@@ -313,7 +313,8 @@ samavfttas_by_family_and_gaRa = {
|
|
|
313
313
|
22: {
|
|
314
314
|
'mmtnnns(g|l)' : 'haṃsī', # also mmggnnnngg
|
|
315
315
|
'tByjsrn(g|l)' : 'aśvadhāṭī',
|
|
316
|
-
|
|
316
|
+
'Brnrnrn(g|l)' : 'madraka'
|
|
317
|
+
},
|
|
317
318
|
|
|
318
319
|
23: {
|
|
319
320
|
'njBjBjBl(g|l)' : 'adritanayā',
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.6.3"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|