skrutable 2.6.2__tar.gz → 2.7.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {skrutable-2.6.2 → skrutable-2.7.0}/PKG-INFO +1 -1
  2. skrutable-2.7.0/src/skrutable/__init__.py +1 -0
  3. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/config.json +7 -3
  4. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/meter_identification.py +268 -58
  5. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/meter_patterns.py +3 -2
  6. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable.egg-info/PKG-INFO +1 -1
  7. skrutable-2.6.2/src/skrutable/__init__.py +0 -1
  8. {skrutable-2.6.2 → skrutable-2.7.0}/LICENSE.md +0 -0
  9. {skrutable-2.6.2 → skrutable-2.7.0}/README.md +0 -0
  10. {skrutable-2.6.2 → skrutable-2.7.0}/setup.cfg +0 -0
  11. {skrutable-2.6.2 → skrutable-2.7.0}/setup.py +0 -0
  12. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/config.py +0 -0
  13. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/generate_scheme_vectors.py +0 -0
  14. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/impossible_bigrams.json +0 -0
  15. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/manual.md +0 -0
  16. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/phonemes.py +0 -0
  17. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/run_examples.py +0 -0
  18. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/scansion.py +0 -0
  19. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/scheme_detection.py +0 -0
  20. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/scheme_maps.py +0 -0
  21. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/scheme_vectors.json +0 -0
  22. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/scheme_vectors_mbh.py +0 -0
  23. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/splitting.py +0 -0
  24. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/transliteration.py +0 -0
  25. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/utils.py +0 -0
  26. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable/virAma_avoidance.py +0 -0
  27. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable.egg-info/SOURCES.txt +0 -0
  28. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable.egg-info/dependency_links.txt +0 -0
  29. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable.egg-info/requires.txt +0 -0
  30. {skrutable-2.6.2 → skrutable-2.7.0}/src/skrutable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.2
3
+ Version: 2.7.0
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -0,0 +1 @@
1
+ __version__ = "2.7.0"
@@ -15,10 +15,14 @@
15
15
  "anuṣṭubh, full, both halves imperfect)": 5,
16
16
  "anuṣṭubh, full, one half perfect, one length error)": 6,
17
17
  "anuṣṭubh, full, one half imperfect, one length error)": 4,
18
- "anuṣṭubh, half, single half perfect)" : 9,
19
- "anuṣṭubh, half, single half imperfect)": 5,
18
+ "anuṣṭubh, 1 or 3 halves, all halves perfect)" : 9,
19
+ "anuṣṭubh, 1 or 3 halves, one imperfect)": 7,
20
+ "anuṣṭubh, 1 or 3 halves, two imperfect)": 6,
21
+ "anuṣṭubh, 1 or 3 halves, some perfect some length error)": 6,
22
+ "anuṣṭubh, 1 or 3 halves, at least one half imperfect)": 5,
23
+ "anuṣṭubh, 1 or 3 halves, some imperfect some length error)": 4,
20
24
  "samavṛtta, perfect" : 9,
21
- "samavṛtta, imperfect (3)" : 6,
25
+ "samavṛtta, imperfect (3)" : 7,
22
26
  "samavṛtta, imperfect (2)" : 5,
23
27
  "samavṛtta, quarter, perfect" : 8,
24
28
  "ardhasamavṛtta, perfect" : 9,
@@ -66,9 +66,9 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
66
66
  return
67
67
  import sys, os
68
68
  scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
69
- type_keys = ('anuzwuB', 'samavftta_etc', 'samavftta', 'upajAti', 'ardhasamavftta_perfect', 'vizamavftta', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama')
69
+ type_keys = ('anuzwuB', 'ardhatraya', 'samavftta_etc', 'samavftta', 'upajAti', 'ardhasamavftta_perfect', 'vizamavftta', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama')
70
70
  type_abbrev = {
71
- 'anuzwuB': 'anuṣṭ', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
71
+ 'anuzwuB': 'anuṣṭ', 'ardhatraya': 'anuṣṭ3', 'samavftta_etc': 'vftta↑', 'samavftta': 'samav', 'upajAti': 'upajāti',
72
72
  'ardhasamavftta_perfect': 'ardha✓', 'vizamavftta': 'vizama',
73
73
  'jAti': 'jāti',
74
74
  'lev_samavftta': 'lev✗sama', 'lev_ardha': 'lev✗ardh', 'lev_vizama': 'lev✗visa',
@@ -91,7 +91,8 @@ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_work
91
91
 
92
92
  n_verses = sum(b.get('_count', 0) for b in _category_totals.values())
93
93
  wiggle_count = _section_totals.get('wiggle_count', 0)
94
- lines = [f'\n=== {n_verses} verses / {wiggle_count} resplit candidates ===']
94
+ ardhatraya_gate_count = _section_totals.get('ardhatraya_gate_count', 0)
95
+ lines = [f'\n=== {n_verses} verses / {wiggle_count} resplit candidates / {ardhatraya_gate_count} ardhatraya gate hits ===']
95
96
  hdr = (' ' + 'category'.ljust(col_cat_w)
96
97
  + 'perf'.rjust(count_w) + 'impf'.rjust(count_w)
97
98
  + 'total'.rjust(sub_w) + 'scan∑'.rjust(sub_w) + 'types∑'.rjust(sub_w)
@@ -153,6 +154,8 @@ class Diagnostic:
153
154
  imperfect_label_sanskrit: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); Sanskrit only
154
155
  imperfect_label_english: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); English only
155
156
  problem_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); None if perfect
157
+ notable_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); green-highlighted "interesting/ok" syllables
158
+ notable_label: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); label for the notable feature (same string for skt/eng)
156
159
 
157
160
  def perfect(self):
158
161
  return self.perfect_id_label is not None
@@ -473,17 +476,28 @@ class VerseTester(object):
473
476
  result = None
474
477
  for weights_pattern, label in meter_patterns.anuzwuB_pAda['odd'].items():
475
478
  if re.match(weights_pattern, odd_pAda_weights):
476
- result = Diagnostic(perfect_id_label=label)
479
+ is_vipula = 'vipulā' in label
480
+ result = Diagnostic(
481
+ perfect_id_label=label,
482
+ notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
483
+ notable_label={'odd': label} if is_vipula else None,
484
+ )
477
485
  break
478
486
  if result is None:
479
487
  # Odd pāda matched no perfect pattern — try asamīcīna patterns
480
488
  # before falling back to the generic ya-gaṇa violation label.
481
489
  for weights_pattern, (label, problem_syls, code) in meter_patterns.anuzwuB_pAda_asamIcIna['odd'].items():
482
490
  if re.match(weights_pattern, odd_pAda_weights):
491
+ is_vipula = 'vipulā' in label
492
+ # extract vipulā name from label like "asamīcīnā, ma-vipulāyāḥ pūrvam raḥ syāt"
493
+ vipula_match = re.search(r'\w+-vipulā', label)
494
+ vipula_name = vipula_match.group(0) if vipula_match else None
483
495
  result = Diagnostic(
484
496
  imperfect_label_sanskrit={'odd': label},
485
497
  imperfect_label_english={'odd': code},
486
498
  problem_syllables={'odd': problem_syls},
499
+ notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
500
+ notable_label={'odd': vipula_name} if vipula_name else None,
487
501
  )
488
502
  break
489
503
  if result is None:
@@ -523,14 +537,14 @@ class VerseTester(object):
523
537
  return None
524
538
  if ardham_eva_result.perfect():
525
539
  Vrs.meter_label = f"anuṣṭubh (ardham eva: {ardham_eva_result.perfect_id_label})"
526
- Vrs.identification_score = meter_scores["anuṣṭubh, half, single half perfect)"]
540
+ Vrs.identification_score = meter_scores["anuṣṭubh, 1 or 3 halves, all halves perfect)"]
527
541
  Vrs.is_perfect = True
528
542
  Vrs.diagnostic = ardham_eva_result
529
543
  return ardham_eva_result
530
544
  elif ardham_eva_result.imperfect():
531
- label_str = '; '.join(f"{k}: {v}" for k, v in ardham_eva_result.imperfect_label_sanskrit.items())
545
+ label_str = '; '.join(ardham_eva_result.imperfect_label_sanskrit.values())
532
546
  Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
533
- Vrs.identification_score = meter_scores["anuṣṭubh, half, single half imperfect)"]
547
+ Vrs.identification_score = meter_scores["anuṣṭubh, 1 or 3 halves, at least one half imperfect)"]
534
548
  Vrs.is_perfect = False
535
549
  Vrs.diagnostic = ardham_eva_result
536
550
  return ardham_eva_result
@@ -553,14 +567,14 @@ class VerseTester(object):
553
567
  # one half imperfect
554
568
 
555
569
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.perfect():
556
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
570
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
557
571
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
558
572
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
559
573
  Vrs.is_perfect = False
560
574
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
561
575
  return pAdas_ab_result
562
576
  elif pAdas_ab_result.perfect() and pAdas_cd_result.imperfect():
563
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
577
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
564
578
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {cd_str})"
565
579
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
566
580
  Vrs.is_perfect = False
@@ -570,8 +584,8 @@ class VerseTester(object):
570
584
  # both halves imperfect
571
585
 
572
586
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.imperfect():
573
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
574
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
587
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
588
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
575
589
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {cd_str})"
576
590
  Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves imperfect)"]
577
591
  Vrs.is_perfect = False
@@ -581,14 +595,14 @@ class VerseTester(object):
581
595
  # one half perfect, one length error
582
596
 
583
597
  elif pAdas_ab_result.length_error() and pAdas_cd_result.perfect():
584
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
598
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
585
599
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
586
600
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
587
601
  Vrs.is_perfect = False
588
602
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
589
603
  return pAdas_cd_result
590
604
  elif pAdas_ab_result.perfect() and pAdas_cd_result.length_error():
591
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
605
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
592
606
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: ?? {cd_str})"
593
607
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
594
608
  Vrs.is_perfect = False
@@ -598,16 +612,16 @@ class VerseTester(object):
598
612
  # one half imperfect, one length error
599
613
 
600
614
  elif pAdas_ab_result.length_error() and pAdas_cd_result.imperfect():
601
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
602
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
615
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
616
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
603
617
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {cd_str})"
604
618
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
605
619
  Vrs.is_perfect = False
606
620
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
607
621
  return pAdas_cd_result
608
622
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.length_error():
609
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
610
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
623
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
624
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
611
625
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: ?? {cd_str})"
612
626
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
613
627
  Vrs.is_perfect = False
@@ -678,12 +692,10 @@ class VerseTester(object):
678
692
  imperfect_note = None
679
693
 
680
694
  if self.pAdasamatva_count == 3:
681
- imperfect_note = "? 3 eva pādāḥ yuktāḥ"
682
- meter_label += " (%s)" % imperfect_note
695
+ imperfect_note = True
683
696
  score = meter_scores["samavṛtta, imperfect (3)"]
684
697
  elif self.pAdasamatva_count == 2:
685
- imperfect_note = "? 2 eva pādāḥ yuktāḥ"
686
- meter_label += " (%s)" % imperfect_note
698
+ imperfect_note = True
687
699
  score = meter_scores["samavṛtta, imperfect (2)"]
688
700
  elif self.pAdasamatva_count == 0:
689
701
  imperfect_note = "1 eva pādaḥ"
@@ -746,12 +758,10 @@ class VerseTester(object):
746
758
  problem_syllables=problem_syllables or None,
747
759
  )
748
760
  else:
749
- # fewer than 4 matching pādas; append any length notes to the meter_label
750
- length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items() if v in ('adhikākṣarā', 'ūnākṣarā')]
751
- full_imperfect_str = imperfect_note
761
+ # fewer than 4 matching pādas; append per-pāda notes to the meter_label
762
+ length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
752
763
  if length_notes:
753
- full_imperfect_str += "; " + "; ".join(length_notes)
754
- meter_label = meter_label.replace(f"({imperfect_note})", f"({full_imperfect_str})")
764
+ meter_label += " (%s)" % "; ".join(length_notes)
755
765
  diagnostic = Diagnostic(
756
766
  imperfect_label_sanskrit=per_pada_sanskrit or None,
757
767
  imperfect_label_english=per_pada_english or None,
@@ -760,7 +770,7 @@ class VerseTester(object):
760
770
 
761
771
  # score arbitration: may tie with pre-existing result (e.g., upajāti)
762
772
  old_score = Vrs.identification_score
763
- self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=imperfect_note is None and not has_any_error)
773
+ self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=not imperfect_note and not has_any_error)
764
774
  if score >= old_score:
765
775
  Vrs.diagnostic = diagnostic
766
776
 
@@ -945,14 +955,13 @@ class VerseTester(object):
945
955
 
946
956
  meter_labels.append(meter_label)
947
957
 
948
- unique_meter_labels = list(set(meter_labels)) # de-dupe
958
+ unique_meter_labels = sorted(set(meter_labels)) # de-dupe, stable order
949
959
  combined_meter_labels = ', '.join(unique_meter_labels)
950
960
 
951
961
  # Assign score based on how complete and homogeneous the match is.
952
962
  family = meter_patterns.samavftta_family_names[wbp_lens[0]] if wbp_lens[0] < 27 else 'daṇḍaka'
953
- unique_meter_labels_copy = unique_meter_labels; unique_meter_labels_copy.sort()
954
963
  if (family == "triṣṭubh" and
955
- unique_meter_labels_copy == ['indravajrā [11: ttjgg]', 'upendravajrā [11: jtjgg]']
964
+ unique_meter_labels == ['indravajrā [11: ttjgg]', 'upendravajrā [11: jtjgg]']
956
965
  ):
957
966
  family = '' # clearer not to specify in this case
958
967
 
@@ -976,19 +985,12 @@ class VerseTester(object):
976
985
  if all(lbl.startswith('ajñātam') for lbl in meter_labels):
977
986
  score -= 1
978
987
 
979
- imperfect_note = None
988
+ imperfect_note = len(wbp_lens) != 4 and unique_sorted_lens != [11, 12]
980
989
  overall_meter_label = "upajāti %s: %s" % (
981
990
  family,
982
991
  combined_meter_labels
983
992
  )
984
993
 
985
- if (
986
- len(wbp_lens) != 4 and
987
- unique_sorted_lens != [11, 12]
988
- ): # not perfect and also not triṣṭubh-jagatī-saṃkara
989
- imperfect_note = "? %d eva pādāḥ yuktāḥ" % len(wbp_lens)
990
- overall_meter_label += " (%s)" % imperfect_note
991
-
992
994
  # Build diagnostic: excluded pādas are flagged as hyper/hypometric relative
993
995
  # to the majority length; included pādas contribute no error entry.
994
996
  most_freq_len = wbp_lens[0] if wbp_lens else None
@@ -1005,11 +1007,14 @@ class VerseTester(object):
1005
1007
  per_pada_sanskrit[pada_num] = 'adhikākṣarā' if hyper else 'ūnākṣarā'
1006
1008
  per_pada_english[pada_num] = 'hypermetric' if hyper else 'hypometric'
1007
1009
 
1008
- if imperfect_note is None and not per_pada_english:
1009
- # all four pādas included and none flagged
1010
+ # Append per-pāda imperfect notes to label.
1011
+ length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
1012
+ if length_notes:
1013
+ overall_meter_label += " (%s)" % "; ".join(length_notes)
1014
+
1015
+ if not per_pada_english and not imperfect_note:
1010
1016
  diagnostic = Diagnostic(perfect_id_label=overall_meter_label)
1011
- elif imperfect_note is None:
1012
- # all four pādas included but some have length errors
1017
+ elif not imperfect_note:
1013
1018
  diagnostic = Diagnostic(
1014
1019
  perfect_id_label=overall_meter_label,
1015
1020
  imperfect_label_sanskrit=per_pada_sanskrit or None,
@@ -1017,11 +1022,6 @@ class VerseTester(object):
1017
1022
  problem_syllables=problem_syllables or None,
1018
1023
  )
1019
1024
  else:
1020
- # fewer than 4 pādas included; append length notes to the meter_label
1021
- length_notes = [f"pāda {p} {v}" for p, v in per_pada_sanskrit.items()]
1022
- if length_notes:
1023
- full_imperfect_str = imperfect_note + "; " + "; ".join(length_notes)
1024
- overall_meter_label = overall_meter_label.replace(f"({imperfect_note})", f"({full_imperfect_str})")
1025
1025
  diagnostic = Diagnostic(
1026
1026
  imperfect_label_sanskrit=per_pada_sanskrit or None,
1027
1027
  imperfect_label_english=per_pada_english or None,
@@ -1030,7 +1030,8 @@ class VerseTester(object):
1030
1030
 
1031
1031
  # score arbitration: may tie with pre-existing result (e.g., samavṛtta)
1032
1032
  old_score = Vrs.identification_score
1033
- self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=imperfect_note is None and not per_pada_english)
1033
+ is_perfect = not imperfect_note and not per_pada_english
1034
+ self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=is_perfect)
1034
1035
  if score >= old_score:
1035
1036
  Vrs.diagnostic = diagnostic
1036
1037
 
@@ -1573,6 +1574,63 @@ class VerseTester(object):
1573
1574
  else:
1574
1575
  return 0
1575
1576
 
1577
def attempt_ardhatraya_identification(self, Vrs):
    """
    Identification for 6-pāda input (3 ardhas = 1.5 anuṣṭubh verses).

    Each consecutive pair of lines in Vrs.syllable_weights is tested
    independently with test_as_anuzwuB_half; the three results are then
    combined into one label and one score.

    On success sets Vrs.meter_label, Vrs.identification_score,
    Vrs.is_perfect and Vrs.diagnostic (a dict keyed 'ab', 'cd', 'ef').
    On failure Vrs is left untouched. Lines beyond the sixth are ignored.

    Returns 1 if identified, 0 otherwise.
    """
    weights_by_pAda = Vrs.syllable_weights.split('\n')
    if len(weights_by_pAda) < 6:
        return 0

    # All three halves are tested before any of them is inspected.
    results = [
        self.test_as_anuzwuB_half(weights_by_pAda[i], weights_by_pAda[i + 1])
        for i in (0, 2, 4)
    ]
    if any(r is None for r in results):
        return 0

    n_perfect = sum(1 for r in results if r.perfect())
    n_length_error = sum(1 for r in results if r.length_error())
    # Assumes each result is exactly one of perfect / length error /
    # imperfect, so the remainder is the imperfect count — TODO confirm.
    n_imperfect = len(results) - n_perfect - n_length_error

    if n_length_error == len(results):
        # No half scans as anuṣṭubh at all. Without this guard the case
        # fell through to the "some perfect some length error" score even
        # though nothing was perfect, so reject the split instead.
        return 0

    if n_length_error == 0:
        if n_imperfect == 0:
            score_key = "anuṣṭubh, 1 or 3 halves, all halves perfect)"
        elif n_imperfect == 1:
            score_key = "anuṣṭubh, 1 or 3 halves, one imperfect)"
        elif n_imperfect == 2:
            score_key = "anuṣṭubh, 1 or 3 halves, two imperfect)"
        else:
            score_key = "anuṣṭubh, 1 or 3 halves, at least one half imperfect)"
    elif n_perfect > 0:
        # Includes the mixed case (perfect + imperfect + length error).
        # NOTE(review): confirm that the mixed case should score this high.
        score_key = "anuṣṭubh, 1 or 3 halves, some perfect some length error)"
    else:
        score_key = "anuṣṭubh, 1 or 3 halves, some imperfect some length error)"

    def _ardha_label(r):
        # Perfect halves carry a single label; otherwise join the per-pāda
        # Sanskrit labels.
        if r.perfect():
            return r.perfect_id_label
        return '; '.join(r.imperfect_label_sanskrit.values())

    l1, l2, l3 = (_ardha_label(r) for r in results)
    Vrs.meter_label = f"anuṣṭubh (1,2: {l1}; 3,4: {l2}; 5,6: {l3})"
    Vrs.identification_score = meter_scores[score_key]
    Vrs.is_perfect = (n_length_error == 0 and n_imperfect == 0)
    Vrs.diagnostic = {'ab': results[0], 'cd': results[1], 'ef': results[2]}
    return 1
1633
+
1576
1634
 
1577
1635
  class MeterIdentifier(object):
1578
1636
  """
@@ -1598,6 +1656,8 @@ class MeterIdentifier(object):
1598
1656
  """
1599
1657
 
1600
1658
  iter_list = [start_pos]
1659
+ if resplit_option == 'none':
1660
+ return iter_list
1601
1661
  if resplit_option == 'resplit_max':
1602
1662
  distance_multiplier = 0.50 # wiggle as far as 50% of part_len
1603
1663
  elif resplit_option == 'resplit_lite':
@@ -1683,6 +1743,141 @@ class MeterIdentifier(object):
1683
1743
  return Verses_found
1684
1744
 
1685
1745
 
1746
def resplit_Verse_ardhatraya(self, syllable_list, ab_br, bc_br, cd_br, de_br, ef_br):
    """
    Re-join syllables into six newline-separated pādas.

    The five *_br arguments are the syllable indices at which each pāda
    break falls. Works on a copy, so the caller's syllable_list is not
    modified by this method.
    """
    syllables = list(syllable_list)
    # Only the ab / cd / ef breaks are handed to the boundary fixer
    # (presumably the pāda breaks inside each ardha — TODO confirm).
    _fix_conjunct_pada_boundaries(syllables, [ab_br, cd_br, ef_br])

    starts = (None, ab_br, bc_br, cd_br, de_br, ef_br)
    stops = (ab_br, bc_br, cd_br, de_br, ef_br, None)
    return '\n'.join(
        scansion_syllable_separator.join(syllables[lo:hi])
        for lo, hi in zip(starts, stops)
    )
1757
+
1758
def constrained_resplit_identify(self, Vrs, syllable_list, VrsTster,
                                 n_pAdas, pada_len, resplit_option,
                                 resplit_func, attempt_func,
                                 keep_mid_breaks=None, user_seeds=None):
    """
    Constrained resplit enumerator for known-structure meters.

    Rather than wiggling break positions freely, generates only splits where
    every pāda length falls within [pada_len - tol, pada_len + tol], where
    tol = 1 for resplit_lite/none, 2 for anything else (e.g. resplit_max).

    NOTE(review): under resplit_option 'none' the break positions are still
    varied by ±1 around their seeds here; confirm this is intended.

    n_pAdas: number of pādas expected (e.g. 6 for ardhatraya, 4 for samavṛtta)
    pada_len: canonical pāda length in syllables
    resplit_func: callable(syllable_list, *break_positions) → text_syllabified
    attempt_func: callable(Vrs) → 0 or 1
    keep_mid_breaks: set of 0-indexed break indices to lock to seed position
        (e.g. {1, 3} for ardhatraya bc/de when resplit_keep_midpoint)
    user_seeds: list of break positions derived from user-provided
        punctuation/newlines; each entry overrides the canonical
        pada_len-based seed at the same index. Missing or None entries
        fall back to the canonical seed; extra entries are ignored.
    VrsTster: accepted for signature parity; not used in this method.

    Returns a list for MeterIdentifier.Verses_found.
    """
    tol = 1 if resplit_option in ('none', 'resplit_lite') else 2
    min_len, max_len = pada_len - tol, pada_len + tol
    locked = set(keep_mid_breaks) if keep_mid_breaks else set()
    n_breaks = n_pAdas - 1
    total = len(syllable_list)
    max_score = meter_scores["max score"]

    # Start from canonical seeds and overlay whatever the user supplied, so
    # a short or partially-filled user_seeds can no longer raise IndexError.
    seeds = [pada_len * (i + 1) for i in range(n_breaks)]
    if user_seeds:
        for i, pos in enumerate(list(user_seeds)[:n_breaks]):
            if pos is not None:
                seeds[i] = pos

    def candidates(break_idx):
        # Locked breaks stay at their seed; others range over seed ± tol.
        seed = seeds[break_idx]
        if break_idx in locked:
            return [seed]
        return list(range(seed - tol, seed + tol + 1))

    S = Sc()
    Verses_found = []

    def _try_split(chosen):
        # Scan and test one complete split; True signals early exit.
        try:
            new_text_syllabified = resplit_func(syllable_list, *chosen)
            temp_V = copy(Vrs)
            temp_V.text_syllabified = new_text_syllabified
            if _DEBUG_TIMING:
                _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + 1
            temp_V.syllable_weights = timed('scan_weights')(S.scan_syllable_weights)(
                temp_V.text_syllabified)
            temp_V.morae_per_line = timed('scan_morae_gana')(S.count_morae)(
                temp_V.syllable_weights)
            temp_V.gaRa_abbreviations = timed('scan_morae_gana')(
                lambda: '\n'.join([S.gaRa_abbreviate(line) for line in temp_V.syllable_weights.split('\n')])
            )()
            if attempt_func(temp_V):
                Verses_found.append(temp_V)
                if temp_V.identification_score == max_score:
                    return True
        except IndexError:
            # Best-effort: a split that cannot be scanned is skipped.
            pass
        return False

    def _recurse(break_idx, chosen):
        if break_idx == n_breaks:
            return _try_split(chosen)

        prev = chosen[-1] if chosen else 0
        pAdas_after = n_pAdas - break_idx - 1
        for pos in candidates(break_idx):
            # The pāda ending at this break must have an allowed length.
            if not (min_len <= pos - prev <= max_len):
                continue
            # The syllables left over must still fit the remaining pādas.
            remaining = total - pos
            if not (pAdas_after * min_len <= remaining <= pAdas_after * max_len):
                continue
            if _recurse(break_idx + 1, chosen + [pos]):
                return True  # propagate early exit
        return False

    _recurse(0, [])
    return Verses_found
1843
+
1844
def wiggle_identify_ardhatraya(self, Vrs, syllable_list, VrsTster,
                               newline_indices, text_syllabified):
    """
    Constrained resplit for 6-pāda (3-ardha) anuṣṭubh.

    Builds break seeds from the user's newline positions when exactly five
    are present, then delegates to constrained_resplit_identify with
    8-syllable pādas. Returns whatever that call returns.
    """
    option = VrsTster.resplit_option
    syllables_per_pAda = 8
    break_count = 5

    def syllables_before(char_idx):
        # Syllable index of a character offset = separators preceding it.
        return text_syllabified[:char_idx].count(scansion_syllable_separator)

    seeds_from_user = None
    if len(newline_indices) == break_count:
        if option in ('none', 'resplit_lite'):
            # All five breaks supplied: seed every break from them.
            seeds_from_user = [
                syllables_before(newline_indices[k]) for k in range(break_count)
            ]
        elif option == 'resplit_max' and VrsTster.resplit_keep_midpoint:
            # Only bc (index 1) and de (index 3) come from the user;
            # the remaining breaks keep their length-based seeds.
            seeds_from_user = [
                syllables_per_pAda * (k + 1) for k in range(break_count)
            ]
            for k in (1, 3):
                seeds_from_user[k] = syllables_before(newline_indices[k])

    if VrsTster.resplit_keep_midpoint:
        locked_breaks = {1, 3}
    else:
        locked_breaks = set()

    return self.constrained_resplit_identify(
        Vrs, syllable_list, VrsTster,
        n_pAdas=6, pada_len=syllables_per_pAda,
        resplit_option=option,
        resplit_func=self.resplit_Verse_ardhatraya,
        attempt_func=VrsTster.attempt_ardhatraya_identification,
        keep_mid_breaks=locked_breaks,
        user_seeds=seeds_from_user,
    )
1879
+
1880
+
1686
1881
  def find_meter(self, rw_str, from_scheme=None):
1687
1882
 
1688
1883
  self.Scanner = S = Sc()
@@ -1742,7 +1937,7 @@ class MeterIdentifier(object):
1742
1937
 
1743
1938
  if _DEBUG_TIMING:
1744
1939
  _pre_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
1745
- 'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
1940
+ 'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
1746
1941
  'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
1747
1942
  _pre = {k: _section_totals.get(k, 0.0) for k in _pre_keys}
1748
1943
 
@@ -1753,7 +1948,7 @@ class MeterIdentifier(object):
1753
1948
  self.VerseTester.resplit_option = resplit_option
1754
1949
  self.VerseTester.resplit_keep_midpoint = resplit_keep_midpoint
1755
1950
 
1756
- if resplit_option in ['none', 'single_pAda'] or V.text_cleaned == '':
1951
+ if resplit_option == 'single_pAda' or V.text_cleaned == '':
1757
1952
  # No resplitting: test the verse exactly as scanned.
1758
1953
  VT._ardha_stash = []
1759
1954
  VT._vizama_stash = []
@@ -1765,7 +1960,7 @@ class MeterIdentifier(object):
1765
1960
  if VT._vizama_stash and meter_scores["viṣamavṛtta, imperfect"] > V.identification_score:
1766
1961
  timed('lev_vizama')(VT.is_vizamavftta)(V)
1767
1962
 
1768
- elif resplit_option in ['resplit_max', 'resplit_lite']:
1963
+ elif resplit_option in ['none', 'resplit_max', 'resplit_lite']:
1769
1964
 
1770
1965
  # Capture any user-provided pāda breaks (newlines surviving scansion cleaning).
1771
1966
  newline_indices = [
@@ -1791,14 +1986,14 @@ class MeterIdentifier(object):
1791
1986
  )
1792
1987
 
1793
1988
  if len(newline_indices) == 3:
1794
- if resplit_option == 'resplit_lite':
1989
+ if resplit_option in ('none', 'resplit_lite'):
1795
1990
  # all three breaks provided — override all three
1796
1991
  pAda_brs['ab'], pAda_brs['bc'], pAda_brs['cd'] = (
1797
1992
  V.text_syllabified[:newline_indices[i]].count(
1798
1993
  scansion_syllable_separator
1799
1994
  ) for i in [0, 1, 2]
1800
1995
  )
1801
- elif (
1996
+ elif (
1802
1997
  resplit_option == 'resplit_max' and
1803
1998
  self.VerseTester.resplit_keep_midpoint
1804
1999
  ):
@@ -1807,15 +2002,15 @@ class MeterIdentifier(object):
1807
2002
  scansion_syllable_separator)
1808
2003
 
1809
2004
  elif len(newline_indices) == 1:
1810
- if (
1811
- resplit_option == 'resplit_lite'
2005
+ if (
2006
+ resplit_option in ('none', 'resplit_lite')
1812
2007
  ) or (
1813
2008
  resplit_option == 'resplit_max' and
1814
2009
  self.VerseTester.resplit_keep_midpoint
1815
2010
  ):
1816
2011
  # single break provided — treat as bc, wiggle the rest
1817
2012
  pAda_brs['bc'] = V.text_syllabified[:newline_indices[0]].count(
1818
- scansion_syllable_separator)
2013
+ scansion_syllable_separator)
1819
2014
 
1820
2015
  else:
1821
2016
  # unusable number of user-provided pāda breaks — use length-based seeds
@@ -1827,6 +2022,21 @@ class MeterIdentifier(object):
1827
2022
  pAda_brs, quarter_len
1828
2023
  )
1829
2024
 
2025
+ # --- ardhatraya pass (6-pāda / 3-ardha anuṣṭubh) ---
2026
+ best_4pAda_score = (
2027
+ max(v.identification_score for v in self.Verses_found)
2028
+ if self.Verses_found else 0
2029
+ )
2030
+ _ardhatraya_gate = best_4pAda_score < meter_scores["max score"] and 44 <= total_syll_count <= 52
2031
+ if _DEBUG_TIMING:
2032
+ _section_totals['ardhatraya_gate_count'] = _section_totals.get('ardhatraya_gate_count', 0) + (1 if _ardhatraya_gate else 0)
2033
+ if _ardhatraya_gate:
2034
+ ardhatraya_found = timed('ardhatraya')(self.wiggle_identify_ardhatraya)(
2035
+ V, syllable_list, VT,
2036
+ newline_indices, V.text_syllabified
2037
+ )
2038
+ self.Verses_found.extend(ardhatraya_found)
2039
+
1830
2040
  # Post-wiggle: deferred imperfect ardhasamavṛtta pass over accumulated stash.
1831
2041
  _lev_ardha_t0 = _time.perf_counter() if _DEBUG_TIMING else None
1832
2042
  ardha_stash = VT._ardha_stash
@@ -1972,7 +2182,7 @@ class MeterIdentifier(object):
1972
2182
 
1973
2183
  if _DEBUG_TIMING:
1974
2184
  all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
1975
- 'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
2185
+ 'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
1976
2186
  'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
1977
2187
  verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in all_keys}
1978
2188
  verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
@@ -2032,7 +2242,7 @@ def _identify_meter_worker(args):
2032
2242
  _mi._DEBUG_TIMING = True
2033
2243
  MI = MeterIdentifier()
2034
2244
  all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
2035
- 'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
2245
+ 'anuzwuB', 'ardhatraya', 'samavftta', 'upajAti', 'vizamavftta',
2036
2246
  'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
2037
2247
  if debug_timing:
2038
2248
  pre = {k: _section_totals.get(k, 0.0) for k in all_keys}
@@ -238,7 +238,7 @@ samavfttas_by_family_and_gaRa = {
238
238
  'jtj(r|B)' : 'vaṃśastha',
239
239
  'mmy(y|j)' : 'vaiśvadevī',
240
240
  'rrr(r|B)' : 'sragviṇī',
241
-
241
+ 'nnm(y|j)' : 'paṭuvṛtta',
242
242
  },
243
243
 
244
244
  13: {
@@ -313,7 +313,8 @@ samavfttas_by_family_and_gaRa = {
313
313
  22: {
314
314
  'mmtnnns(g|l)' : 'haṃsī', # also mmggnnnngg
315
315
  'tByjsrn(g|l)' : 'aśvadhāṭī',
316
- },
316
+ 'Brnrnrn(g|l)' : 'madraka'
317
+ },
317
318
 
318
319
  23: {
319
320
  'njBjBjBl(g|l)' : 'adritanayā',
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.2
3
+ Version: 2.7.0
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -1 +0,0 @@
1
- __version__ = "2.6.2"
File without changes
File without changes
File without changes
File without changes