skrutable 2.4.0__tar.gz → 2.5.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30) hide show
  1. {skrutable-2.4.0 → skrutable-2.5.0}/PKG-INFO +1 -1
  2. skrutable-2.5.0/src/skrutable/__init__.py +1 -0
  3. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/manual.md +25 -0
  4. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/meter_identification.py +173 -17
  5. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/scansion.py +9 -4
  6. skrutable-2.5.0/src/skrutable/utils.py +20 -0
  7. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable.egg-info/PKG-INFO +1 -1
  8. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable.egg-info/SOURCES.txt +1 -0
  9. skrutable-2.4.0/src/skrutable/__init__.py +0 -1
  10. {skrutable-2.4.0 → skrutable-2.5.0}/LICENSE.md +0 -0
  11. {skrutable-2.4.0 → skrutable-2.5.0}/README.md +0 -0
  12. {skrutable-2.4.0 → skrutable-2.5.0}/setup.cfg +0 -0
  13. {skrutable-2.4.0 → skrutable-2.5.0}/setup.py +0 -0
  14. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/config.json +0 -0
  15. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/config.py +0 -0
  16. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/generate_scheme_vectors.py +0 -0
  17. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/impossible_bigrams.json +0 -0
  18. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/meter_patterns.py +0 -0
  19. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/phonemes.py +0 -0
  20. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/run_examples.py +0 -0
  21. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/scheme_detection.py +0 -0
  22. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/scheme_maps.py +0 -0
  23. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/scheme_vectors.json +0 -0
  24. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/scheme_vectors_mbh.py +0 -0
  25. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/splitting.py +0 -0
  26. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/transliteration.py +0 -0
  27. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable/virAma_avoidance.py +0 -0
  28. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable.egg-info/dependency_links.txt +0 -0
  29. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable.egg-info/requires.txt +0 -0
  30. {skrutable-2.4.0 → skrutable-2.5.0}/src/skrutable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.4.0
3
+ Version: 2.5.0
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -0,0 +1 @@
1
+ __version__ = "2.5.0"
@@ -130,6 +130,31 @@ Key terms:
130
130
  * *jāti*: four quarters with set patterns of total moraic length
131
131
 
132
132
 
133
+ # scan timing profiling
134
+
135
+ `skrutable` includes a built-in profiling system for measuring meter identification performance across a corpus. It is disabled by default (`utils._DEBUG_TIMING = False`); while disabled, the timing wrappers simply call through to the wrapped function, so nothing is measured or accumulated.
136
+
137
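+ To use it, set `_DEBUG_TIMING` before importing `MeterIdentifier` (`meter_identification` copies the flag's value when it is first imported, so changing the flag afterwards does not enable the per-category bookkeeping or `flush_profiling_report`):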
138
+
139
+ ```python
140
+ import skrutable.utils as _utils
141
+ _utils._DEBUG_TIMING = True
142
+
143
+ from skrutable.meter_identification import MeterIdentifier, flush_profiling_report
144
+
145
+ MI = MeterIdentifier()
146
+ for verse in my_verses:
147
+ MI.identify_meter(verse, resplit_option="resplit_lite", resplit_keep_midpoint=True, from_scheme="IAST") # e.g.
148
+
149
+ flush_profiling_report() # prints table to stderr, resets counters
150
+ # flush_profiling_report(write_file=True) # also writes profiling_debug.txt
151
+ ```
152
+
153
+ The table breaks down wall-clock time per meter category with columns for each scan sub-phase (`clean`, `transl`, `syl`, `wts`, `mor+g`) and each identification type (`anuṣṭ`, `samav`, `jāti`), plus perfect/imperfect verse counts per category.
154
+
155
+ If using the front end, `make launch-profiling` (or `./launch.sh --scan-profiling`) enables profiling for the server process without touching any source files.
156
+
157
+
133
158
  # sandhi and compound splitting
134
159
 
135
160
  `skrutable` provides a wrapper for applying pre-trained splitting models via separate online servers ([my own splitter_server for the 2018 model](https://2018emnlp-sanskrit-splitter-server.duckdns.org/) and https://dharmamitra.org). A working internet connection is required for this functionality. The wrapper preserves original sentence length and punctuation, and it also helps utilize the Dharmamitra ByT5-Sanskrit model's ability to distinguish compounds from inter-word breaks.
@@ -1,6 +1,7 @@
1
1
  from skrutable.scansion import Scanner as Sc
2
2
  from skrutable import meter_patterns
3
3
  from skrutable.config import load_config_dict_from_json_file
4
+ from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
4
5
  import re
5
6
  from copy import copy
6
7
  from dataclasses import dataclass
@@ -14,6 +15,120 @@ default_resplit_keep_midpoint = config["default_resplit_keep_midpoint"] # e.g.
14
15
  disable_non_trizwuB_upajAti = config["disable_non_trizwuB_upajAti"] # e.g. True
15
16
  meter_scores = config["meter_scores"] # dict
16
17
 
18
# Per-category profiling accumulators, updated once per identified verse:
# { category: { timing key: float seconds, '_count': int, '_perfect_count': int } }
_category_totals = {}  # single source of truth for the profiling report


# Label prefixes that mark a verse as ardhasamavṛtta.
_ARDHASAMAVRTTA_NAMES = [
    'aparavaktra', 'upacitra', 'puṣpitāgrā', 'viyoginī', 'vegavatī',
    'hariṇaplutā', 'aupacchandasika', 'ajñātārdhasamavṛtta',
]
# Label prefixes that mark a verse as one of the jāti sub-types.
_JATI_SUBCATS = ['āryā', 'gīti', 'upagīti', 'udgīti', 'āryāgīti']


def _meter_label_to_category(label):
    """Map a meter label string to the coarse category used for profiling.

    Args:
        label: meter label of an identified verse; may be None or empty.

    Returns:
        One of 'na kiṃcid adhyavasitam', 'anuṣṭubh', 'upajāti',
        'ardhasamavṛtta', 'viṣamavṛtta', 'jāti' or 'samavṛtta'.  Any label
        that matches none of the specific tests is counted as 'samavṛtta'.

    The tests are order-sensitive: the first match wins.
    """
    if not label or 'adhyavasitam' in label:
        return 'na kiṃcid adhyavasitam'
    if 'anuṣṭubh' in label or 'anustubh' in label:
        return 'anuṣṭubh'
    # Must come before the jāti tests: 'upajāti' contains the substring 'jāti'.
    if 'upajāti' in label:
        return 'upajāti'
    # str.startswith accepts a tuple of prefixes, so no explicit loop is needed.
    if label.startswith(tuple(_ARDHASAMAVRTTA_NAMES)) or 'ardhasamavṛtta' in label:
        return 'ardhasamavṛtta'
    if label.startswith('udgatā'):
        return 'viṣamavṛtta'
    if (label.startswith(tuple(_JATI_SUBCATS))
            or 'jāti' in label or 'vaitālīya' in label or 'mātrā' in label):
        return 'jāti'
    return 'samavṛtta'
45
+
46
+
47
def _verse_is_perfect(V):
    """Return True iff ``V.is_perfect`` is set and truthy.

    Args:
        V: verse object; it need not have an ``is_perfect`` attribute.

    Returns:
        A real ``bool``: False when the attribute is missing or falsy
        (including ``None``), True otherwise.
    """
    # Coerce so callers always get a bool, never the raw attribute value.
    return bool(getattr(V, 'is_perfect', False))
50
+
51
+
52
def flush_profiling_report(write_file=False):
    """Print the accumulated profiling table to stderr, then reset all counters.

    One row is printed for every meter category that has data, followed by a
    TOTAL row.  Each row shows the perfect/imperfect verse counts, the overall
    time, the scan and identification subtotals, and one column per timing key.

    Args:
        write_file: if True, also write the table to ``profiling_debug.txt``
            in the directory that contains this module, overwriting any
            earlier report.

    Does nothing when ``_DEBUG_TIMING`` is False or no verse has been recorded.
    The counters are reset even if printing or writing fails; such an error
    still propagates to the caller.
    """
    if not _DEBUG_TIMING or not _category_totals:
        return
    import os
    import sys

    scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
    type_keys = ('anuzwuB', 'samavftta_etc', 'jAti')
    scan_abbrev = {'scan_clean': 'clean', 'scan_translit': 'transl', 'scan_syllabify': 'syl',
                   'scan_weights': 'wts', 'scan_morae_gana': 'mor+g'}
    type_abbrev = {'anuzwuB': 'anuṣṭ', 'samavftta_etc': 'samav', 'jAti': 'jāti'}
    cat_order = ['anuṣṭubh', 'samavṛtta', 'upajāti', 'ardhasamavṛtta', 'viṣamavṛtta', 'jāti',
                 'na kiṃcid adhyavasitam']
    indent = ' '
    gap = ' '

    hdr_scan_abbrevs = [scan_abbrev[k] for k in scan_keys]
    hdr_type_abbrevs = [type_abbrev[k] for k in type_keys]

    # Column widths.
    val_w = len('0.00s')
    col_cat_w = max(len(c) for c in cat_order + ['category']) + 2
    sub_w = max(len('scan∑'), len('types∑'), len('total'), val_w) + 2
    scan_col_ws = [max(len(a), val_w) + 1 for a in hdr_scan_abbrevs]
    type_col_ws = [max(len(a), val_w) + 1 for a in hdr_type_abbrevs]
    n_verses = sum(b.get('_count', 0) for b in _category_totals.values())
    # Size the count columns from the overall verse count: the TOTAL row prints
    # sums, which can have more digits than any single category's count.
    count_w = max(len(str(n_verses)), len('perf'), len('impf')) + 1

    def secs(value):
        """Format a duration in seconds as e.g. '0.12s'."""
        return f'{value:.2f}s'

    def fmt_row(scan_vals, type_vals):
        """Right-align the per-key cells: scan columns, a gap, then type columns."""
        return (gap.join(v.rjust(w) for v, w in zip(scan_vals, scan_col_ws))
                + gap + gap.join(v.rjust(w) for v, w in zip(type_vals, type_col_ws)))

    def fmt_line(name, n_perf, n_impf, scan_times, type_times):
        """Build one table row from a label, two counts and the raw timings."""
        scan_total = sum(scan_times)
        type_total = sum(type_times)
        return (indent + name.ljust(col_cat_w)
                + str(n_perf).rjust(count_w) + str(n_impf).rjust(count_w)
                + secs(scan_total + type_total).rjust(sub_w)
                + secs(scan_total).rjust(sub_w)
                + secs(type_total).rjust(sub_w)
                + gap + fmt_row([secs(t) for t in scan_times], [secs(t) for t in type_times]))

    def column_sum(key):
        """Sum one timing key over all reported categories."""
        return sum(_category_totals.get(c, {}).get(key, 0.0) for c in cat_order)

    wiggle_count = _section_totals.get('wiggle_count', 0)
    lines = [f'\n=== {n_verses} verses / {wiggle_count} resplit candidates ===']
    hdr = (indent + 'category'.ljust(col_cat_w)
           + 'perf'.rjust(count_w) + 'impf'.rjust(count_w)
           + 'total'.rjust(sub_w) + 'scan∑'.rjust(sub_w) + 'types∑'.rjust(sub_w)
           + gap + fmt_row(hdr_scan_abbrevs, hdr_type_abbrevs))
    # Derive the rule from the header so it always matches the row layout.
    sep = indent + '-' * (len(hdr) - len(indent))
    lines += [hdr, sep]

    total_perfect = 0
    total_imperfect = 0
    for cat in cat_order:
        bucket = _category_totals.get(cat)
        if not bucket:
            continue
        n_perf = bucket.get('_perfect_count', 0)
        n_impf = bucket.get('_count', 0) - n_perf
        total_perfect += n_perf
        total_imperfect += n_impf
        lines.append(fmt_line(cat, n_perf, n_impf,
                              [bucket.get(k, 0.0) for k in scan_keys],
                              [bucket.get(k, 0.0) for k in type_keys]))
    lines.append(sep)
    lines.append(fmt_line('TOTAL', total_perfect, total_imperfect,
                          [column_sum(k) for k in scan_keys],
                          [column_sum(k) for k in type_keys]))
    report = '\n'.join(lines) + '\n'

    try:
        # Print first so the report is not lost if the file cannot be written
        # (e.g. the package directory is read-only).
        print(report, file=sys.stderr, flush=True)
        if write_file:
            timing_path = os.path.join(os.path.dirname(__file__), 'profiling_debug.txt')
            with open(timing_path, 'w', encoding='utf-8') as _f:
                _f.write(report)
    finally:
        # Always reset, so a failed write cannot cause double counting later.
        _category_totals.clear()
        _section_totals.clear()
131
+
17
132
 
18
133
  @dataclass
19
134
  class Diagnostic:
@@ -193,7 +308,7 @@ class VerseTester(object):
193
308
  self.identification_attempt_count = 0
194
309
  self._anuzwuB_half_cache = {} # cleared per wiggle_identify run
195
310
 
196
- def combine_results(self, Vrs, new_label, new_score):
311
+ def combine_results(self, Vrs, new_label, new_score, new_is_perfect=False):
197
312
  old_label = Vrs.meter_label or ''
198
313
  old_score = Vrs.identification_score
199
314
 
@@ -207,6 +322,7 @@ class VerseTester(object):
207
322
  # override previous
208
323
  Vrs.meter_label = new_label
209
324
  Vrs.identification_score = new_score
325
+ Vrs.is_perfect = new_is_perfect
210
326
 
211
327
  elif new_score == old_score:
212
328
  # tie, concatenate as old + new
@@ -326,12 +442,14 @@ class VerseTester(object):
326
442
  if ardham_eva_result.perfect():
327
443
  Vrs.meter_label = f"anuṣṭubh (ardham eva: {ardham_eva_result.perfect_id_label})"
328
444
  Vrs.identification_score = meter_scores["anuṣṭubh, half, single half perfect)"]
445
+ Vrs.is_perfect = True
329
446
  Vrs.diagnostic = ardham_eva_result
330
447
  return ardham_eva_result
331
448
  elif ardham_eva_result.imperfect():
332
449
  label_str = '; '.join(f"{k}: {v}" for k, v in ardham_eva_result.imperfect_label_sanskrit.items())
333
450
  Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
334
451
  Vrs.identification_score = meter_scores["anuṣṭubh, half, single half imperfect)"]
452
+ Vrs.is_perfect = False
335
453
  Vrs.diagnostic = ardham_eva_result
336
454
  return ardham_eva_result
337
455
  else:
@@ -346,6 +464,7 @@ class VerseTester(object):
346
464
  if pAdas_ab_result.perfect() and pAdas_cd_result.perfect():
347
465
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {pAdas_cd_result.perfect_id_label})"
348
466
  Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves perfect)"]
467
+ Vrs.is_perfect = True
349
468
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
350
469
  return pAdas_ab_result
351
470
 
@@ -355,12 +474,14 @@ class VerseTester(object):
355
474
  ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
356
475
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
357
476
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
477
+ Vrs.is_perfect = False
358
478
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
359
479
  return pAdas_ab_result
360
480
  elif pAdas_ab_result.perfect() and pAdas_cd_result.imperfect():
361
481
  cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
362
482
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {cd_str})"
363
483
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
484
+ Vrs.is_perfect = False
364
485
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
365
486
  return pAdas_cd_result
366
487
 
@@ -371,6 +492,7 @@ class VerseTester(object):
371
492
  cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
372
493
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {cd_str})"
373
494
  Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves imperfect)"]
495
+ Vrs.is_perfect = False
374
496
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
375
497
  return pAdas_ab_result
376
498
 
@@ -380,12 +502,14 @@ class VerseTester(object):
380
502
  ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
381
503
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
382
504
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
505
+ Vrs.is_perfect = False
383
506
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
384
507
  return pAdas_cd_result
385
508
  elif pAdas_ab_result.perfect() and pAdas_cd_result.length_error():
386
509
  cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
387
510
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: ?? {cd_str})"
388
511
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
512
+ Vrs.is_perfect = False
389
513
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
390
514
  return pAdas_ab_result
391
515
 
@@ -396,6 +520,7 @@ class VerseTester(object):
396
520
  cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
397
521
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {cd_str})"
398
522
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
523
+ Vrs.is_perfect = False
399
524
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
400
525
  return pAdas_cd_result
401
526
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.length_error():
@@ -403,6 +528,7 @@ class VerseTester(object):
403
528
  cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
404
529
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: ?? {cd_str})"
405
530
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
531
+ Vrs.is_perfect = False
406
532
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
407
533
  return pAdas_ab_result
408
534
 
@@ -538,7 +664,7 @@ class VerseTester(object):
538
664
 
539
665
  # score arbitration: may tie with pre-existing result (e.g., upajāti)
540
666
  old_score = Vrs.identification_score
541
- self.combine_results(Vrs, new_label=meter_label, new_score=score)
667
+ self.combine_results(Vrs, new_label=meter_label, new_score=score, new_is_perfect=imperfect_note is None)
542
668
  if score >= old_score:
543
669
  Vrs.diagnostic = diagnostic
544
670
 
@@ -547,6 +673,7 @@ class VerseTester(object):
547
673
  def evaluate_ardhasamavftta(self, Vrs):
548
674
  # sufficient pAdasamatva already assured, now just evaluate
549
675
  Vrs.identification_score = meter_scores["ardhasamavṛtta, perfect"]
676
+ Vrs.is_perfect = True
550
677
 
551
678
  wbp = Vrs.syllable_weights.split('\n') # weights by pāda
552
679
 
@@ -575,6 +702,7 @@ class VerseTester(object):
575
702
  meter_label = "ajñātārdhasamavṛtta" # i.e., might need to add to meter_patterns
576
703
  meter_label += ' [%s, %s]' % (odd_g_to_id, even_g_to_id)
577
704
  Vrs.identification_score = meter_scores["ardhasamavṛtta, perfect, unknown"]
705
+ Vrs.is_perfect = True # "perfect, unknown" means pattern unknown, not imperfect
578
706
 
579
707
  Vrs.meter_label = meter_label
580
708
  Vrs.diagnostic = Diagnostic(perfect_id_label=meter_label)
@@ -729,7 +857,11 @@ class VerseTester(object):
729
857
 
730
858
  # score arbitration: may tie with pre-existing result (e.g., samavṛtta)
731
859
  old_score = Vrs.identification_score
732
- self.combine_results(Vrs, overall_meter_label, score)
860
+ is_perf = (score in (meter_scores["upajāti, perfect"],
861
+ meter_scores["upajāti, triṣṭubh-jagatī-saṃkara, perfect"],
862
+ meter_scores["upajāti, non-triṣṭubh, perfect"])
863
+ and 'ajñātam' not in overall_meter_label)
864
+ self.combine_results(Vrs, overall_meter_label, score, new_is_perfect=is_perf)
733
865
  if score >= old_score:
734
866
  Vrs.diagnostic = diagnostic
735
867
 
@@ -742,6 +874,7 @@ class VerseTester(object):
742
874
  for (a, b, c, d) in meter_patterns.vizamavftta_by_4_tuple:
743
875
  if (gs_to_id[0],gs_to_id[1],gs_to_id[2],gs_to_id[3]) == (a, b, c, d):
744
876
  Vrs.identification_score = meter_scores["viṣamavṛtta, perfect"]
877
+ Vrs.is_perfect = True
745
878
  Vrs.meter_label = meter_patterns.vizamavftta_by_4_tuple[(a, b, c, d)]
746
879
  Vrs.diagnostic = Diagnostic(perfect_id_label=Vrs.meter_label)
747
880
  return True
@@ -889,6 +1022,7 @@ class VerseTester(object):
889
1022
  suffix = '; '.join(f"ardha {i+1}: {v}" for i, v in enumerate(sa_vals))
890
1023
  Vrs.meter_label = jati_label + f" ({suffix})"
891
1024
  Vrs.identification_score = likely_score
1025
+ Vrs.is_perfect = False
892
1026
  Vrs.diagnostic = Diagnostic(
893
1027
  imperfect_label_sanskrit=per_pada_sanskrit or None,
894
1028
  imperfect_label_english=per_pada_english or None,
@@ -1064,6 +1198,7 @@ class VerseTester(object):
1064
1198
  if jati_score >= Vrs.identification_score:
1065
1199
  Vrs.meter_label = jati_label + f" ({imperfect_label_sa})"
1066
1200
  Vrs.identification_score = jati_score
1201
+ Vrs.is_perfect = False
1067
1202
  Vrs.mAtragaNa_abbreviations = mAtragaNa_abbrevs
1068
1203
  Vrs.diagnostic = Diagnostic(
1069
1204
  imperfect_label_sanskrit=label_sa_by_pada or None,
@@ -1116,6 +1251,7 @@ class VerseTester(object):
1116
1251
  if score >= Vrs.identification_score:
1117
1252
  Vrs.meter_label = new_label
1118
1253
  Vrs.identification_score = score
1254
+ Vrs.is_perfect = score == meter_scores["jāti, perfect"]
1119
1255
  Vrs.mAtragaNa_abbreviations = mAtragaNa_abbrevs
1120
1256
  Vrs.diagnostic = diagnostic
1121
1257
  return 1
@@ -1156,25 +1292,23 @@ class VerseTester(object):
1156
1292
  self.identification_attempt_count += 1
1157
1293
 
1158
1294
  # anuzwuB
1159
-
1160
- anuzwuB_diagnostic = self.test_as_anuzwuB(Vrs) # Diagnostic if successful, None if not
1161
- if anuzwuB_diagnostic and Vrs.identification_score == meter_scores["max score"]:
1295
+ success_anuzwuB = timed('anuzwuB')(self.test_as_anuzwuB)(Vrs)
1296
+ if success_anuzwuB and Vrs.identification_score == meter_scores["max score"]:
1162
1297
  return 1
1163
1298
 
1164
1299
  # samavftta, upajAti, vizamavftta, ardhasamavftta
1165
-
1166
- success_samavftta_etc = self.test_as_samavftta_etc(Vrs)
1167
- if success_samavftta_etc and Vrs.identification_score >= 8: return 1
1300
+ success_samavftta_etc = timed('samavftta_etc')(self.test_as_samavftta_etc)(Vrs)
1301
+ if success_samavftta_etc and Vrs.identification_score >= 8:
1302
+ return 1
1168
1303
  # i.e., if upajāti or anything imperfect, also continue on to check jāti
1169
1304
 
1170
1305
  # problem: how to change above handling for rare case
1171
1306
  # where ardhasamavftta is also jAti?
1172
1307
 
1173
1308
  # jāti
1309
+ success_jAti = timed('jAti')(self.test_as_jAti)(Vrs)
1174
1310
 
1175
- success_jAti = self.test_as_jAti(Vrs)
1176
-
1177
- if anuzwuB_diagnostic or success_samavftta_etc or success_jAti:
1311
+ if success_anuzwuB or success_samavftta_etc or success_jAti:
1178
1312
  return 1
1179
1313
  else:
1180
1314
  return 0
@@ -1260,13 +1394,17 @@ class MeterIdentifier(object):
1260
1394
 
1261
1395
  temp_V = copy(Vrs)
1262
1396
  temp_V.text_syllabified = new_text_syllabified
1263
- temp_V.syllable_weights = S.scan_syllable_weights(
1397
+
1398
+ if _DEBUG_TIMING:
1399
+ _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + 1
1400
+
1401
+ temp_V.syllable_weights = timed('scan_weights')(S.scan_syllable_weights)(
1264
1402
  temp_V.text_syllabified)
1265
- temp_V.morae_per_line = S.count_morae(
1403
+ temp_V.morae_per_line = timed('scan_morae_gana')(S.count_morae)(
1266
1404
  temp_V.syllable_weights)
1267
- temp_V.gaRa_abbreviations = '\n'.join(
1268
- [ S.gaRa_abbreviate(line) for line in temp_V.syllable_weights.split('\n') ]
1269
- )
1405
+ temp_V.gaRa_abbreviations = timed('scan_morae_gana')(
1406
+ lambda: '\n'.join([ S.gaRa_abbreviate(line) for line in temp_V.syllable_weights.split('\n') ])
1407
+ )()
1270
1408
 
1271
1409
  success = VrsTster.attempt_identification(temp_V)
1272
1410
 
@@ -1343,6 +1481,11 @@ class MeterIdentifier(object):
1343
1481
  # gets back mostly populated Verse object
1344
1482
  V = S.scan(rw_str, from_scheme=from_scheme)
1345
1483
 
1484
+ if _DEBUG_TIMING:
1485
+ _pre_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
1486
+ 'anuzwuB', 'samavftta_etc', 'jAti')
1487
+ _pre = {k: _section_totals.get(k, 0.0) for k in _pre_keys}
1488
+
1346
1489
  self.VerseTester = VT = VerseTester()
1347
1490
  self.VerseTester.resplit_option = resplit_option
1348
1491
  self.VerseTester.resplit_keep_midpoint = resplit_keep_midpoint
@@ -1423,4 +1566,17 @@ class MeterIdentifier(object):
1423
1566
  V.meter_label = 'na kiṃcid adhyavasitam'
1424
1567
  V.identification_score = meter_scores["none found"]
1425
1568
 
1569
+ if _DEBUG_TIMING:
1570
+ all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
1571
+ 'anuzwuB', 'samavftta_etc', 'jAti')
1572
+ verse_times = {k: _section_totals.get(k, 0.0) - _pre[k] for k in all_keys}
1573
+ verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
1574
+ cat = _meter_label_to_category(V.meter_label)
1575
+ bucket = _category_totals.setdefault(cat, {})
1576
+ for k, v in verse_times.items():
1577
+ bucket[k] = bucket.get(k, 0.0) + v
1578
+ bucket['_count'] = bucket.get('_count', 0) + 1
1579
+ if _verse_is_perfect(V):
1580
+ bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
1581
+
1426
1582
  return V
@@ -3,6 +3,7 @@ from skrutable import scheme_detection
3
3
  from skrutable import meter_patterns
4
4
  from skrutable import phonemes
5
5
  from skrutable.config import load_config_dict_from_json_file
6
+ from skrutable.utils import timed
6
7
  import re
7
8
 
8
9
  # load config variables
@@ -146,6 +147,7 @@ class Scanner(object):
146
147
  self.Transliterator = None # will hold Transliterator object
147
148
 
148
149
 
150
+ @timed('scan_clean')
149
151
  def clean_input(self, cntnts, scheme_in):
150
152
  """
151
153
  Accepts raw text string,
@@ -174,6 +176,7 @@ class Scanner(object):
174
176
 
175
177
  return cntnts
176
178
 
179
+ @timed('scan_syllabify')
177
180
  def syllabify_text(self, txt_SLP):
178
181
  """
179
182
  Accepts (newline-separated) multi-line string of SLP text.
@@ -238,6 +241,7 @@ class Scanner(object):
238
241
  return text_syllabified
239
242
 
240
243
 
244
+ @timed('scan_weights')
241
245
  def scan_syllable_weights(self, txt_syl):
242
246
  """
243
247
  Accepts (newline-separated) multi-line string of text
@@ -295,6 +299,7 @@ class Scanner(object):
295
299
  return syllable_weights
296
300
 
297
301
 
302
+ @timed('scan_morae_gana')
298
303
  def count_morae(self, syl_wts):
299
304
  """
300
305
  Accepts (newline-separated) multi-line string of text
@@ -370,13 +375,13 @@ class Scanner(object):
370
375
  T.scheme_out = 'SLP'
371
376
 
372
377
  V.text_cleaned = self.clean_input(V.text_raw, V.original_scheme)
373
- V.text_SLP = T.transliterate(V.text_cleaned)
378
+ V.text_SLP = timed('scan_translit')(T.transliterate)(V.text_cleaned)
374
379
  V.text_syllabified = self.syllabify_text(V.text_SLP)
375
380
  V.syllable_weights = self.scan_syllable_weights(V.text_syllabified)
376
381
  V.morae_per_line = self.count_morae(V.syllable_weights)
377
- V.gaRa_abbreviations = '\n'.join(
378
- [ self.gaRa_abbreviate(line) for line in V.syllable_weights.split('\n') ]
379
- )
382
+ V.gaRa_abbreviations = timed('scan_morae_gana')(
383
+ lambda: '\n'.join([ self.gaRa_abbreviate(line) for line in V.syllable_weights.split('\n') ])
384
+ )()
380
385
 
381
386
  self.Verse = V
382
387
  self.Transliterator = T
@@ -0,0 +1,20 @@
1
+ import time
2
+ from functools import wraps
3
+
4
# Master switch for the timing instrumentation; read on every wrapped call.
_DEBUG_TIMING = False

# Flat dict of all timing buckets: scan sub-keys, id type keys, wiggle, etc.
_section_totals = {}


def timed(key):
    """Return a decorator that accumulates wall time into ``_section_totals[key]``.

    When ``_DEBUG_TIMING`` is False the wrapper simply calls the wrapped
    function.  When it is True, the elapsed ``time.perf_counter()`` time of
    every call is added to ``_section_totals[key]``, including calls that
    raise; the exception still propagates.

    Args:
        key: name of the bucket in ``_section_totals`` to add the time to.
    """
    def decorator(fn):
        @wraps(fn)
        def wrapper(*args, **kwargs):
            if not _DEBUG_TIMING:
                return fn(*args, **kwargs)
            start = time.perf_counter()
            try:
                return fn(*args, **kwargs)
            finally:
                # Compute the elapsed time first, then add it: adding the raw
                # clock reading to the running total would lose precision.
                elapsed = time.perf_counter() - start
                _section_totals[key] = _section_totals.get(key, 0.0) + elapsed
        return wrapper
    return decorator
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.4.0
3
+ Version: 2.5.0
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -19,6 +19,7 @@ src/skrutable/scheme_vectors.json
19
19
  src/skrutable/scheme_vectors_mbh.py
20
20
  src/skrutable/splitting.py
21
21
  src/skrutable/transliteration.py
22
+ src/skrutable/utils.py
22
23
  src/skrutable/virAma_avoidance.py
23
24
  src/skrutable.egg-info/PKG-INFO
24
25
  src/skrutable.egg-info/SOURCES.txt
@@ -1 +0,0 @@
1
- __version__ = "2.4.0"
File without changes
File without changes
File without changes
File without changes