skrutable 2.6.1__tar.gz → 2.6.3__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {skrutable-2.6.1 → skrutable-2.6.3}/PKG-INFO +1 -1
  2. skrutable-2.6.3/src/skrutable/__init__.py +1 -0
  3. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/meter_identification.py +104 -13
  4. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/PKG-INFO +1 -1
  5. skrutable-2.6.1/src/skrutable/__init__.py +0 -1
  6. {skrutable-2.6.1 → skrutable-2.6.3}/LICENSE.md +0 -0
  7. {skrutable-2.6.1 → skrutable-2.6.3}/README.md +0 -0
  8. {skrutable-2.6.1 → skrutable-2.6.3}/setup.cfg +0 -0
  9. {skrutable-2.6.1 → skrutable-2.6.3}/setup.py +0 -0
  10. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/config.json +0 -0
  11. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/config.py +0 -0
  12. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/generate_scheme_vectors.py +0 -0
  13. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/impossible_bigrams.json +0 -0
  14. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/manual.md +0 -0
  15. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/meter_patterns.py +0 -0
  16. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/phonemes.py +0 -0
  17. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/run_examples.py +0 -0
  18. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scansion.py +0 -0
  19. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_detection.py +0 -0
  20. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_maps.py +0 -0
  21. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_vectors.json +0 -0
  22. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_vectors_mbh.py +0 -0
  23. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/splitting.py +0 -0
  24. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/transliteration.py +0 -0
  25. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/utils.py +0 -0
  26. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/virAma_avoidance.py +0 -0
  27. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/SOURCES.txt +0 -0
  28. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/dependency_links.txt +0 -0
  29. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/requires.txt +0 -0
  30. {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.1
3
+ Version: 2.6.3
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -0,0 +1 @@
1
+ __version__ = "2.6.3"
@@ -5,9 +5,13 @@ from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
5
5
  import re
6
6
  import time as _time
7
7
  from copy import copy
8
+ from concurrent.futures import ProcessPoolExecutor
8
9
  from dataclasses import dataclass
9
10
  from typing import Optional
10
11
 
12
+ BATCH_MAX_WORKERS = 5
13
+ BATCH_PARALLEL_THRESHOLD = 100
14
+
11
15
  # load config variables
12
16
  config = load_config_dict_from_json_file()
13
17
  scansion_syllable_separator = config["scansion_syllable_separator"] # e.g. " "
@@ -50,10 +54,12 @@ def _verse_is_perfect(V):
50
54
  return getattr(V, 'is_perfect', False)
51
55
 
52
56
 
53
- def flush_profiling_report(write_file=False):
57
+ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_workers=None):
54
58
  """Print the accumulated profiling table to stderr, then reset all counters.
55
59
 
56
60
  Pass write_file=True to also write the table to profiling_debug.txt alongside the library source.
61
+ Pass wall_clock_secs to append a timing footer line.
62
+ Pass parallel_workers (int) to show worker count and parallelization speedup; omit or pass None for serial runs.
57
63
  Safe to call even when _DEBUG_TIMING is False (no-op).
58
64
  """
59
65
  if not _DEBUG_TIMING or not _category_totals:
@@ -124,6 +130,13 @@ def flush_profiling_report(write_file=False):
124
130
  + f'{total_scan:.2f}s'.rjust(sub_w)
125
131
  + f'{total_types:.2f}s'.rjust(sub_w)
126
132
  + ' ' + fmt_row(total_scan_vals, total_type_vals))
133
+ if wall_clock_secs is not None:
134
+ table_total = total_scan + total_types
135
+ if parallel_workers is not None:
136
+ speedup = table_total / wall_clock_secs if wall_clock_secs > 0 else float('inf')
137
+ lines.append(f'\n table total (CPU across {parallel_workers} workers, inflated by overhead): {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s | apparent parallelization speedup: {speedup:.2f}x')
138
+ else:
139
+ lines.append(f'\n table total: {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s')
127
140
  block = '\n'.join(lines) + '\n'
128
141
  if write_file:
129
142
  timing_path = os.path.join(os.path.dirname(__file__), 'profiling_debug.txt')
@@ -140,6 +153,8 @@ class Diagnostic:
140
153
  imperfect_label_sanskrit: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); Sanskrit only
141
154
  imperfect_label_english: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); English only
142
155
  problem_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); None if perfect
156
+ notable_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); green-highlighted "interesting/ok" syllables
157
+ notable_label: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); label for the notable feature (same string for skt/eng)
143
158
 
144
159
  def perfect(self):
145
160
  return self.perfect_id_label is not None
@@ -460,17 +475,28 @@ class VerseTester(object):
460
475
  result = None
461
476
  for weights_pattern, label in meter_patterns.anuzwuB_pAda['odd'].items():
462
477
  if re.match(weights_pattern, odd_pAda_weights):
463
- result = Diagnostic(perfect_id_label=label)
478
+ is_vipula = 'vipulā' in label
479
+ result = Diagnostic(
480
+ perfect_id_label=label,
481
+ notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
482
+ notable_label={'odd': label} if is_vipula else None,
483
+ )
464
484
  break
465
485
  if result is None:
466
486
  # Odd pāda matched no perfect pattern — try asamīcīna patterns
467
487
  # before falling back to the generic ya-gaṇa violation label.
468
488
  for weights_pattern, (label, problem_syls, code) in meter_patterns.anuzwuB_pAda_asamIcIna['odd'].items():
469
489
  if re.match(weights_pattern, odd_pAda_weights):
490
+ is_vipula = 'vipulā' in label
491
+ # extract vipulā name from label like "asamīcīnā, ma-vipulāyāḥ pūrvam raḥ syāt"
492
+ vipula_match = re.search(r'\w+-vipulā', label)
493
+ vipula_name = vipula_match.group(0) if vipula_match else None
470
494
  result = Diagnostic(
471
495
  imperfect_label_sanskrit={'odd': label},
472
496
  imperfect_label_english={'odd': code},
473
497
  problem_syllables={'odd': problem_syls},
498
+ notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
499
+ notable_label={'odd': vipula_name} if vipula_name else None,
474
500
  )
475
501
  break
476
502
  if result is None:
@@ -515,7 +541,7 @@ class VerseTester(object):
515
541
  Vrs.diagnostic = ardham_eva_result
516
542
  return ardham_eva_result
517
543
  elif ardham_eva_result.imperfect():
518
- label_str = '; '.join(f"{k}: {v}" for k, v in ardham_eva_result.imperfect_label_sanskrit.items())
544
+ label_str = '; '.join(ardham_eva_result.imperfect_label_sanskrit.values())
519
545
  Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
520
546
  Vrs.identification_score = meter_scores["anuṣṭubh, half, single half imperfect)"]
521
547
  Vrs.is_perfect = False
@@ -540,14 +566,14 @@ class VerseTester(object):
540
566
  # one half imperfect
541
567
 
542
568
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.perfect():
543
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
569
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
544
570
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
545
571
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
546
572
  Vrs.is_perfect = False
547
573
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
548
574
  return pAdas_ab_result
549
575
  elif pAdas_ab_result.perfect() and pAdas_cd_result.imperfect():
550
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
576
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
551
577
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {cd_str})"
552
578
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
553
579
  Vrs.is_perfect = False
@@ -557,8 +583,8 @@ class VerseTester(object):
557
583
  # both halves imperfect
558
584
 
559
585
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.imperfect():
560
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
561
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
586
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
587
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
562
588
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {cd_str})"
563
589
  Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves imperfect)"]
564
590
  Vrs.is_perfect = False
@@ -568,14 +594,14 @@ class VerseTester(object):
568
594
  # one half perfect, one length error
569
595
 
570
596
  elif pAdas_ab_result.length_error() and pAdas_cd_result.perfect():
571
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
597
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
572
598
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
573
599
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
574
600
  Vrs.is_perfect = False
575
601
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
576
602
  return pAdas_cd_result
577
603
  elif pAdas_ab_result.perfect() and pAdas_cd_result.length_error():
578
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
604
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
579
605
  Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: ?? {cd_str})"
580
606
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
581
607
  Vrs.is_perfect = False
@@ -585,16 +611,16 @@ class VerseTester(object):
585
611
  # one half imperfect, one length error
586
612
 
587
613
  elif pAdas_ab_result.length_error() and pAdas_cd_result.imperfect():
588
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
589
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
614
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
615
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
590
616
  Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {cd_str})"
591
617
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
592
618
  Vrs.is_perfect = False
593
619
  Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
594
620
  return pAdas_cd_result
595
621
  elif pAdas_ab_result.imperfect() and pAdas_cd_result.length_error():
596
- ab_str = '; '.join(f"{k}: {v}" for k, v in pAdas_ab_result.imperfect_label_sanskrit.items())
597
- cd_str = '; '.join(f"{k}: {v}" for k, v in pAdas_cd_result.imperfect_label_sanskrit.items())
622
+ ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
623
+ cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
598
624
  Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: ?? {cd_str})"
599
625
  Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
600
626
  Vrs.is_perfect = False
@@ -1972,3 +1998,68 @@ class MeterIdentifier(object):
1972
1998
  bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
1973
1999
 
1974
2000
  return V
2001
+
2002
+ def identify_meter_batch(self, rw_strs,
2003
+ resplit_option=default_resplit_option,
2004
+ resplit_keep_midpoint=default_resplit_keep_midpoint,
2005
+ from_scheme=None):
2006
+ """
2007
+ Parallel version of identify_meter() for a list of raw strings.
2008
+
2009
+ Spawns up to BATCH_MAX_WORKERS worker processes, one task per verse.
2010
+ Returns a list of Verse objects in the same order as the input.
2011
+ When _DEBUG_TIMING is on, merges per-verse timing dicts back into
2012
+ the main process's _category_totals so flush_profiling_report() works.
2013
+ Falls back to serial processing for small batches below BATCH_PARALLEL_THRESHOLD.
2014
+ """
2015
+ if len(rw_strs) < BATCH_PARALLEL_THRESHOLD:
2016
+ return [self.identify_meter(s, resplit_option=resplit_option,
2017
+ resplit_keep_midpoint=resplit_keep_midpoint, from_scheme=from_scheme)
2018
+ for s in rw_strs]
2019
+
2020
+ args = [(s, resplit_option, resplit_keep_midpoint, from_scheme, _DEBUG_TIMING) for s in rw_strs]
2021
+ with ProcessPoolExecutor(max_workers=BATCH_MAX_WORKERS) as executor:
2022
+ results = list(executor.map(_identify_meter_worker, args))
2023
+
2024
+ if _DEBUG_TIMING:
2025
+ for V, verse_times, cat in results:
2026
+ _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + verse_times.pop('wiggle_count', 0)
2027
+ bucket = _category_totals.setdefault(cat, {})
2028
+ for k, v in verse_times.items():
2029
+ bucket[k] = bucket.get(k, 0.0) + v
2030
+ bucket['_count'] = bucket.get('_count', 0) + 1
2031
+ if _verse_is_perfect(V):
2032
+ bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
2033
+ return [V for V, _, _ in results]
2034
+
2035
+ return results
2036
+
2037
+
2038
+ def _identify_meter_worker(args):
2039
+ """Module-level worker function (must be picklable). One verse per call."""
2040
+ rw_str, resplit_option, resplit_keep_midpoint, from_scheme, debug_timing = args
2041
+ if debug_timing:
2042
+ import skrutable.utils as _utils
2043
+ _utils._DEBUG_TIMING = True
2044
+ import skrutable.meter_identification as _mi
2045
+ _mi._DEBUG_TIMING = True
2046
+ MI = MeterIdentifier()
2047
+ all_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana',
2048
+ 'anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
2049
+ 'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
2050
+ if debug_timing:
2051
+ pre = {k: _section_totals.get(k, 0.0) for k in all_keys}
2052
+ pre_wiggle = _section_totals.get('wiggle_count', 0)
2053
+ V = MI.identify_meter(
2054
+ rw_str,
2055
+ resplit_option=resplit_option,
2056
+ resplit_keep_midpoint=resplit_keep_midpoint,
2057
+ from_scheme=from_scheme,
2058
+ )
2059
+ if debug_timing:
2060
+ verse_times = {k: _section_totals.get(k, 0.0) - pre[k] for k in all_keys}
2061
+ verse_times['scan'] = sum(verse_times[k] for k in ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana'))
2062
+ verse_times['wiggle_count'] = _section_totals.get('wiggle_count', 0) - pre_wiggle
2063
+ cat = _meter_label_to_category(V.meter_label)
2064
+ return V, verse_times, cat
2065
+ return V
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.1
3
+ Version: 2.6.3
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -1 +0,0 @@
1
- __version__ = "2.6.1"
File without changes
File without changes
File without changes
File without changes