skrutable 2.6.1__tar.gz → 2.6.3__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skrutable-2.6.1 → skrutable-2.6.3}/PKG-INFO +1 -1
- skrutable-2.6.3/src/skrutable/__init__.py +1 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/meter_identification.py +104 -13
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/PKG-INFO +1 -1
- skrutable-2.6.1/src/skrutable/__init__.py +0 -1
- {skrutable-2.6.1 → skrutable-2.6.3}/LICENSE.md +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/README.md +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/setup.cfg +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/setup.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/config.json +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/config.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/generate_scheme_vectors.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/impossible_bigrams.json +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/manual.md +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/meter_patterns.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/phonemes.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/run_examples.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scansion.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_vectors.json +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/splitting.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/transliteration.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/utils.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/SOURCES.txt +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.6.1 → skrutable-2.6.3}/src/skrutable.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.6.3"
|
|
@@ -5,9 +5,13 @@ from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
|
|
|
5
5
|
import re
|
|
6
6
|
import time as _time
|
|
7
7
|
from copy import copy
|
|
8
|
+
from concurrent.futures import ProcessPoolExecutor
|
|
8
9
|
from dataclasses import dataclass
|
|
9
10
|
from typing import Optional
|
|
10
11
|
|
|
12
|
+
BATCH_MAX_WORKERS = 5
|
|
13
|
+
BATCH_PARALLEL_THRESHOLD = 100
|
|
14
|
+
|
|
11
15
|
# load config variables
|
|
12
16
|
config = load_config_dict_from_json_file()
|
|
13
17
|
scansion_syllable_separator = config["scansion_syllable_separator"] # e.g. " "
|
|
@@ -50,10 +54,12 @@ def _verse_is_perfect(V):
|
|
|
50
54
|
return getattr(V, 'is_perfect', False)
|
|
51
55
|
|
|
52
56
|
|
|
53
|
-
def flush_profiling_report(write_file=False):
|
|
57
|
+
def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_workers=None):
|
|
54
58
|
"""Print the accumulated profiling table to stderr, then reset all counters.
|
|
55
59
|
|
|
56
60
|
Pass write_file=True to also write the table to profiling_debug.txt alongside the library source.
|
|
61
|
+
Pass wall_clock_secs to append a timing footer line.
|
|
62
|
+
Pass parallel_workers (int) to show worker count and parallelization speedup; omit or pass None for serial runs.
|
|
57
63
|
Safe to call even when _DEBUG_TIMING is False (no-op).
|
|
58
64
|
"""
|
|
59
65
|
if not _DEBUG_TIMING or not _category_totals:
|
|
@@ -124,6 +130,13 @@ def flush_profiling_report(write_file=False):
|
|
|
124
130
|
+ f'{total_scan:.2f}s'.rjust(sub_w)
|
|
125
131
|
+ f'{total_types:.2f}s'.rjust(sub_w)
|
|
126
132
|
+ ' ' + fmt_row(total_scan_vals, total_type_vals))
|
|
133
|
+
if wall_clock_secs is not None:
|
|
134
|
+
table_total = total_scan + total_types
|
|
135
|
+
if parallel_workers is not None:
|
|
136
|
+
speedup = table_total / wall_clock_secs if wall_clock_secs > 0 else float('inf')
|
|
137
|
+
lines.append(f'\n table total (CPU across {parallel_workers} workers, inflated by overhead): {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s | apparent parallelization speedup: {speedup:.2f}x')
|
|
138
|
+
else:
|
|
139
|
+
lines.append(f'\n table total: {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s')
|
|
127
140
|
block = '\n'.join(lines) + '\n'
|
|
128
141
|
if write_file:
|
|
129
142
|
timing_path = os.path.join(os.path.dirname(__file__), 'profiling_debug.txt')
|
|
@@ -140,6 +153,8 @@ class Diagnostic:
|
|
|
140
153
|
imperfect_label_sanskrit: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); Sanskrit only
|
|
141
154
|
imperfect_label_english: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); English only
|
|
142
155
|
problem_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); None if perfect
|
|
156
|
+
notable_syllables: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); green-highlighted "interesting/ok" syllables
|
|
157
|
+
notable_label: Optional[dict] = None # keyed by pada (1–4 or 'odd'/'even'); label for the notable feature (same string for skt/eng)
|
|
143
158
|
|
|
144
159
|
def perfect(self):
|
|
145
160
|
return self.perfect_id_label is not None
|
|
@@ -460,17 +475,28 @@ class VerseTester(object):
|
|
|
460
475
|
result = None
|
|
461
476
|
for weights_pattern, label in meter_patterns.anuzwuB_pAda['odd'].items():
|
|
462
477
|
if re.match(weights_pattern, odd_pAda_weights):
|
|
463
|
-
|
|
478
|
+
is_vipula = 'vipulā' in label
|
|
479
|
+
result = Diagnostic(
|
|
480
|
+
perfect_id_label=label,
|
|
481
|
+
notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
|
|
482
|
+
notable_label={'odd': label} if is_vipula else None,
|
|
483
|
+
)
|
|
464
484
|
break
|
|
465
485
|
if result is None:
|
|
466
486
|
# Odd pāda matched no perfect pattern — try asamīcīna patterns
|
|
467
487
|
# before falling back to the generic ya-gaṇa violation label.
|
|
468
488
|
for weights_pattern, (label, problem_syls, code) in meter_patterns.anuzwuB_pAda_asamIcIna['odd'].items():
|
|
469
489
|
if re.match(weights_pattern, odd_pAda_weights):
|
|
490
|
+
is_vipula = 'vipulā' in label
|
|
491
|
+
# extract vipulā name from label like "asamīcīnā, ma-vipulāyāḥ pūrvam raḥ syāt"
|
|
492
|
+
vipula_match = re.search(r'\w+-vipulā', label)
|
|
493
|
+
vipula_name = vipula_match.group(0) if vipula_match else None
|
|
470
494
|
result = Diagnostic(
|
|
471
495
|
imperfect_label_sanskrit={'odd': label},
|
|
472
496
|
imperfect_label_english={'odd': code},
|
|
473
497
|
problem_syllables={'odd': problem_syls},
|
|
498
|
+
notable_syllables={'odd': [4, 5, 6]} if is_vipula else None,
|
|
499
|
+
notable_label={'odd': vipula_name} if vipula_name else None,
|
|
474
500
|
)
|
|
475
501
|
break
|
|
476
502
|
if result is None:
|
|
@@ -515,7 +541,7 @@ class VerseTester(object):
|
|
|
515
541
|
Vrs.diagnostic = ardham_eva_result
|
|
516
542
|
return ardham_eva_result
|
|
517
543
|
elif ardham_eva_result.imperfect():
|
|
518
|
-
label_str = '; '.join(
|
|
544
|
+
label_str = '; '.join(ardham_eva_result.imperfect_label_sanskrit.values())
|
|
519
545
|
Vrs.meter_label = f"anuṣṭubh (ardham eva: {label_str})"
|
|
520
546
|
Vrs.identification_score = meter_scores["anuṣṭubh, half, single half imperfect)"]
|
|
521
547
|
Vrs.is_perfect = False
|
|
@@ -540,14 +566,14 @@ class VerseTester(object):
|
|
|
540
566
|
# one half imperfect
|
|
541
567
|
|
|
542
568
|
elif pAdas_ab_result.imperfect() and pAdas_cd_result.perfect():
|
|
543
|
-
ab_str = '; '.join(
|
|
569
|
+
ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
|
|
544
570
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
|
|
545
571
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
|
|
546
572
|
Vrs.is_perfect = False
|
|
547
573
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
548
574
|
return pAdas_ab_result
|
|
549
575
|
elif pAdas_ab_result.perfect() and pAdas_cd_result.imperfect():
|
|
550
|
-
cd_str = '; '.join(
|
|
576
|
+
cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
|
|
551
577
|
Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: {cd_str})"
|
|
552
578
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one imperfect)"]
|
|
553
579
|
Vrs.is_perfect = False
|
|
@@ -557,8 +583,8 @@ class VerseTester(object):
|
|
|
557
583
|
# both halves imperfect
|
|
558
584
|
|
|
559
585
|
elif pAdas_ab_result.imperfect() and pAdas_cd_result.imperfect():
|
|
560
|
-
ab_str = '; '.join(
|
|
561
|
-
cd_str = '; '.join(
|
|
586
|
+
ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
|
|
587
|
+
cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
|
|
562
588
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: {cd_str})"
|
|
563
589
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, both halves imperfect)"]
|
|
564
590
|
Vrs.is_perfect = False
|
|
@@ -568,14 +594,14 @@ class VerseTester(object):
|
|
|
568
594
|
# one half perfect, one length error
|
|
569
595
|
|
|
570
596
|
elif pAdas_ab_result.length_error() and pAdas_cd_result.perfect():
|
|
571
|
-
ab_str = '; '.join(
|
|
597
|
+
ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
|
|
572
598
|
Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {pAdas_cd_result.perfect_id_label})"
|
|
573
599
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
|
|
574
600
|
Vrs.is_perfect = False
|
|
575
601
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
576
602
|
return pAdas_cd_result
|
|
577
603
|
elif pAdas_ab_result.perfect() and pAdas_cd_result.length_error():
|
|
578
|
-
cd_str = '; '.join(
|
|
604
|
+
cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
|
|
579
605
|
Vrs.meter_label = f"anuṣṭubh (1,2: {pAdas_ab_result.perfect_id_label}; 3,4: ?? {cd_str})"
|
|
580
606
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half perfect, one length error)"]
|
|
581
607
|
Vrs.is_perfect = False
|
|
@@ -585,16 +611,16 @@ class VerseTester(object):
|
|
|
585
611
|
# one half imperfect, one length error
|
|
586
612
|
|
|
587
613
|
elif pAdas_ab_result.length_error() and pAdas_cd_result.imperfect():
|
|
588
|
-
ab_str = '; '.join(
|
|
589
|
-
cd_str = '; '.join(
|
|
614
|
+
ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
|
|
615
|
+
cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
|
|
590
616
|
Vrs.meter_label = f"anuṣṭubh (1,2: ?? {ab_str}; 3,4: {cd_str})"
|
|
591
617
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
|
|
592
618
|
Vrs.is_perfect = False
|
|
593
619
|
Vrs.diagnostic = {'ab': pAdas_ab_result, 'cd': pAdas_cd_result}
|
|
594
620
|
return pAdas_cd_result
|
|
595
621
|
elif pAdas_ab_result.imperfect() and pAdas_cd_result.length_error():
|
|
596
|
-
ab_str = '; '.join(
|
|
597
|
-
cd_str = '; '.join(
|
|
622
|
+
ab_str = '; '.join(pAdas_ab_result.imperfect_label_sanskrit.values())
|
|
623
|
+
cd_str = '; '.join(pAdas_cd_result.imperfect_label_sanskrit.values())
|
|
598
624
|
Vrs.meter_label = f"anuṣṭubh (1,2: {ab_str}; 3,4: ?? {cd_str})"
|
|
599
625
|
Vrs.identification_score = meter_scores["anuṣṭubh, full, one half imperfect, one length error)"]
|
|
600
626
|
Vrs.is_perfect = False
|
|
@@ -1972,3 +1998,68 @@ class MeterIdentifier(object):
|
|
|
1972
1998
|
bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
|
|
1973
1999
|
|
|
1974
2000
|
return V
|
|
2001
|
+
|
|
2002
|
+
def identify_meter_batch(self, rw_strs,
        resplit_option=default_resplit_option,
        resplit_keep_midpoint=default_resplit_keep_midpoint,
        from_scheme=None):
    """
    Batch version of identify_meter() for a collection of raw strings.

    rw_strs may be any iterable of strings (list, tuple, generator, ...);
    it is materialized into a list once, up front.
    resplit_option, resplit_keep_midpoint and from_scheme are forwarded
    unchanged to identify_meter() for every verse.

    Batches shorter than BATCH_PARALLEL_THRESHOLD are processed serially
    by calling self.identify_meter() on each string.
    Larger batches are fanned out to up to BATCH_MAX_WORKERS worker processes,
    one task per verse, via _identify_meter_worker. Note that each worker task
    builds its own MeterIdentifier(), so this instance (self) is not used
    on the parallel path.

    When _DEBUG_TIMING is on, the per-verse timing dicts returned by the workers
    are merged back into this process's _section_totals / _category_totals
    so that flush_profiling_report() works.

    Returns a list of Verse objects in the same order as the input.
    """
    # Materialize once: the size check needs len(), and a one-shot iterable
    # (e.g. a generator) would otherwise raise TypeError here.
    rw_strs = list(rw_strs)

    if len(rw_strs) < BATCH_PARALLEL_THRESHOLD:
        return [self.identify_meter(s, resplit_option=resplit_option,
                    resplit_keep_midpoint=resplit_keep_midpoint, from_scheme=from_scheme)
                for s in rw_strs]

    # The debug flag travels with each task so the worker process can switch
    # profiling on for itself and return timing data alongside the Verse.
    args = [(s, resplit_option, resplit_keep_midpoint, from_scheme, _DEBUG_TIMING) for s in rw_strs]
    with ProcessPoolExecutor(max_workers=BATCH_MAX_WORKERS) as executor:
        # executor.map() yields results in input order
        results = list(executor.map(_identify_meter_worker, args))

    if _DEBUG_TIMING:
        # In profiling mode each result is a (Verse, verse_times, category) tuple.
        for V, verse_times, cat in results:
            # wiggle_count is a global counter, not a per-category timing: pull it out first
            _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + verse_times.pop('wiggle_count', 0)
            bucket = _category_totals.setdefault(cat, {})
            for k, v in verse_times.items():
                bucket[k] = bucket.get(k, 0.0) + v
            bucket['_count'] = bucket.get('_count', 0) + 1
            if _verse_is_perfect(V):
                bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
        return [V for V, _, _ in results]

    # Without profiling the workers return bare Verse objects.
    return results
|
|
2036
|
+
|
|
2037
|
+
|
|
2038
|
+
def _identify_meter_worker(args):
    """
    Process a single verse inside a worker process.

    Lives at module level so that it can be pickled and handed to an executor.
    args is a 5-tuple:
    (rw_str, resplit_option, resplit_keep_midpoint, from_scheme, debug_timing).

    Returns the Verse alone when debug_timing is falsy; otherwise returns
    (Verse, verse_times, category), where verse_times holds the change in each
    tracked _section_totals entry caused by this verse, plus a summed 'scan'
    entry and the 'wiggle_count' delta.
    """
    rw_str, resplit_option, resplit_keep_midpoint, from_scheme, debug_timing = args
    identify_kwargs = {
        'resplit_option': resplit_option,
        'resplit_keep_midpoint': resplit_keep_midpoint,
        'from_scheme': from_scheme,
    }

    # Fast path: no profiling, just identify and hand back the Verse.
    if not debug_timing:
        identifier = MeterIdentifier()
        return identifier.identify_meter(rw_str, **identify_kwargs)

    # Profiling path: switch the flag on in this process, in both modules that hold a copy.
    import skrutable.utils as _utils
    _utils._DEBUG_TIMING = True
    import skrutable.meter_identification as _mi
    _mi._DEBUG_TIMING = True

    identifier = MeterIdentifier()

    scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
    type_keys = ('anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
                 'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
    tracked_keys = scan_keys + type_keys

    # Snapshot the running totals so that only this verse's contribution is reported.
    baseline = {}
    for key in tracked_keys:
        baseline[key] = _section_totals.get(key, 0.0)
    wiggles_before = _section_totals.get('wiggle_count', 0)

    V = identifier.identify_meter(rw_str, **identify_kwargs)

    verse_times = {}
    for key in tracked_keys:
        verse_times[key] = _section_totals.get(key, 0.0) - baseline[key]
    verse_times['scan'] = sum(verse_times[key] for key in scan_keys)
    verse_times['wiggle_count'] = _section_totals.get('wiggle_count', 0) - wiggles_before

    category = _meter_label_to_category(V.meter_label)
    return V, verse_times, category
|
|
@@ -1 +0,0 @@
|
|
|
1
|
-
__version__ = "2.6.1"
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|
|
File without changes
|