skrutable 2.6.1__tar.gz → 2.6.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (30)
  1. {skrutable-2.6.1 → skrutable-2.6.2}/PKG-INFO +1 -1
  2. skrutable-2.6.2/src/skrutable/__init__.py +1 -0
  3. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/meter_identification.py +79 -1
  4. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable.egg-info/PKG-INFO +1 -1
  5. skrutable-2.6.1/src/skrutable/__init__.py +0 -1
  6. {skrutable-2.6.1 → skrutable-2.6.2}/LICENSE.md +0 -0
  7. {skrutable-2.6.1 → skrutable-2.6.2}/README.md +0 -0
  8. {skrutable-2.6.1 → skrutable-2.6.2}/setup.cfg +0 -0
  9. {skrutable-2.6.1 → skrutable-2.6.2}/setup.py +0 -0
  10. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/config.json +0 -0
  11. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/config.py +0 -0
  12. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/generate_scheme_vectors.py +0 -0
  13. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/impossible_bigrams.json +0 -0
  14. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/manual.md +0 -0
  15. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/meter_patterns.py +0 -0
  16. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/phonemes.py +0 -0
  17. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/run_examples.py +0 -0
  18. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/scansion.py +0 -0
  19. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/scheme_detection.py +0 -0
  20. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/scheme_maps.py +0 -0
  21. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/scheme_vectors.json +0 -0
  22. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/scheme_vectors_mbh.py +0 -0
  23. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/splitting.py +0 -0
  24. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/transliteration.py +0 -0
  25. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/utils.py +0 -0
  26. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable/virAma_avoidance.py +0 -0
  27. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable.egg-info/SOURCES.txt +0 -0
  28. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable.egg-info/dependency_links.txt +0 -0
  29. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable.egg-info/requires.txt +0 -0
  30. {skrutable-2.6.1 → skrutable-2.6.2}/src/skrutable.egg-info/top_level.txt +0 -0
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.1
3
+ Version: 2.6.2
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -0,0 +1 @@
1
+ __version__ = "2.6.2"
@@ -5,9 +5,13 @@ from skrutable.utils import _DEBUG_TIMING, _section_totals, timed
5
5
  import re
6
6
  import time as _time
7
7
  from copy import copy
8
+ from concurrent.futures import ProcessPoolExecutor
8
9
  from dataclasses import dataclass
9
10
  from typing import Optional
10
11
 
12
+ BATCH_MAX_WORKERS = 5
13
+ BATCH_PARALLEL_THRESHOLD = 100
14
+
11
15
  # load config variables
12
16
  config = load_config_dict_from_json_file()
13
17
  scansion_syllable_separator = config["scansion_syllable_separator"] # e.g. " "
@@ -50,10 +54,12 @@ def _verse_is_perfect(V):
50
54
  return getattr(V, 'is_perfect', False)
51
55
 
52
56
 
53
- def flush_profiling_report(write_file=False):
57
+ def flush_profiling_report(write_file=False, wall_clock_secs=None, parallel_workers=None):
54
58
  """Print the accumulated profiling table to stderr, then reset all counters.
55
59
 
56
60
  Pass write_file=True to also write the table to profiling_debug.txt alongside the library source.
61
+ Pass wall_clock_secs to append a timing footer line.
62
+ Pass parallel_workers (int) to show worker count and parallelization speedup; omit or pass None for serial runs.
57
63
  Safe to call even when _DEBUG_TIMING is False (no-op).
58
64
  """
59
65
  if not _DEBUG_TIMING or not _category_totals:
@@ -124,6 +130,13 @@ def flush_profiling_report(write_file=False):
124
130
  + f'{total_scan:.2f}s'.rjust(sub_w)
125
131
  + f'{total_types:.2f}s'.rjust(sub_w)
126
132
  + ' ' + fmt_row(total_scan_vals, total_type_vals))
133
+ if wall_clock_secs is not None:
134
+ table_total = total_scan + total_types
135
+ if parallel_workers is not None:
136
+ speedup = table_total / wall_clock_secs if wall_clock_secs > 0 else float('inf')
137
+ lines.append(f'\n table total (CPU across {parallel_workers} workers, inflated by overhead): {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s | apparent parallelization speedup: {speedup:.2f}x')
138
+ else:
139
+ lines.append(f'\n table total: {table_total:.2f}s | wall-clock: {wall_clock_secs:.2f}s')
127
140
  block = '\n'.join(lines) + '\n'
128
141
  if write_file:
129
142
  timing_path = os.path.join(os.path.dirname(__file__), 'profiling_debug.txt')
@@ -1972,3 +1985,68 @@ class MeterIdentifier(object):
1972
1985
  bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1
1973
1986
 
1974
1987
  return V
1988
+
1989
def identify_meter_batch(self, rw_strs,
                         resplit_option=default_resplit_option,
                         resplit_keep_midpoint=default_resplit_keep_midpoint,
                         from_scheme=None):
    """
    Run identify_meter() over a list of raw strings, in parallel when large.

    Batches shorter than BATCH_PARALLEL_THRESHOLD are processed serially
    in this process, on this instance. Larger batches are handed to a
    ProcessPoolExecutor capped at BATCH_MAX_WORKERS, one task per input
    string, each handled by _identify_meter_worker.

    The returned list has one identify_meter() result per input string,
    in input order.

    When _DEBUG_TIMING is on, each worker also sends back its per-verse
    timing dict and category; these are folded into this process's
    _section_totals / _category_totals so that flush_profiling_report()
    can report on the whole batch.
    """
    # Small batch: stay in-process.
    if len(rw_strs) < BATCH_PARALLEL_THRESHOLD:
        verses = []
        for raw in rw_strs:
            verses.append(self.identify_meter(
                raw,
                resplit_option=resplit_option,
                resplit_keep_midpoint=resplit_keep_midpoint,
                from_scheme=from_scheme,
            ))
        return verses

    # One argument tuple per verse; the current debug flag travels with it
    # so that the worker can switch profiling on for itself.
    tasks = [
        (raw, resplit_option, resplit_keep_midpoint, from_scheme, _DEBUG_TIMING)
        for raw in rw_strs
    ]
    with ProcessPoolExecutor(max_workers=BATCH_MAX_WORKERS) as pool:
        outcomes = list(pool.map(_identify_meter_worker, tasks))

    # Without profiling the workers return bare results.
    if not _DEBUG_TIMING:
        return outcomes

    # With profiling each outcome is (verse, timings, category): merge the
    # timings into the main-process counters and keep only the verses.
    verses = []
    for verse, timings, category in outcomes:
        wiggles = timings.pop('wiggle_count', 0)
        _section_totals['wiggle_count'] = _section_totals.get('wiggle_count', 0) + wiggles

        bucket = _category_totals.setdefault(category, {})
        for key, secs in timings.items():
            bucket[key] = bucket.get(key, 0.0) + secs
        bucket['_count'] = bucket.get('_count', 0) + 1
        if _verse_is_perfect(verse):
            bucket['_perfect_count'] = bucket.get('_perfect_count', 0) + 1

        verses.append(verse)
    return verses
2023
+
2024
+
2025
def _identify_meter_worker(args):
    """
    Process a single verse in a worker process.

    Kept at module level so that it can be pickled for the process pool.

    args is a 5-tuple:
    (rw_str, resplit_option, resplit_keep_midpoint, from_scheme, debug_timing).

    Returns the identify_meter() result alone when debug_timing is falsy;
    otherwise returns (result, per-verse timing dict, category), where the
    timing dict holds the change in each tracked _section_totals entry
    across the call, plus a summed 'scan' entry and the 'wiggle_count' delta.
    """
    raw, resplit_opt, keep_midpoint, scheme, timing_on = args

    if timing_on:
        # Switch profiling on in this process, both in utils and in this
        # module's own copy of the flag.
        import skrutable.utils as utils_mod
        utils_mod._DEBUG_TIMING = True
        import skrutable.meter_identification as mi_mod
        mi_mod._DEBUG_TIMING = True

    identifier = MeterIdentifier()

    scan_keys = ('scan_clean', 'scan_translit', 'scan_syllabify', 'scan_weights', 'scan_morae_gana')
    type_keys = ('anuzwuB', 'samavftta', 'upajAti', 'vizamavftta',
                 'ardhasamavftta_perfect', 'jAti', 'lev_samavftta', 'lev_ardha', 'lev_vizama', 'samavftta_etc')
    tracked = scan_keys + type_keys

    if timing_on:
        # Snapshot the counters so this verse's share can be isolated afterwards.
        before = {key: _section_totals.get(key, 0.0) for key in tracked}
        wiggles_before = _section_totals.get('wiggle_count', 0)

    verse = identifier.identify_meter(
        raw,
        resplit_option=resplit_opt,
        resplit_keep_midpoint=keep_midpoint,
        from_scheme=scheme,
    )

    if not timing_on:
        return verse

    elapsed = {}
    for key in tracked:
        elapsed[key] = _section_totals.get(key, 0.0) - before[key]
    elapsed['scan'] = sum(elapsed[key] for key in scan_keys)
    elapsed['wiggle_count'] = _section_totals.get('wiggle_count', 0) - wiggles_before

    category = _meter_label_to_category(verse.meter_label)
    return verse, elapsed, category
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.1
2
2
  Name: skrutable
3
- Version: 2.6.1
3
+ Version: 2.6.2
4
4
  Summary: skrutable library for working with Sanskrit text
5
5
  Home-page: https://github.com/tylergneill/skrutable
6
6
  Author: Tyler Neill
@@ -1 +0,0 @@
1
- __version__ = "2.6.1"
File without changes
File without changes
File without changes
File without changes