skrutable 2.3.0__tar.gz → 2.5.0__tar.gz
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {skrutable-2.3.0 → skrutable-2.5.0}/PKG-INFO +1 -1
- skrutable-2.5.0/src/skrutable/__init__.py +1 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/config.json +3 -2
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/manual.md +25 -0
- skrutable-2.5.0/src/skrutable/meter_identification.py +1582 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/meter_patterns.py +39 -20
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/phonemes.py +4 -1
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/scansion.py +16 -6
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/transliteration.py +3 -1
- skrutable-2.5.0/src/skrutable/utils.py +20 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable.egg-info/PKG-INFO +1 -1
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable.egg-info/SOURCES.txt +1 -0
- skrutable-2.3.0/src/skrutable/__init__.py +0 -1
- skrutable-2.3.0/src/skrutable/meter_identification.py +0 -1004
- {skrutable-2.3.0 → skrutable-2.5.0}/LICENSE.md +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/README.md +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/setup.cfg +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/setup.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/config.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/generate_scheme_vectors.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/impossible_bigrams.json +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/run_examples.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/scheme_detection.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/scheme_maps.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/scheme_vectors.json +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/scheme_vectors_mbh.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/splitting.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable/virAma_avoidance.py +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable.egg-info/dependency_links.txt +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable.egg-info/requires.txt +0 -0
- {skrutable-2.3.0 → skrutable-2.5.0}/src/skrutable.egg-info/top_level.txt +0 -0
|
@@ -0,0 +1 @@
|
|
|
1
|
+
__version__ = "2.5.0"
|
|
@@ -29,8 +29,9 @@
|
|
|
29
29
|
"upajāti, non-triṣṭubh, perfect" : 4.5,
|
|
30
30
|
"upajāti, triṣṭubh-jagatī-saṃkara, perfect" : 4,
|
|
31
31
|
"upajāti, non-triṣṭubh, imperfect" : 3,
|
|
32
|
-
"jāti, perfect" :
|
|
33
|
-
"jāti, imperfect" :
|
|
32
|
+
"jāti, perfect" : 9,
|
|
33
|
+
"jāti, imperfect" : 5,
|
|
34
|
+
"jāti, likely" : 3,
|
|
34
35
|
"none found" : 1
|
|
35
36
|
},
|
|
36
37
|
"preserve_punctuation_default" : true,
|
|
@@ -130,6 +130,31 @@ Key terms:
|
|
|
130
130
|
* *jāti*: four quarters with set patterns of total moraic length
|
|
131
131
|
|
|
132
132
|
|
|
133
|
+
# scan timing profiling
|
|
134
|
+
|
|
135
|
+
`skrutable` includes a built-in profiling system for measuring meter identification performance across a corpus. It is disabled by default (`utils._DEBUG_TIMING = False`) and has no runtime cost unless explicitly enabled.
|
|
136
|
+
|
|
137
|
+
To use it, set `_DEBUG_TIMING` to `True` before importing `MeterIdentifier`:
|
|
138
|
+
|
|
139
|
+
```python
|
|
140
|
+
import skrutable.utils as _utils
|
|
141
|
+
_utils._DEBUG_TIMING = True
|
|
142
|
+
|
|
143
|
+
from skrutable.meter_identification import MeterIdentifier, flush_profiling_report
|
|
144
|
+
|
|
145
|
+
MI = MeterIdentifier()
|
|
146
|
+
for verse in my_verses:
|
|
147
|
+
    MI.identify_meter(verse, resplit_option="resplit_lite", resplit_keep_midpoint=True, from_scheme="IAST") # e.g.
|
|
148
|
+
|
|
149
|
+
flush_profiling_report() # prints table to stderr, resets counters
|
|
150
|
+
# flush_profiling_report(write_file=True) # also writes profiling_debug.txt
|
|
151
|
+
```
|
|
152
|
+
|
|
153
|
+
The table breaks down wall-clock time per meter category with columns for each scan sub-phase (`clean`, `transl`, `syl`, `wts`, `mor+g`) and each identification type (`anuṣṭ`, `samav`, `jāti`, etc.), plus perfect/imperfect verse counts per category.
|
|
154
|
+
|
|
155
|
+
If using the front end, `make launch-profiling` (or `./launch.sh --scan-profiling`) enables profiling for the server process without touching any source files.
|
|
156
|
+
|
|
157
|
+
|
|
133
158
|
# sandhi and compound splitting
|
|
134
159
|
|
|
135
160
|
`skrutable` provides a wrapper for applying pre-trained splitting models via separate online servers ([my own splitter_server for the 2018 model](https://2018emnlp-sanskrit-splitter-server.duckdns.org/) and https://dharmamitra.org). A working internet connection is required for this functionality. The wrapper preserves original sentence length and punctuation, and it also helps utilize the Dharmamitra ByT5-Sanskrit model's ability to distinguish compounds from inter-word breaks.
|