readsight 1.0.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (238) hide show
  1. readsight-1.0.0/LICENSE +21 -0
  2. readsight-1.0.0/PKG-INFO +339 -0
  3. readsight-1.0.0/README.md +305 -0
  4. readsight-1.0.0/pyproject.toml +104 -0
  5. readsight-1.0.0/setup.cfg +4 -0
  6. readsight-1.0.0/src/readsight/__init__.py +35 -0
  7. readsight-1.0.0/src/readsight/config.py +22 -0
  8. readsight-1.0.0/src/readsight/data/languages/af.json +31 -0
  9. readsight-1.0.0/src/readsight/data/languages/ar.json +31 -0
  10. readsight-1.0.0/src/readsight/data/languages/as.json +31 -0
  11. readsight-1.0.0/src/readsight/data/languages/be.json +31 -0
  12. readsight-1.0.0/src/readsight/data/languages/bg.json +31 -0
  13. readsight-1.0.0/src/readsight/data/languages/bn.json +31 -0
  14. readsight-1.0.0/src/readsight/data/languages/ca.json +31 -0
  15. readsight-1.0.0/src/readsight/data/languages/cop.json +31 -0
  16. readsight-1.0.0/src/readsight/data/languages/cs.json +31 -0
  17. readsight-1.0.0/src/readsight/data/languages/cu.json +31 -0
  18. readsight-1.0.0/src/readsight/data/languages/cy.json +31 -0
  19. readsight-1.0.0/src/readsight/data/languages/da.json +31 -0
  20. readsight-1.0.0/src/readsight/data/languages/de-1901.json +43 -0
  21. readsight-1.0.0/src/readsight/data/languages/de-1996.json +43 -0
  22. readsight-1.0.0/src/readsight/data/languages/de-ch-1901.json +43 -0
  23. readsight-1.0.0/src/readsight/data/languages/el-monoton.json +31 -0
  24. readsight-1.0.0/src/readsight/data/languages/el-polyton.json +31 -0
  25. readsight-1.0.0/src/readsight/data/languages/en-gb.json +93 -0
  26. readsight-1.0.0/src/readsight/data/languages/en-us.json +117 -0
  27. readsight-1.0.0/src/readsight/data/languages/eo.json +31 -0
  28. readsight-1.0.0/src/readsight/data/languages/es.json +52 -0
  29. readsight-1.0.0/src/readsight/data/languages/et.json +31 -0
  30. readsight-1.0.0/src/readsight/data/languages/eu.json +31 -0
  31. readsight-1.0.0/src/readsight/data/languages/fa.json +31 -0
  32. readsight-1.0.0/src/readsight/data/languages/fi-x-school.json +31 -0
  33. readsight-1.0.0/src/readsight/data/languages/fi.json +31 -0
  34. readsight-1.0.0/src/readsight/data/languages/fr.json +40 -0
  35. readsight-1.0.0/src/readsight/data/languages/fur.json +31 -0
  36. readsight-1.0.0/src/readsight/data/languages/ga.json +31 -0
  37. readsight-1.0.0/src/readsight/data/languages/gl.json +31 -0
  38. readsight-1.0.0/src/readsight/data/languages/grc.json +31 -0
  39. readsight-1.0.0/src/readsight/data/languages/gu.json +31 -0
  40. readsight-1.0.0/src/readsight/data/languages/he.json +31 -0
  41. readsight-1.0.0/src/readsight/data/languages/hi.json +31 -0
  42. readsight-1.0.0/src/readsight/data/languages/hr.json +31 -0
  43. readsight-1.0.0/src/readsight/data/languages/hsb.json +31 -0
  44. readsight-1.0.0/src/readsight/data/languages/hu.json +31 -0
  45. readsight-1.0.0/src/readsight/data/languages/hy.json +31 -0
  46. readsight-1.0.0/src/readsight/data/languages/ia.json +31 -0
  47. readsight-1.0.0/src/readsight/data/languages/id.json +31 -0
  48. readsight-1.0.0/src/readsight/data/languages/is.json +31 -0
  49. readsight-1.0.0/src/readsight/data/languages/it.json +43 -0
  50. readsight-1.0.0/src/readsight/data/languages/ka.json +31 -0
  51. readsight-1.0.0/src/readsight/data/languages/kk.json +31 -0
  52. readsight-1.0.0/src/readsight/data/languages/kmr.json +31 -0
  53. readsight-1.0.0/src/readsight/data/languages/kn.json +31 -0
  54. readsight-1.0.0/src/readsight/data/languages/la-x-classic.json +31 -0
  55. readsight-1.0.0/src/readsight/data/languages/la-x-liturgic.json +31 -0
  56. readsight-1.0.0/src/readsight/data/languages/la.json +31 -0
  57. readsight-1.0.0/src/readsight/data/languages/lt.json +31 -0
  58. readsight-1.0.0/src/readsight/data/languages/lv.json +31 -0
  59. readsight-1.0.0/src/readsight/data/languages/mk.json +31 -0
  60. readsight-1.0.0/src/readsight/data/languages/ml.json +31 -0
  61. readsight-1.0.0/src/readsight/data/languages/mn-cyrl-x-lmc.json +31 -0
  62. readsight-1.0.0/src/readsight/data/languages/mn-cyrl.json +31 -0
  63. readsight-1.0.0/src/readsight/data/languages/mr.json +31 -0
  64. readsight-1.0.0/src/readsight/data/languages/mul-ethi.json +31 -0
  65. readsight-1.0.0/src/readsight/data/languages/nb.json +31 -0
  66. readsight-1.0.0/src/readsight/data/languages/nl.json +40 -0
  67. readsight-1.0.0/src/readsight/data/languages/nn.json +31 -0
  68. readsight-1.0.0/src/readsight/data/languages/oc.json +31 -0
  69. readsight-1.0.0/src/readsight/data/languages/or.json +31 -0
  70. readsight-1.0.0/src/readsight/data/languages/pa.json +31 -0
  71. readsight-1.0.0/src/readsight/data/languages/pi.json +31 -0
  72. readsight-1.0.0/src/readsight/data/languages/pl.json +34 -0
  73. readsight-1.0.0/src/readsight/data/languages/pms.json +31 -0
  74. readsight-1.0.0/src/readsight/data/languages/pt.json +40 -0
  75. readsight-1.0.0/src/readsight/data/languages/rm.json +31 -0
  76. readsight-1.0.0/src/readsight/data/languages/ro.json +31 -0
  77. readsight-1.0.0/src/readsight/data/languages/ru.json +40 -0
  78. readsight-1.0.0/src/readsight/data/languages/sa.json +31 -0
  79. readsight-1.0.0/src/readsight/data/languages/sh-cyrl.json +31 -0
  80. readsight-1.0.0/src/readsight/data/languages/sh-latn.json +31 -0
  81. readsight-1.0.0/src/readsight/data/languages/sk.json +31 -0
  82. readsight-1.0.0/src/readsight/data/languages/sl.json +31 -0
  83. readsight-1.0.0/src/readsight/data/languages/sq.json +31 -0
  84. readsight-1.0.0/src/readsight/data/languages/sr-cyrl.json +31 -0
  85. readsight-1.0.0/src/readsight/data/languages/sv.json +31 -0
  86. readsight-1.0.0/src/readsight/data/languages/ta.json +31 -0
  87. readsight-1.0.0/src/readsight/data/languages/te.json +31 -0
  88. readsight-1.0.0/src/readsight/data/languages/th.json +31 -0
  89. readsight-1.0.0/src/readsight/data/languages/tk.json +31 -0
  90. readsight-1.0.0/src/readsight/data/languages/tr.json +40 -0
  91. readsight-1.0.0/src/readsight/data/languages/uk.json +31 -0
  92. readsight-1.0.0/src/readsight/data/languages/vi.json +31 -0
  93. readsight-1.0.0/src/readsight/data/languages/zh-latn-pinyin.json +31 -0
  94. readsight-1.0.0/src/readsight/data/patterns/hyph-af.tex +10594 -0
  95. readsight-1.0.0/src/readsight/data/patterns/hyph-ar.tex +24 -0
  96. readsight-1.0.0/src/readsight/data/patterns/hyph-as.tex +142 -0
  97. readsight-1.0.0/src/readsight/data/patterns/hyph-be.tex +1630 -0
  98. readsight-1.0.0/src/readsight/data/patterns/hyph-bg.tex +7785 -0
  99. readsight-1.0.0/src/readsight/data/patterns/hyph-bn.tex +142 -0
  100. readsight-1.0.0/src/readsight/data/patterns/hyph-ca.tex +296 -0
  101. readsight-1.0.0/src/readsight/data/patterns/hyph-cop.tex +267 -0
  102. readsight-1.0.0/src/readsight/data/patterns/hyph-cs.tex +3684 -0
  103. readsight-1.0.0/src/readsight/data/patterns/hyph-cu.tex +14727 -0
  104. readsight-1.0.0/src/readsight/data/patterns/hyph-cy.tex +6791 -0
  105. readsight-1.0.0/src/readsight/data/patterns/hyph-da.tex +1202 -0
  106. readsight-1.0.0/src/readsight/data/patterns/hyph-de-1901.tex +36485 -0
  107. readsight-1.0.0/src/readsight/data/patterns/hyph-de-1996.tex +36797 -0
  108. readsight-1.0.0/src/readsight/data/patterns/hyph-de-ch-1901.tex +35926 -0
  109. readsight-1.0.0/src/readsight/data/patterns/hyph-el-monoton.tex +505 -0
  110. readsight-1.0.0/src/readsight/data/patterns/hyph-el-polyton.tex +881 -0
  111. readsight-1.0.0/src/readsight/data/patterns/hyph-en-gb.tex +8615 -0
  112. readsight-1.0.0/src/readsight/data/patterns/hyph-en-us.tex +5018 -0
  113. readsight-1.0.0/src/readsight/data/patterns/hyph-eo.tex +528 -0
  114. readsight-1.0.0/src/readsight/data/patterns/hyph-es.tex +2175 -0
  115. readsight-1.0.0/src/readsight/data/patterns/hyph-et.tex +3754 -0
  116. readsight-1.0.0/src/readsight/data/patterns/hyph-eu.tex +123 -0
  117. readsight-1.0.0/src/readsight/data/patterns/hyph-fa.tex +26 -0
  118. readsight-1.0.0/src/readsight/data/patterns/hyph-fi-x-school.tex +349 -0
  119. readsight-1.0.0/src/readsight/data/patterns/hyph-fi.tex +330 -0
  120. readsight-1.0.0/src/readsight/data/patterns/hyph-fr.tex +1372 -0
  121. readsight-1.0.0/src/readsight/data/patterns/hyph-fur.tex +488 -0
  122. readsight-1.0.0/src/readsight/data/patterns/hyph-ga.tex +6152 -0
  123. readsight-1.0.0/src/readsight/data/patterns/hyph-gl.tex +749 -0
  124. readsight-1.0.0/src/readsight/data/patterns/hyph-grc.tex +2149 -0
  125. readsight-1.0.0/src/readsight/data/patterns/hyph-gu.tex +135 -0
  126. readsight-1.0.0/src/readsight/data/patterns/hyph-he.tex +25 -0
  127. readsight-1.0.0/src/readsight/data/patterns/hyph-hi.tex +139 -0
  128. readsight-1.0.0/src/readsight/data/patterns/hyph-hr.tex +1543 -0
  129. readsight-1.0.0/src/readsight/data/patterns/hyph-hsb.tex +1591 -0
  130. readsight-1.0.0/src/readsight/data/patterns/hyph-hu.tex +62894 -0
  131. readsight-1.0.0/src/readsight/data/patterns/hyph-hy.tex +260 -0
  132. readsight-1.0.0/src/readsight/data/patterns/hyph-ia.tex +686 -0
  133. readsight-1.0.0/src/readsight/data/patterns/hyph-id.tex +101 -0
  134. readsight-1.0.0/src/readsight/data/patterns/hyph-is.tex +4223 -0
  135. readsight-1.0.0/src/readsight/data/patterns/hyph-it.tex +431 -0
  136. readsight-1.0.0/src/readsight/data/patterns/hyph-ka.tex +2145 -0
  137. readsight-1.0.0/src/readsight/data/patterns/hyph-kk.tex +5806 -0
  138. readsight-1.0.0/src/readsight/data/patterns/hyph-kmr.tex +349 -0
  139. readsight-1.0.0/src/readsight/data/patterns/hyph-kn.tex +142 -0
  140. readsight-1.0.0/src/readsight/data/patterns/hyph-la-x-classic.tex +26160 -0
  141. readsight-1.0.0/src/readsight/data/patterns/hyph-la-x-liturgic.tex +2008 -0
  142. readsight-1.0.0/src/readsight/data/patterns/hyph-la.tex +237 -0
  143. readsight-1.0.0/src/readsight/data/patterns/hyph-lt.tex +1618 -0
  144. readsight-1.0.0/src/readsight/data/patterns/hyph-lv.tex +11621 -0
  145. readsight-1.0.0/src/readsight/data/patterns/hyph-mk.tex +759 -0
  146. readsight-1.0.0/src/readsight/data/patterns/hyph-ml.tex +157 -0
  147. readsight-1.0.0/src/readsight/data/patterns/hyph-mn-cyrl-x-lmc.tex +578 -0
  148. readsight-1.0.0/src/readsight/data/patterns/hyph-mn-cyrl.tex +1060 -0
  149. readsight-1.0.0/src/readsight/data/patterns/hyph-mr.tex +139 -0
  150. readsight-1.0.0/src/readsight/data/patterns/hyph-mul-ethi.tex +518 -0
  151. readsight-1.0.0/src/readsight/data/patterns/hyph-nb.tex +50 -0
  152. readsight-1.0.0/src/readsight/data/patterns/hyph-nl.tex +12892 -0
  153. readsight-1.0.0/src/readsight/data/patterns/hyph-nn.tex +48 -0
  154. readsight-1.0.0/src/readsight/data/patterns/hyph-oc.tex +345 -0
  155. readsight-1.0.0/src/readsight/data/patterns/hyph-or.tex +133 -0
  156. readsight-1.0.0/src/readsight/data/patterns/hyph-pa.tex +129 -0
  157. readsight-1.0.0/src/readsight/data/patterns/hyph-pi.tex +140 -0
  158. readsight-1.0.0/src/readsight/data/patterns/hyph-pl.tex +4154 -0
  159. readsight-1.0.0/src/readsight/data/patterns/hyph-pms.tex +434 -0
  160. readsight-1.0.0/src/readsight/data/patterns/hyph-pt.tex +503 -0
  161. readsight-1.0.0/src/readsight/data/patterns/hyph-rm.tex +452 -0
  162. readsight-1.0.0/src/readsight/data/patterns/hyph-ro.tex +688 -0
  163. readsight-1.0.0/src/readsight/data/patterns/hyph-ru.tex +7255 -0
  164. readsight-1.0.0/src/readsight/data/patterns/hyph-sa.tex +899 -0
  165. readsight-1.0.0/src/readsight/data/patterns/hyph-sh-cyrl.tex +2822 -0
  166. readsight-1.0.0/src/readsight/data/patterns/hyph-sh-latn.tex +2839 -0
  167. readsight-1.0.0/src/readsight/data/patterns/hyph-sk.tex +2529 -0
  168. readsight-1.0.0/src/readsight/data/patterns/hyph-sl.tex +1144 -0
  169. readsight-1.0.0/src/readsight/data/patterns/hyph-sq.tex +108 -0
  170. readsight-1.0.0/src/readsight/data/patterns/hyph-sr-cyrl.tex +2724 -0
  171. readsight-1.0.0/src/readsight/data/patterns/hyph-sv.tex +4750 -0
  172. readsight-1.0.0/src/readsight/data/patterns/hyph-ta.tex +140 -0
  173. readsight-1.0.0/src/readsight/data/patterns/hyph-te.tex +141 -0
  174. readsight-1.0.0/src/readsight/data/patterns/hyph-th.tex +4888 -0
  175. readsight-1.0.0/src/readsight/data/patterns/hyph-tk.tex +2423 -0
  176. readsight-1.0.0/src/readsight/data/patterns/hyph-tr.tex +671 -0
  177. readsight-1.0.0/src/readsight/data/patterns/hyph-uk.tex +3661 -0
  178. readsight-1.0.0/src/readsight/data/patterns/hyph-vi.tex +25 -0
  179. readsight-1.0.0/src/readsight/data/patterns/hyph-zh-latn-pinyin.tex +262 -0
  180. readsight-1.0.0/src/readsight/engine.py +260 -0
  181. readsight-1.0.0/src/readsight/exceptions.py +35 -0
  182. readsight-1.0.0/src/readsight/formula/__init__.py +0 -0
  183. readsight-1.0.0/src/readsight/formula/automated_readability_index.py +36 -0
  184. readsight-1.0.0/src/readsight/formula/coleman_liau.py +41 -0
  185. readsight-1.0.0/src/readsight/formula/crawford.py +50 -0
  186. readsight-1.0.0/src/readsight/formula/dale_chall.py +53 -0
  187. readsight-1.0.0/src/readsight/formula/fernandez_huerta.py +49 -0
  188. readsight-1.0.0/src/readsight/formula/flesch_kincaid_grade_level.py +67 -0
  189. readsight-1.0.0/src/readsight/formula/flesch_reading_ease.py +69 -0
  190. readsight-1.0.0/src/readsight/formula/fog_pl.py +51 -0
  191. readsight-1.0.0/src/readsight/formula/formula.py +21 -0
  192. readsight-1.0.0/src/readsight/formula/formula_registry.py +45 -0
  193. readsight-1.0.0/src/readsight/formula/formula_registry_factory.py +44 -0
  194. readsight-1.0.0/src/readsight/formula/formula_result.py +13 -0
  195. readsight-1.0.0/src/readsight/formula/grade_level_interpretation.py +36 -0
  196. readsight-1.0.0/src/readsight/formula/gulpease.py +45 -0
  197. readsight-1.0.0/src/readsight/formula/gunning_fog.py +54 -0
  198. readsight-1.0.0/src/readsight/formula/gutierrez_polini.py +46 -0
  199. readsight-1.0.0/src/readsight/formula/lix.py +62 -0
  200. readsight-1.0.0/src/readsight/formula/osman.py +52 -0
  201. readsight-1.0.0/src/readsight/formula/smog_index.py +36 -0
  202. readsight-1.0.0/src/readsight/formula/spache.py +47 -0
  203. readsight-1.0.0/src/readsight/formula/szigriszt_pazos.py +54 -0
  204. readsight-1.0.0/src/readsight/formula/text_statistics_helper.py +18 -0
  205. readsight-1.0.0/src/readsight/formula/wiener_sachtextformel.py +93 -0
  206. readsight-1.0.0/src/readsight/hyphenation/__init__.py +0 -0
  207. readsight-1.0.0/src/readsight/hyphenation/cache/__init__.py +0 -0
  208. readsight-1.0.0/src/readsight/hyphenation/cache/json_pattern_cache.py +87 -0
  209. readsight-1.0.0/src/readsight/hyphenation/cache/pattern_cache.py +25 -0
  210. readsight-1.0.0/src/readsight/hyphenation/hyphenation_exceptions_collection.py +26 -0
  211. readsight-1.0.0/src/readsight/hyphenation/hyphenation_override.py +9 -0
  212. readsight-1.0.0/src/readsight/hyphenation/hyphenator.py +13 -0
  213. readsight-1.0.0/src/readsight/hyphenation/liang_hyphenator.py +124 -0
  214. readsight-1.0.0/src/readsight/hyphenation/pattern.py +15 -0
  215. readsight-1.0.0/src/readsight/hyphenation/patterns_collection.py +31 -0
  216. readsight-1.0.0/src/readsight/hyphenation/source/__init__.py +0 -0
  217. readsight-1.0.0/src/readsight/hyphenation/source/pattern_source.py +9 -0
  218. readsight-1.0.0/src/readsight/hyphenation/source/tex_source.py +123 -0
  219. readsight-1.0.0/src/readsight/language/__init__.py +0 -0
  220. readsight-1.0.0/src/readsight/language/json_language_repository.py +44 -0
  221. readsight-1.0.0/src/readsight/language/language.py +48 -0
  222. readsight-1.0.0/src/readsight/language/language_code.py +30 -0
  223. readsight-1.0.0/src/readsight/language/language_repository.py +19 -0
  224. readsight-1.0.0/src/readsight/language/script.py +27 -0
  225. readsight-1.0.0/src/readsight/syllable/__init__.py +0 -0
  226. readsight-1.0.0/src/readsight/syllable/composite_syllable_counter.py +31 -0
  227. readsight-1.0.0/src/readsight/syllable/heuristic_syllable_counter.py +110 -0
  228. readsight-1.0.0/src/readsight/syllable/syllable_counter.py +13 -0
  229. readsight-1.0.0/src/readsight/syllable/tex_syllable_counter.py +15 -0
  230. readsight-1.0.0/src/readsight/text/__init__.py +0 -0
  231. readsight-1.0.0/src/readsight/text/text_analyzer.py +142 -0
  232. readsight-1.0.0/src/readsight/text/text_splitter.py +59 -0
  233. readsight-1.0.0/src/readsight/text/text_statistics.py +16 -0
  234. readsight-1.0.0/src/readsight.egg-info/PKG-INFO +339 -0
  235. readsight-1.0.0/src/readsight.egg-info/SOURCES.txt +236 -0
  236. readsight-1.0.0/src/readsight.egg-info/dependency_links.txt +1 -0
  237. readsight-1.0.0/src/readsight.egg-info/requires.txt +8 -0
  238. readsight-1.0.0/src/readsight.egg-info/top_level.txt +1 -0
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2026 Yevhen Leonidov
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,339 @@
1
+ Metadata-Version: 2.4
2
+ Name: readsight
3
+ Version: 1.0.0
4
+ Summary: Multilingual readability library for Python — 86 languages, 17 formulas, TeX-based syllable counting via the Frank M. Liang algorithm.
5
+ Author-email: Yevhen Leonidov <yevhen.leonidov@gmail.com>
6
+ License: MIT
7
+ Project-URL: Homepage, https://github.com/MADEVAL/ReadSightPy
8
+ Project-URL: Issues, https://github.com/MADEVAL/ReadSightPy/issues
9
+ Project-URL: Source, https://github.com/MADEVAL/ReadSightPy
10
+ Keywords: readability,syllable,hyphenation,flesch-kincaid,gunning-fog,smog,coleman-liau,ari,lix,gulpease,wiener-sachtextformel,multilingual,tex,liang
11
+ Classifier: Development Status :: 5 - Production/Stable
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Science/Research
14
+ Classifier: License :: OSI Approved :: MIT License
15
+ Classifier: Operating System :: OS Independent
16
+ Classifier: Programming Language :: Python :: 3
17
+ Classifier: Programming Language :: Python :: 3.10
18
+ Classifier: Programming Language :: Python :: 3.11
19
+ Classifier: Programming Language :: Python :: 3.12
20
+ Classifier: Programming Language :: Python :: 3.13
21
+ Classifier: Topic :: Text Processing :: Linguistic
22
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: regex>=2023.0.0
27
+ Requires-Dist: platformdirs>=3.0.0
28
+ Provides-Extra: dev
29
+ Requires-Dist: pytest>=8.0; extra == "dev"
30
+ Requires-Dist: pytest-cov>=4.0; extra == "dev"
31
+ Requires-Dist: mypy>=1.0; extra == "dev"
32
+ Requires-Dist: ruff>=0.1.0; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # ReadSightPy — Multilingual Readability Engine for Python
36
+
37
+ [![CI](https://github.com/MADEVAL/ReadSightPy/actions/workflows/ci.yml/badge.svg)](https://github.com/MADEVAL/ReadSightPy/actions/workflows/ci.yml)
38
+ [![Python](https://img.shields.io/badge/Python-%3E%3D%203.10-3776AB?logo=python)](https://www.python.org/)
39
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
40
+ [![Tests](https://img.shields.io/badge/tests-133%20passed-brightgreen)](https://github.com/MADEVAL/ReadSightPy)
41
+ [![Mypy](https://img.shields.io/badge/mypy-strict-brightgreen)](https://mypy-lang.org/)
42
+ [![Ruff](https://img.shields.io/badge/ruff-0%20errors-brightgreen)](https://astral.sh/ruff)
43
+ [![Languages](https://img.shields.io/badge/languages-86-9cf)](https://github.com/MADEVAL/ReadSightPy)
44
+ [![Formulas](https://img.shields.io/badge/formulas-17-orange)](https://github.com/MADEVAL/ReadSightPy)
45
+
46
+ ReadSightPy measures text readability across **86 languages** using **17 readability formulas** with language-specific coefficients. Syllable counting is powered by the **Frank M. Liang (TeX) hyphenation algorithm** — the same algorithm used by TeX for decades. The library has **no heavy dependencies**: its only runtime requirements are `regex` and `platformdirs`.
47
+
48
+ This is a Python port of [ReadSight](https://github.com/MADEVAL/ReadSight) (PHP).
49
+
50
+ ## Table of Contents
51
+
52
+ - [Installation](#installation)
53
+ - [Quick Start](#quick-start)
54
+ - [Demo](#demo)
55
+ - [Supported Languages](#supported-languages)
56
+ - [Readability Formulas](#readability-formulas)
57
+ - [FormulaResult](#formularesult)
58
+ - [Performance](#performance)
59
+ - [Custom Configuration](#custom-configuration)
60
+ - [Architecture](#architecture)
61
+ - [Data Sources](#data-sources)
62
+ - [Development](#development)
63
+ - [License](#license)
64
+
65
+ ## Installation
66
+
67
+ ```bash
68
+ pip install readsight
69
+ ```
70
+
71
+ **Requirements:**
72
+ - Python >= 3.10
73
+ - `regex` (for Unicode regex `\p{L}` support)
74
+ - `platformdirs` (for cache directory)
75
+
76
+ No other runtime dependencies.
77
+
78
+ ## Quick Start
79
+
80
+ ```python
81
+ from readsight import ReadSight
82
+
83
+ rs = ReadSight("en-us")
84
+
85
+ # Syllable counting
86
+ rs.syllable_count("banana") # 3
87
+ rs.split_syllables("hyphenation") # ['hy', 'phen', 'a', 'tion']
88
+
89
+ # Text analysis
90
+ text = "The quick brown fox jumps over the lazy dog."
+ stats = rs.analyze(text)
91
+ print(f"Words: {stats.word_count}, Syllables: {stats.syllable_count}")
92
+
93
+ # Readability formulas
94
+ fre = rs.flesch_reading_ease(text)
95
+ print(f"Flesch Reading Ease: {fre.score} - {fre.interpretation}")
96
+
97
+ fog = rs.gunning_fog(text)
98
+ print(f"Gunning Fog: {fog.score} (grade {fog.grade_level})")
99
+
100
+ lix = rs.lix(text)
101
+ print(f"LIX: {lix.score} - {lix.interpretation}")
102
+ ```
103
+
104
+ ## Demo
105
+
106
+ Run the interactive demo to see ReadSightPy in action:
107
+
108
+ ```bash
109
+ python examples/demo.py
110
+ ```
111
+
112
+ This analyzes built-in sample text and outputs:
113
+ - **Syllable breakdown** with hyphenation points for common words
114
+ - **Text statistics** — letters, words, sentences, syllables, histogram
115
+ - **All applicable readability formulas** with scores and interpretations
116
+
117
+ Compare the same text across 6 languages:
118
+
119
+ ```bash
120
+ # Built into demo.py — runs multilingual comparison automatically
121
+ python examples/demo.py
122
+ ```
123
+
124
+ ## Supported Languages
125
+
126
+ 86 languages across **19 writing systems**: Latin, Cyrillic, Arabic, Hebrew, Devanagari, Bengali, Tamil, Thai, Greek, Armenian, Georgian, Gujarati, Gurmukhi, Kannada, Malayalam, Odia, Telugu, Ethiopic, Coptic.
127
+
128
+ ```python
129
+ rs = ReadSight("ru") # Russian
130
+ rs = ReadSight("de-1996") # German (1996 reform)
131
+ rs = ReadSight("es") # Spanish
132
+ rs = ReadSight("th") # Thai
133
+
134
+ # List all supported languages
135
+ langs = ReadSight.get_supported_languages()
136
+ # ['af', 'ar', 'as', 'be', 'bg', 'bn', 'ca', 'cop', 'cs', 'cu', 'cy', 'da',
137
+ # 'de-1901', 'de-1996', 'de-ch-1901', 'el-monoton', 'el-polyton', 'en-gb',
138
+ # 'en-us', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fi-x-school', 'fr', 'fur',
139
+ # 'ga', 'gl', 'grc', 'gu', 'he', 'hi', 'hr', 'hsb', 'hu', 'hy', 'ia', 'id',
140
+ # 'is', 'it', 'ka', 'kk', 'kmr', 'kn', 'la', 'la-x-classic', 'la-x-liturgic',
141
+ # 'lt', 'lv', 'mk', 'ml', 'mn-cyrl', 'mn-cyrl-x-lmc', 'mr', 'mul-ethi', 'nb',
142
+ # 'nl', 'nn', 'oc', 'or', 'pa', 'pi', 'pl', 'pms', 'pt', 'rm', 'ro', 'ru',
143
+ # 'sa', 'sh-cyrl', 'sh-latn', 'sk', 'sl', 'sq', 'sr-cyrl', 'sv', 'ta', 'te',
144
+ # 'th', 'tk', 'tr', 'uk', 'vi', 'zh-latn-pinyin']
145
+ ```
146
+
147
+ ## Readability Formulas
148
+
149
+ ### Universal (all 86 languages)
150
+
151
+ | Formula | Method | Type | Score Range |
152
+ |---|---|---|---|
153
+ | Gunning Fog | `gunning_fog()` | Syllable-based | 0–20+ |
154
+ | SMOG Index | `smog_index()` | Syllable-based | 3–18+ |
155
+ | Coleman-Liau | `coleman_liau()` | Letter-based | 0–18+ |
156
+ | ARI | `automated_readability_index()` | Letter-based | 0–18+ |
157
+ | LIX | `lix()` | Letter-based | 20–60+ |
158
+
159
+ ### Language-Specific
160
+
161
+ | Language | Formulas |
162
+ |---|---|
163
+ | English (`en-us`, `en-gb`) | Flesch Reading Ease, FK Grade Level, Dale-Chall*, Spache* |
164
+ | German (`de-*`) | Flesch Reading Ease (Amstad), FKGL, Wiener Sachtextformel (4 variants) |
165
+ | Russian (`ru`) | Flesch Reading Ease (Oborneva), FKGL |
166
+ | Spanish (`es`) | Flesch Reading Ease, Fernandez-Huerta, Szigriszt-Pazos, Gutierrez-Polini, Crawford |
167
+ | Italian (`it`) | Flesch Reading Ease, Gulpease |
168
+ | French (`fr`) | Flesch Reading Ease (Kandel-Moles) |
169
+ | Dutch (`nl`) | Flesch Reading Ease (Douma) |
170
+ | Portuguese (`pt`) | Flesch Reading Ease (Martins) |
171
+ | Turkish (`tr`) | Flesch Reading Ease (Ateşman) |
172
+ | Polish (`pl`) | FOG-PL |
173
+ | Arabic (`ar`) | OSMAN |
174
+
175
+ > **\*Note:** Dale-Chall and Spache formulas use a syllable-based heuristic to estimate difficult words (1-syllable ≈ easy). This is a simplified estimation, not based on the original Dale/Spache word lists.
176
+
177
+ Generic dispatching:
178
+
179
+ ```python
180
+ result = rs.score("gunning_fog", text)
181
+ result = rs.score("wiener_sachtextformel", text)
182
+ ```
183
+
184
+ ## FormulaResult
185
+
186
+ ```python
187
+ result.score # float — raw formula score
188
+ result.grade_level # float | None — normalized grade level (FKGL, GF, SMOG, CL, ARI)
189
+ result.interpretation # str — qualitative interpretation ("Easy", "Hard")
190
+ result.formula_name # str — formula key
191
+ result.language_code # str — language code used
192
+ result.inputs # dict[str, float | int] — intermediate values for debugging
193
+ ```
194
+
195
+ ### API Reference
196
+
197
+ #### Text Analysis Methods
198
+
199
+ ```python
200
+ rs.syllable_count(word: str) -> int
201
+ rs.split_syllables(word: str) -> list[str]
202
+ rs.split_word(word: str) -> list[str]
203
+ rs.word_count(text: str) -> int
204
+ rs.sentence_count(text: str) -> int
205
+ rs.letter_count(text: str) -> int
206
+ rs.total_syllables(text: str) -> int
207
+ rs.average_syllables_per_word(text: str) -> float
208
+ rs.average_words_per_sentence(text: str) -> float
209
+ rs.polysyllable_count(text: str, count_proper_nouns: bool = True) -> int
210
+ rs.words_with_more_than_n_syllables(text: str, n: int, count_proper_nouns: bool = True) -> int
211
+ rs.histogram_syllables(text: str) -> dict[int, int]
212
+ rs.analyze(text: str) -> TextStatistics
213
+ ```
214
+
215
+ #### Formula Methods
216
+
217
+ ```python
218
+ rs.flesch_reading_ease(text: str) -> FormulaResult
219
+ rs.flesch_kincaid_grade_level(text: str) -> FormulaResult
220
+ rs.gunning_fog(text: str) -> FormulaResult
221
+ rs.smog_index(text: str) -> FormulaResult
222
+ rs.coleman_liau(text: str) -> FormulaResult
223
+ rs.automated_readability_index(text: str) -> FormulaResult
224
+ rs.lix(text: str) -> FormulaResult
225
+ rs.wiener_sachtextformel(text: str, variant: int = 1) -> FormulaResult
226
+ rs.gulpease(text: str) -> FormulaResult
227
+ rs.fernandez_huerta(text: str) -> FormulaResult
228
+ rs.szigriszt_pazos(text: str) -> FormulaResult
229
+ rs.gutierrez_polini(text: str) -> FormulaResult
230
+ rs.crawford(text: str) -> FormulaResult
231
+ rs.fog_pl(text: str) -> FormulaResult
232
+ rs.dale_chall(text: str) -> FormulaResult
233
+ rs.spache(text: str) -> FormulaResult
234
+ rs.osman(text: str) -> FormulaResult
235
+ ```
236
+
237
+ ## Performance
238
+
239
+ Measured on CPython 3.12, Intel Core i7 (limited data — full benchmarks TBD):
240
+
241
+ | Operation | Time |
242
+ |---|---|
243
+ | Syllable counting (single word) | ~0.05 ms |
244
+ | Text analysis (45 words) | ~1 ms |
245
+ | Formula calculation (incl. analysis) | ~1 ms |
246
+ | Engine init (en-us, cached) | ~10 ms |
247
+ | Engine init (de-1996, first load) | ~60 ms |
248
+
249
+ Caching: compiled patterns are stored as JSON in the per-user cache directory (`platformdirs.user_cache_dir`). The first load of a language parses its `.tex` file (native hyph-utf8 format); subsequent loads use the pre-compiled cache.
250
+
251
+ ## Custom Configuration
252
+
253
+ ```python
254
+ from readsight import ReadSight, Config
255
+
256
+ # Set default paths (before creating engines)
257
+ ReadSight.set_default_config(Config(
258
+ patterns_dir="/custom/patterns",
259
+ languages_dir="/custom/languages",
260
+ cache_dir="/var/cache/readsight",
261
+ ))
262
+
263
+ # Or per-instance
264
+ rs = ReadSight(
265
+ language="en-us",
266
+ patterns_dir="/custom/patterns",
267
+ cache_dir="/custom/cache",
268
+ )
269
+
270
+ # Add custom hyphenation rules
271
+ rs.add_hyphenations({
272
+ "customword": "cus-tom-word",
273
+ })
274
+ ```
275
+
276
+ ## Architecture
277
+
278
+ ```
279
+ ReadSight (facade)
280
+ ├── TextAnalyzer (syllable counting, text metrics)
281
+ │ ├── SyllableCounter (strategy: tex | heuristic | composite)
282
+ │ │ ├── CompositeSyllableCounter (problemWords → heuristic, rest → TeX)
283
+ │ │ ├── HeuristicSyllableCounter (vowel patterns + word list)
284
+ │ │ └── TexSyllableCounter → LiangHyphenator (TeX hyphenation)
285
+ │ ├── LiangHyphenator
286
+ │ │ ├── TexSource (parses .tex from hyph-utf8)
287
+ │ │ ├── PatternsCollection (pattern data)
288
+ │ │ ├── HyphenationExceptionsCollection (word overrides)
289
+ │ │ └── JsonPatternCache (compiled patterns)
290
+ │ └── TextSplitter (word/sentence/letter counting)
291
+ ├── Language (JSON config per language, syllableMode + formulaConfigs)
292
+ └── FormulaRegistry (17 formulas)
293
+ ├── FleschReadingEase (with lang-specific coefficients)
294
+ ├── GunningFog, SMOG, ColemanLiau, ARI, LIX (universal)
295
+ └── WSTF, Gulpease, Fernandez-Huerta, etc. (lang-specific)
296
+ ```
297
+
298
+ ## Data Sources
299
+
300
+ - **TeX hyphenation patterns**: [hyph-utf8](https://ctan.org/pkg/hyph-utf8) version 2026-02-21 —
301
+ the canonical TeX hyphenation repository maintained by the TeX Users Group (TUG).
302
+ 86 `.tex` pattern files from hyph-utf8 covering 86 language variants.
303
+ Packaged under each pattern file's original license.
304
+ - **FRE coefficients**: Amstad (DE), Oborneva (RU), Fernandez-Huerta (ES),
305
+ Vacca-Franchina (IT), Kandel-Moles (FR), Douma (NL), Martins (PT), Ateşman (TR)
306
+ - **WSTF**: Bamberger & Vanecek (DE)
307
+ - **Gulpease**: GULP, La Sapienza University (IT)
308
+
309
+ ## Development
310
+
311
+ ```bash
312
+ pip install -e ".[dev]" # Install with dev dependencies
313
+
314
+ pytest # Run all tests (133 tests)
315
+ pytest --cov=readsight # With coverage report
316
+ mypy src/ # Static type checking (strict mode)
317
+ ruff check src/ tests/ # Lint
318
+ ruff format src/ tests/ # Format
319
+ ```
320
+
321
+ ### Quality Metrics
322
+
323
+ | Metric | Value |
324
+ |---|---|
325
+ | Tests | **133** |
326
+ | Mypy | **Strict mode, 0 errors** |
327
+ | Ruff | **0 errors** |
328
+ | Source files | 56 |
329
+ | Test files | 18 |
330
+ | Supported languages | 86 |
331
+ | Writing systems | 19 |
332
+ | Readability formulas | 17 |
333
+ | Runtime dependencies | 2 (`regex`, `platformdirs`) |
334
+
335
+ ## License
336
+
337
+ MIT. Author: Yevhen Leonidov.
338
+
339
+ TeX pattern files from hyph-utf8 are packaged under their original licenses (see individual file headers).
@@ -0,0 +1,305 @@
1
+ # ReadSightPy — Multilingual Readability Engine for Python
2
+
3
+ [![CI](https://github.com/MADEVAL/ReadSightPy/actions/workflows/ci.yml/badge.svg)](https://github.com/MADEVAL/ReadSightPy/actions/workflows/ci.yml)
4
+ [![Python](https://img.shields.io/badge/Python-%3E%3D%203.10-3776AB?logo=python)](https://www.python.org/)
5
+ [![License](https://img.shields.io/badge/license-MIT-blue.svg)](LICENSE)
6
+ [![Tests](https://img.shields.io/badge/tests-133%20passed-brightgreen)](https://github.com/MADEVAL/ReadSightPy)
7
+ [![Mypy](https://img.shields.io/badge/mypy-strict-brightgreen)](https://mypy-lang.org/)
8
+ [![Ruff](https://img.shields.io/badge/ruff-0%20errors-brightgreen)](https://astral.sh/ruff)
9
+ [![Languages](https://img.shields.io/badge/languages-86-9cf)](https://github.com/MADEVAL/ReadSightPy)
10
+ [![Formulas](https://img.shields.io/badge/formulas-17-orange)](https://github.com/MADEVAL/ReadSightPy)
11
+
12
+ ReadSightPy measures text readability across **86 languages** using **17 readability formulas** with language-specific coefficients. Syllable counting is powered by the **Frank M. Liang (TeX) hyphenation algorithm** — the same algorithm used by TeX for decades. It does all of this with **zero heavy dependencies**.
13
+
14
+ This is a Python port of [ReadSight](https://github.com/MADEVAL/ReadSight) (PHP).
15
+
16
+ ## Table of Contents
17
+
18
+ - [Installation](#installation)
19
+ - [Quick Start](#quick-start)
20
+ - [Demo](#demo)
21
+ - [Supported Languages](#supported-languages)
22
+ - [Readability Formulas](#readability-formulas)
23
+ - [FormulaResult](#formularesult)
24
+ - [Performance](#performance)
25
+ - [Custom Configuration](#custom-configuration)
26
+ - [Architecture](#architecture)
27
+ - [Data Sources](#data-sources)
28
+ - [Development](#development)
29
+ - [License](#license)
30
+
31
+ ## Installation
32
+
33
+ ```bash
34
+ pip install readsight
35
+ ```
36
+
37
+ **Requirements:**
38
+ - Python >= 3.10
39
+ - `regex` (for Unicode regex `\p{L}` support)
40
+ - `platformdirs` (for cache directory)
41
+
42
+ No other runtime dependencies.
43
+
44
+ ## Quick Start
45
+
46
+ ```python
47
+ from readsight import ReadSight
48
+
49
+ rs = ReadSight("en-us")
50
+
51
+ # Syllable counting
52
+ rs.syllable_count("banana") # 3
53
+ rs.split_syllables("hyphenation") # ['hy', 'phen', 'a', 'tion']
54
+
55
+ # Text analysis
56
+ text = "The quick brown fox jumps over the lazy dog."
+ stats = rs.analyze(text)
57
+ print(f"Words: {stats.word_count}, Syllables: {stats.syllable_count}")
58
+
59
+ # Readability formulas
60
+ fre = rs.flesch_reading_ease(text)
61
+ print(f"Flesch Reading Ease: {fre.score} - {fre.interpretation}")
62
+
63
+ fog = rs.gunning_fog(text)
64
+ print(f"Gunning Fog: {fog.score} (grade {fog.grade_level})")
65
+
66
+ lix = rs.lix(text)
67
+ print(f"LIX: {lix.score} - {lix.interpretation}")
68
+ ```
69
+
70
+ ## Demo
71
+
72
+ Run the interactive demo to see ReadSightPy in action:
73
+
74
+ ```bash
75
+ python examples/demo.py
76
+ ```
77
+
78
+ This analyzes built-in sample text and outputs:
79
+ - **Syllable breakdown** with hyphenation points for common words
80
+ - **Text statistics** — letters, words, sentences, syllables, histogram
81
+ - **All applicable readability formulas** with scores and interpretations
82
+
83
+ Compare the same text across 6 languages:
84
+
85
+ ```bash
86
+ # Built into demo.py — runs multilingual comparison automatically
87
+ python examples/demo.py
88
+ ```
89
+
90
+ ## Supported Languages
91
+
92
+ 86 languages across **19 writing systems**: Latin, Cyrillic, Arabic, Hebrew, Devanagari, Bengali, Tamil, Thai, Greek, Armenian, Georgian, Gujarati, Gurmukhi, Kannada, Malayalam, Odia, Telugu, Ethiopic, Coptic.
93
+
94
+ ```python
95
+ rs = ReadSight("ru") # Russian
96
+ rs = ReadSight("de-1996") # German (1996 reform)
97
+ rs = ReadSight("es") # Spanish
98
+ rs = ReadSight("th") # Thai
99
+
100
+ # List all supported languages
101
+ langs = ReadSight.get_supported_languages()
102
+ # ['af', 'ar', 'as', 'be', 'bg', 'bn', 'ca', 'cop', 'cs', 'cu', 'cy', 'da',
103
+ # 'de-1901', 'de-1996', 'de-ch-1901', 'el-monoton', 'el-polyton', 'en-gb',
104
+ # 'en-us', 'eo', 'es', 'et', 'eu', 'fa', 'fi', 'fi-x-school', 'fr', 'fur',
105
+ # 'ga', 'gl', 'grc', 'gu', 'he', 'hi', 'hr', 'hsb', 'hu', 'hy', 'ia', 'id',
106
+ # 'is', 'it', 'ka', 'kk', 'kmr', 'kn', 'la', 'la-x-classic', 'la-x-liturgic',
107
+ # 'lt', 'lv', 'mk', 'ml', 'mn-cyrl', 'mn-cyrl-x-lmc', 'mr', 'mul-ethi', 'nb',
108
+ # 'nl', 'nn', 'oc', 'or', 'pa', 'pi', 'pl', 'pms', 'pt', 'rm', 'ro', 'ru',
109
+ # 'sa', 'sh-cyrl', 'sh-latn', 'sk', 'sl', 'sq', 'sr-cyrl', 'sv', 'ta', 'te',
110
+ # 'th', 'tk', 'tr', 'uk', 'vi', 'zh-latn-pinyin']
111
+ ```
112
+
113
+ ## Readability Formulas
114
+
115
+ ### Universal (all 86 languages)
116
+
117
+ | Formula | Method | Type | Score Range |
118
+ |---|---|---|---|
119
+ | Gunning Fog | `gunning_fog()` | Syllable-based | 0–20+ |
120
+ | SMOG Index | `smog_index()` | Syllable-based | 3–18+ |
121
+ | Coleman-Liau | `coleman_liau()` | Letter-based | 0–18+ |
122
+ | ARI | `automated_readability_index()` | Letter-based | 0–18+ |
123
+ | LIX | `lix()` | Letter-based | 20–60+ |
124
+
125
+ ### Language-Specific
126
+
127
+ | Language | Formulas |
128
+ |---|---|
129
+ | English (`en-us`, `en-gb`) | Flesch Reading Ease, FK Grade Level, Dale-Chall*, Spache* |
130
+ | German (`de-*`) | Flesch Reading Ease (Amstad), FKGL, Wiener Sachtextformel (4 variants) |
131
+ | Russian (`ru`) | Flesch Reading Ease (Oborneva), FKGL |
132
+ | Spanish (`es`) | Flesch Reading Ease, Fernandez-Huerta, Szigriszt-Pazos, Gutierrez-Polini, Crawford |
133
+ | Italian (`it`) | Flesch Reading Ease, Gulpease |
134
+ | French (`fr`) | Flesch Reading Ease (Kandel-Moles) |
135
+ | Dutch (`nl`) | Flesch Reading Ease (Douma) |
136
+ | Portuguese (`pt`) | Flesch Reading Ease (Martins) |
137
+ | Turkish (`tr`) | Flesch Reading Ease (Ateşman) |
138
+ | Polish (`pl`) | FOG-PL |
139
+ | Arabic (`ar`) | OSMAN |
140
+
141
+ > **\*Note:** Dale-Chall and Spache formulas use a syllable-based heuristic to estimate difficult words (1-syllable ≈ easy). This is a simplified estimation, not based on the original Dale/Spache word lists.
142
+
143
+ Generic dispatching:
144
+
145
+ ```python
146
+ result = rs.score("gunning_fog", text)
147
+ result = rs.score("wiener_sachtextformel", text)
148
+ ```
149
+
150
+ ## FormulaResult
151
+
152
+ ```python
153
+ result.score # float — raw formula score
154
+ result.grade_level # float | None — normalized grade level (FKGL, Gunning Fog, SMOG, Coleman-Liau, ARI)
155
+ result.interpretation # str — qualitative interpretation ("Easy", "Hard")
156
+ result.formula_name # str — formula key
157
+ result.language_code # str — language code used
158
+ result.inputs # dict[str, float | int] — intermediate values for debugging
159
+ ```
160
+
161
+ ### API Reference
162
+
163
+ #### Text Analysis Methods
164
+
165
+ ```python
166
+ rs.syllable_count(word: str) -> int
167
+ rs.split_syllables(word: str) -> list[str]
168
+ rs.split_word(word: str) -> list[str]
169
+ rs.word_count(text: str) -> int
170
+ rs.sentence_count(text: str) -> int
171
+ rs.letter_count(text: str) -> int
172
+ rs.total_syllables(text: str) -> int
173
+ rs.average_syllables_per_word(text: str) -> float
174
+ rs.average_words_per_sentence(text: str) -> float
175
+ rs.polysyllable_count(text: str, count_proper_nouns: bool = True) -> int
176
+ rs.words_with_more_than_n_syllables(text: str, n: int, count_proper_nouns: bool = True) -> int
177
+ rs.histogram_syllables(text: str) -> dict[int, int]
178
+ rs.analyze(text: str) -> TextStatistics
179
+ ```
180
+
181
+ #### Formula Methods
182
+
183
+ ```python
184
+ rs.flesch_reading_ease(text: str) -> FormulaResult
185
+ rs.flesch_kincaid_grade_level(text: str) -> FormulaResult
186
+ rs.gunning_fog(text: str) -> FormulaResult
187
+ rs.smog_index(text: str) -> FormulaResult
188
+ rs.coleman_liau(text: str) -> FormulaResult
189
+ rs.automated_readability_index(text: str) -> FormulaResult
190
+ rs.lix(text: str) -> FormulaResult
191
+ rs.wiener_sachtextformel(text: str, variant: int = 1) -> FormulaResult
192
+ rs.gulpease(text: str) -> FormulaResult
193
+ rs.fernandez_huerta(text: str) -> FormulaResult
194
+ rs.szigriszt_pazos(text: str) -> FormulaResult
195
+ rs.gutierrez_polini(text: str) -> FormulaResult
196
+ rs.crawford(text: str) -> FormulaResult
197
+ rs.fog_pl(text: str) -> FormulaResult
198
+ rs.dale_chall(text: str) -> FormulaResult
199
+ rs.spache(text: str) -> FormulaResult
200
+ rs.osman(text: str) -> FormulaResult
201
+ ```
202
+
203
+ ## Performance
204
+
205
+ Measured on CPython 3.12, Intel Core i7 (limited data — full benchmarks TBD):
206
+
207
+ | Operation | Time |
208
+ |---|---|
209
+ | Syllable counting (single word) | ~0.05 ms |
210
+ | Text analysis (45 words) | ~1 ms |
211
+ | Formula calculation (incl. analysis) | ~1 ms |
212
+ | Engine init (en-us, cached) | ~10 ms |
213
+ | Engine init (de-1996, first load) | ~60 ms |
214
+
215
+ Caching: compiled patterns are stored as JSON in the system cache directory (`platformdirs.user_cache_dir`). First load parses `.tex` files (native hyph-utf8 format); subsequent loads use the pre-compiled cache.
216
+
217
+ ## Custom Configuration
218
+
219
+ ```python
220
+ from readsight import ReadSight, Config
221
+
222
+ # Set default paths (before creating engines)
223
+ ReadSight.set_default_config(Config(
224
+ patterns_dir="/custom/patterns",
225
+ languages_dir="/custom/languages",
226
+ cache_dir="/var/cache/readsight",
227
+ ))
228
+
229
+ # Or per-instance
230
+ rs = ReadSight(
231
+ language="en-us",
232
+ patterns_dir="/custom/patterns",
233
+ cache_dir="/custom/cache",
234
+ )
235
+
236
+ # Add custom hyphenation rules
237
+ rs.add_hyphenations({
238
+ "customword": "cus-tom-word",
239
+ })
240
+ ```
241
+
242
+ ## Architecture
243
+
244
+ ```
245
+ ReadSight (facade)
246
+ ├── TextAnalyzer (syllable counting, text metrics)
247
+ │ ├── SyllableCounter (strategy: tex | heuristic | composite)
248
+ │ │ ├── CompositeSyllableCounter (problemWords → heuristic, rest → TeX)
249
+ │ │ ├── HeuristicSyllableCounter (vowel patterns + word list)
250
+ │ │ └── TexSyllableCounter → LiangHyphenator (TeX hyphenation)
251
+ │ ├── LiangHyphenator
252
+ │ │ ├── TexSource (parses .tex from hyph-utf8)
253
+ │ │ ├── PatternsCollection (pattern data)
254
+ │ │ ├── HyphenationExceptionsCollection (word overrides)
255
+ │ │ └── JsonPatternCache (compiled patterns)
256
+ │ └── TextSplitter (word/sentence/letter counting)
257
+ ├── Language (JSON config per language, syllableMode + formulaConfigs)
258
+ └── FormulaRegistry (17 formulas)
259
+ ├── FleschReadingEase (with lang-specific coefficients)
260
+ ├── GunningFog, SMOG, ColemanLiau, ARI, LIX (universal)
261
+ └── WSTF, Gulpease, Fernandez-Huerta, etc. (lang-specific)
262
+ ```
263
+
264
+ ## Data Sources
265
+
266
+ - **TeX hyphenation patterns**: [hyph-utf8](https://ctan.org/pkg/hyph-utf8) version 2026-02-21 —
267
+ the canonical TeX hyphenation repository maintained by the TeX Users Group (TUG).
268
+ 86 `.tex` pattern files from hyph-utf8 covering 86 language variants.
269
+ Packaged under each pattern file's original license.
270
+ - **FRE coefficients**: Amstad (DE), Oborneva (RU), Fernandez-Huerta (ES),
271
+ Vacca-Franchina (IT), Kandel-Moles (FR), Douma (NL), Martins (PT), Ateşman (TR)
272
+ - **WSTF**: Bamberger & Vanecek (DE)
273
+ - **Gulpease**: GULP, La Sapienza University (IT)
274
+
275
+ ## Development
276
+
277
+ ```bash
278
+ pip install -e ".[dev]" # Install with dev dependencies
279
+
280
+ pytest # Run all tests (133 tests)
281
+ pytest --cov=readsight # With coverage report
282
+ mypy src/ # Static type checking (strict mode)
283
+ ruff check src/ tests/ # Lint
284
+ ruff format src/ tests/ # Format
285
+ ```
286
+
287
+ ### Quality Metrics
288
+
289
+ | Metric | Value |
290
+ |---|---|
291
+ | Tests | **133** |
292
+ | Mypy | **Strict mode, 0 errors** |
293
+ | Ruff | **0 errors** |
294
+ | Source files | 56 |
295
+ | Test files | 18 |
296
+ | Supported languages | 86 |
297
+ | Writing systems | 19 |
298
+ | Readability formulas | 17 |
299
+ | Runtime dependencies | 2 (`regex`, `platformdirs`) |
300
+
301
+ ## License
302
+
303
+ MIT. Author: Yevhen Leonidov.
304
+
305
+ TeX pattern files from hyph-utf8 are packaged under their original licenses (see individual file headers).