persian-readability 0.1.2__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,21 @@
1
+ MIT License
2
+
3
+ Copyright (c) 2025 Dr. Mohammad Pirouzan
4
+
5
+ Permission is hereby granted, free of charge, to any person obtaining a copy
6
+ of this software and associated documentation files (the "Software"), to deal
7
+ in the Software without restriction, including without limitation the rights
8
+ to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
9
+ copies of the Software, and to permit persons to whom the Software is
10
+ furnished to do so, subject to the following conditions:
11
+
12
+ The above copyright notice and this permission notice shall be included in all
13
+ copies or substantial portions of the Software.
14
+
15
+ THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
16
+ IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
17
+ FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
18
+ AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
19
+ LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
20
+ OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
21
+ SOFTWARE.
@@ -0,0 +1,313 @@
1
+ Metadata-Version: 2.4
2
+ Name: persian-readability
3
+ Version: 0.1.2
4
+ Summary: A lightweight Python tool for Persian/Farsi readability analysis using the Flesch-Dayani formula.
5
+ Author-email: Mohammad Pirouzan <mohammadpirouzan@gmail.com>
6
+ License-Expression: MIT
7
+ Project-URL: Homepage, https://github.com/Drpirouzan/Persian-Readability
8
+ Project-URL: Repository, https://github.com/Drpirouzan/Persian-Readability
9
+ Project-URL: Issues, https://github.com/Drpirouzan/Persian-Readability/issues
10
+ Keywords: persian,farsi,readability,nlp,flesch,flesch-dayani,text-analysis,persian-language,education,accessibility
11
+ Classifier: Development Status :: 3 - Alpha
12
+ Classifier: Intended Audience :: Developers
13
+ Classifier: Intended Audience :: Education
14
+ Classifier: Intended Audience :: Science/Research
15
+ Classifier: Natural Language :: Persian
16
+ Classifier: Operating System :: OS Independent
17
+ Classifier: Programming Language :: Python :: 3
18
+ Classifier: Programming Language :: Python :: 3.10
19
+ Classifier: Programming Language :: Python :: 3.11
20
+ Classifier: Programming Language :: Python :: 3.12
21
+ Classifier: Topic :: Text Processing
22
+ Classifier: Topic :: Text Processing :: Linguistic
23
+ Requires-Python: >=3.10
24
+ Description-Content-Type: text/markdown
25
+ License-File: LICENSE
26
+ Requires-Dist: hazm
27
+ Provides-Extra: pos
28
+ Requires-Dist: parsivar; extra == "pos"
29
+ Provides-Extra: dev
30
+ Requires-Dist: pytest; extra == "dev"
31
+ Requires-Dist: build; extra == "dev"
32
+ Requires-Dist: twine; extra == "dev"
33
+ Dynamic: license-file
34
+
35
+ # Persian Readability (Flesch–Dayani)
36
+
37
+ A lightweight Python package and command-line tool to calculate the **Flesch–Dayani readability score** for Persian (Farsi) text — with an optional POS-enhanced syllable counter for higher accuracy.
38
+
39
+ ---
40
+
41
+ ## Features
42
+
43
+ - Persian text normalization and tokenization via `hazm`
44
+ - **Punctuation-aware tokenization** — punctuation marks are excluded from the word and syllable counts
45
+ - **Two-tier syllable counting:**
46
+ - **POS-enhanced** (Better Accuracy) — if `parsivar` is installed, uses part-of-speech tags to correctly count syllables in verbs with attached prefixes (`میرود`، `نمی‌دانم`) and comparative adjectives (`بهتر`، `بزرگ‌ترین`)
47
+ - **Morphological heuristic** (Good Accuracy) — used automatically if `parsivar` is not installed
48
+ - **Context-aware خواه classifier** — three-layer disambiguation prevents confusing `خواهش`, `خواهر`, `آزادی‌خواه`, and `خواه ... خواه ...` with the future auxiliary (`خواهم رفت`)
49
+ - Computes:
50
+ - Number of sentences, words, letters, and syllables
51
+ - **ASL** — Average Sentence Length (words per sentence)
52
+ - **WL** — Average Word Length (letters per word)
53
+ - **ASYL** — Average Syllables per Word *(used in the original Dayani formula)*
54
+ - Flesch–Dayani readability score
55
+ - **Human-readable level** (e.g. *متوسط — مناسب دانش‌آموزان دبیرستان*)
56
+ - Accepts input from a file, a command-line argument, or **stdin** (pipe-friendly)
57
+ - `--plain` flag for scripting and pipeline use
58
+ - `--verbose` flag for debug logging
59
+ - Warns when text is too short for a reliable score (< 50 words)
60
+
61
+ ---
62
+
63
+ ## Readability Levels
64
+
65
+ | Score | Level |
66
+ |-------|-------|
67
+ | ≥ 90 | بسیار آسان — مناسب کودکان دبستانی |
68
+ | ≥ 80 | آسان — مناسب نوجوانان |
69
+ | ≥ 70 | نسبتاً آسان — مناسب عموم مردم |
70
+ | ≥ 60 | متوسط — مناسب دانش‌آموزان دبیرستان |
71
+ | ≥ 50 | نسبتاً دشوار — مناسب دانشجویان |
72
+ | ≥ 30 | دشوار — مناسب متخصصان |
73
+ | < 30 | بسیار دشوار — متون علمی/تخصصی |
74
+
75
+ ---
76
+
77
+ ## Installation
78
+
79
+ Install from PyPI:
80
+
81
+ ```bash
82
+ pip install persian-readability
83
+ ```
84
+
85
+ For local development:
86
+
87
+ ```bash
88
+ pip install -e ".[dev]"
89
+ ```
90
+
91
+ For optional POS-enhanced syllable counting:
92
+
93
+ ```bash
94
+ pip install "persian-readability[pos]"
95
+ ```
96
+
97
+ ---
98
+
99
+ ## Requirements
100
+
101
+ ### Required
102
+
103
+ - Python **3.10** or newer
104
+ - [`hazm`](https://github.com/roshan-research/hazm) — Persian NLP library
105
+
106
+ ```
107
+ pip install hazm
108
+ ```
109
+
110
+ ### Optional (for higher syllable accuracy)
111
+
112
+ - [`parsivar`](https://github.com/ICTRC/Parsivar) — Persian preprocessing toolkit with POS tagger
113
+
114
+ ```
115
+ pip install parsivar
116
+ ```
117
+
118
+ > If `parsivar` is not installed, the script falls back to the morphological heuristic automatically — no configuration needed.
119
+
120
+ ---
121
+
122
+ ## Usage
123
+
124
+ **Direct text:**
125
+
126
+ ```
127
+ persian-readability -t "متن فارسی شما"
128
+ ```
129
+
130
+ **From a file:**
131
+
132
+ ```
133
+ persian-readability -f sample.txt
134
+ ```
135
+
136
+ **From stdin (pipe):**
137
+
138
+ ```
139
+ echo "متن فارسی شما" | persian-readability
140
+ cat article.txt | persian-readability
141
+ ```
142
+
143
+ **Raw score only (for scripting):**
144
+
145
+ ```
146
+ persian-readability -f sample.txt --plain
147
+ ```
148
+
149
+ **With debug logging:**
150
+
151
+ ```
152
+ persian-readability -f sample.txt --verbose
153
+ ```
154
+
155
+ ---
156
+
157
+ ## Python API Usage
158
+
159
+ ```python
160
+ from persian_readability import calculate_readability
161
+
162
+ result = calculate_readability("برای پیشگیری از پوسیدگی دندان، روزی دو بار مسواک بزنید.")
163
+ print(result)
164
+ ```
165
+
166
+ ---
167
+
168
+ ## Real-World Examples
169
+
170
+ ### Example 1 — Public health text
171
+
172
+ **Input:**
173
+
174
+ ```bash
175
+ persian-readability -t "برای پیشگیری از پوسیدگی دندان، بهتر است روزی دو بار مسواک بزنید و مصرف مواد قندی را کاهش دهید."
176
+ ```
177
+
178
+ **Possible use case:**
179
+
180
+ This can help public health educators check whether patient-facing Persian health messages are simple enough for the general public.
181
+
182
+ ---
183
+
184
+ ### Example 2 — Academic text
185
+
186
+ **Input:**
187
+
188
+ ```bash
189
+ persian-readability -t "شاخص‌های زیستی بزاقی می‌توانند در تشخیص زودهنگام برخی بیماری‌های دهان و فک و صورت نقش مهمی داشته باشند."
190
+ ```
191
+
192
+ **Possible use case:**
193
+
194
+ Researchers can compare the readability of Persian academic summaries, abstracts, or educational materials.
195
+
196
+ ---
197
+
198
+ ### Example 3 — Pipeline use
199
+
200
+ **Input:**
201
+
202
+ ```bash
203
+ cat article.txt | persian-readability --plain
204
+ ```
205
+
206
+ **Possible use case:**
207
+
208
+ Developers can integrate the readability score into larger Persian NLP or content-quality workflows.
209
+
210
+ ---
211
+
212
+ ## Sample Output
213
+
214
+ ```text
215
+ ══════════════════════════════════════════════════════
216
+ Persian Readability — Flesch–Dayani
217
+ ══════════════════════════════════════════════════════
218
+ جملات : 5
219
+ کلمات : 87
220
+ حروف : 412
221
+ هجاها : 201
222
+ روش : POS-enhanced — Parsivar
223
+ ────────────────────────────────────────────────────
224
+ ASL (کلمه/جمله) : 17.40
225
+ WL (حرف/کلمه) : 4.74
226
+ ASYL (هجا/کلمه) : 2.31
227
+ ────────────────────────────────────────────────────
228
+ امتیاز Flesch–Dayani : 58.34
229
+ سطح خوانایی : متوسط — مناسب دانش‌آموزان دبیرستان
230
+ ══════════════════════════════════════════════════════
231
+ ```
232
+
233
+ ---
234
+
235
+ ## Formula
236
+
237
+ ```
238
+ FDR = 262.835 − 0.846 × (100 × ASYL) − 1.015 × ASL
239
+ ```
240
+
241
+ Where **ASYL** = average syllables per word and **ASL** = average words per sentence.
242
+ Higher scores indicate easier text.
243
+
244
+ ---
245
+
246
+ ## How Syllable Accuracy Tiers Work
247
+
248
+ | Mode | Accuracy | How |
249
+ |------|----------|-----|
250
+ | POS-enhanced | ~85% | Parsivar POSTagger (wapiti CRF, Bijankhan corpus) detects verb/adjective tags; prefix/suffix rules applied per POS |
251
+ | Morphological heuristic | ~75% | Counts written long vowels (ا و ی), diacritics, and word-final ه; no POS context |
252
+
253
+ Main cases where POS tagging improves accuracy:
254
+
255
+ - Verbs with attached `می`/`نمی` prefix (no half-space): `میرود` → +1 syllable
256
+ - Comparative/superlative adjectives: `بهترین` → suffix `ترین` = 2 syllables
257
+
258
+ ### خواه Classifier
259
+
260
+ The word `خواه` has multiple roles in Persian. A three-layer classifier resolves ambiguity **before** syllable counting:
261
+
262
+ | Label | Examples | Treatment |
263
+ |-------|---------|-----------|
264
+ | `FUTURE_AUX` | خواهم رفت، نخواهند پذیرفت | syllable count unchanged (the base syllable count is already correct) |
265
+ | `LEXICAL_KHASTAN` | خواهد که برود، این را خواهد | original tag is kept |
266
+ | `PARTICLE_KHAH` | خواه بیاید خواه نیاید | treated as non-verb |
267
+ | `NOMINAL_DERIVATIVE` | خواهش، خواهان، خواهنده | treated as non-verb |
268
+ | `INDEPENDENT_WORD` | خواهر، خواهران | treated as non-verb |
269
+ | `SUFFIX_COMPOUND` | آزادی‌خواه، خیرخواه، دادخواه | treated as non-verb |
270
+
271
+ The classifier uses exact lexical sets (layer 1), suffix-compound detection (layer 2), and a 2-token context window (layer 3) — never a simple prefix regex.
272
+
273
+ ---
274
+
275
+ ## Notes
276
+
277
+ - **Minimum text length:** The Flesch–Dayani formula is designed for running prose. Texts shorter than ~50 words produce unstable scores. A warning is emitted in this case (visible with `--verbose`).
278
+ - **Punctuation filtering:** Persian and Latin punctuation marks (quotation marks, periods, commas, …) are stripped from the edges of every token, and tokens that consist entirely of punctuation are excluded from the counts.
279
+ - **stdin:** When running interactively without `-t` or `-f`, the script waits for input and prints a prompt. Press `Ctrl+D` (on Windows: `Ctrl+Z`, then Enter) to signal end of input.
280
+ - **Log messages:** All warnings go to stderr and do not affect `--plain` output.
281
+
282
+ ---
283
+
284
+ ## Running Tests
285
+
286
+ ```
287
+ pip install pytest hazm
288
+ python -m pytest tests/test_core.py -v
289
+ ```
290
+
291
+ 76 tests covering: خواه classifier (all 9 document cases), punctuation filtering,
292
+ syllable counting, heuristic limitations, formula verification, and edge cases.
293
+
294
+ ---
295
+
296
+ ## References
297
+
298
+ - Dayani, M. (1374/1995). *سنجش خوانایی متون فارسی*. Persian adaptation of the Flesch Reading Ease formula.
299
+ - Mohtaj et al. (2018). [Parsivar: A Language Processing Toolkit for Persian](https://github.com/ICTRC/Parsivar). LREC 2018.
300
+ - Mohammadi & Khasteh (2020). [A Machine Learning Approach to Persian Text Readability](https://arxiv.org/abs/1810.06639).
301
+ - Sobhe. [hazm — Persian NLP library](https://github.com/roshan-research/hazm).
302
+
303
+ ---
304
+
305
+ ## Author
306
+
307
+ **Dr. Mohammad Pirouzan** — [@Drpirouzan](https://github.com/Drpirouzan)
308
+
309
+ ---
310
+
311
+ ## License
312
+
313
+ MIT License — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,279 @@
1
+ # Persian Readability (Flesch–Dayani)
2
+
3
+ A lightweight Python package and command-line tool to calculate the **Flesch–Dayani readability score** for Persian (Farsi) text — with an optional POS-enhanced syllable counter for higher accuracy.
4
+
5
+ ---
6
+
7
+ ## Features
8
+
9
+ - Persian text normalization and tokenization via `hazm`
10
+ - **Punctuation-aware tokenization** — punctuation marks are excluded from the word and syllable counts
11
+ - **Two-tier syllable counting:**
12
+ - **POS-enhanced** (Better Accuracy) — if `parsivar` is installed, uses part-of-speech tags to correctly count syllables in verbs with attached prefixes (`میرود`، `نمی‌دانم`) and comparative adjectives (`بهتر`، `بزرگ‌ترین`)
13
+ - **Morphological heuristic** (Good Accuracy) — used automatically if `parsivar` is not installed
14
+ - **Context-aware خواه classifier** — three-layer disambiguation prevents confusing `خواهش`, `خواهر`, `آزادی‌خواه`, and `خواه ... خواه ...` with the future auxiliary (`خواهم رفت`)
15
+ - Computes:
16
+ - Number of sentences, words, letters, and syllables
17
+ - **ASL** — Average Sentence Length (words per sentence)
18
+ - **WL** — Average Word Length (letters per word)
19
+ - **ASYL** — Average Syllables per Word *(used in the original Dayani formula)*
20
+ - Flesch–Dayani readability score
21
+ - **Human-readable level** (e.g. *متوسط — مناسب دانش‌آموزان دبیرستان*)
22
+ - Accepts input from a file, a command-line argument, or **stdin** (pipe-friendly)
23
+ - `--plain` flag for scripting and pipeline use
24
+ - `--verbose` flag for debug logging
25
+ - Warns when text is too short for a reliable score (< 50 words)
26
+
27
+ ---
28
+
29
+ ## Readability Levels
30
+
31
+ | Score | Level |
32
+ |-------|-------|
33
+ | ≥ 90 | بسیار آسان — مناسب کودکان دبستانی |
34
+ | ≥ 80 | آسان — مناسب نوجوانان |
35
+ | ≥ 70 | نسبتاً آسان — مناسب عموم مردم |
36
+ | ≥ 60 | متوسط — مناسب دانش‌آموزان دبیرستان |
37
+ | ≥ 50 | نسبتاً دشوار — مناسب دانشجویان |
38
+ | ≥ 30 | دشوار — مناسب متخصصان |
39
+ | < 30 | بسیار دشوار — متون علمی/تخصصی |
40
+
41
+ ---
42
+
43
+ ## Installation
44
+
45
+ Install from PyPI:
46
+
47
+ ```bash
48
+ pip install persian-readability
49
+ ```
50
+
51
+ For local development:
52
+
53
+ ```bash
54
+ pip install -e ".[dev]"
55
+ ```
56
+
57
+ For optional POS-enhanced syllable counting:
58
+
59
+ ```bash
60
+ pip install "persian-readability[pos]"
61
+ ```
62
+
63
+ ---
64
+
65
+ ## Requirements
66
+
67
+ ### Required
68
+
69
+ - Python **3.10** or newer
70
+ - [`hazm`](https://github.com/roshan-research/hazm) — Persian NLP library
71
+
72
+ ```
73
+ pip install hazm
74
+ ```
75
+
76
+ ### Optional (for higher syllable accuracy)
77
+
78
+ - [`parsivar`](https://github.com/ICTRC/Parsivar) — Persian preprocessing toolkit with POS tagger
79
+
80
+ ```
81
+ pip install parsivar
82
+ ```
83
+
84
+ > If `parsivar` is not installed, the script falls back to the morphological heuristic automatically — no configuration needed.
85
+
86
+ ---
87
+
88
+ ## Usage
89
+
90
+ **Direct text:**
91
+
92
+ ```
93
+ persian-readability -t "متن فارسی شما"
94
+ ```
95
+
96
+ **From a file:**
97
+
98
+ ```
99
+ persian-readability -f sample.txt
100
+ ```
101
+
102
+ **From stdin (pipe):**
103
+
104
+ ```
105
+ echo "متن فارسی شما" | persian-readability
106
+ cat article.txt | persian-readability
107
+ ```
108
+
109
+ **Raw score only (for scripting):**
110
+
111
+ ```
112
+ persian-readability -f sample.txt --plain
113
+ ```
114
+
115
+ **With debug logging:**
116
+
117
+ ```
118
+ persian-readability -f sample.txt --verbose
119
+ ```
120
+
121
+ ---
122
+
123
+ ## Python API Usage
124
+
125
+ ```python
126
+ from persian_readability import calculate_readability
127
+
128
+ result = calculate_readability("برای پیشگیری از پوسیدگی دندان، روزی دو بار مسواک بزنید.")
129
+ print(result)
130
+ ```
131
+
132
+ ---
133
+
134
+ ## Real-World Examples
135
+
136
+ ### Example 1 — Public health text
137
+
138
+ **Input:**
139
+
140
+ ```bash
141
+ persian-readability -t "برای پیشگیری از پوسیدگی دندان، بهتر است روزی دو بار مسواک بزنید و مصرف مواد قندی را کاهش دهید."
142
+ ```
143
+
144
+ **Possible use case:**
145
+
146
+ This can help public health educators check whether patient-facing Persian health messages are simple enough for the general public.
147
+
148
+ ---
149
+
150
+ ### Example 2 — Academic text
151
+
152
+ **Input:**
153
+
154
+ ```bash
155
+ persian-readability -t "شاخص‌های زیستی بزاقی می‌توانند در تشخیص زودهنگام برخی بیماری‌های دهان و فک و صورت نقش مهمی داشته باشند."
156
+ ```
157
+
158
+ **Possible use case:**
159
+
160
+ Researchers can compare the readability of Persian academic summaries, abstracts, or educational materials.
161
+
162
+ ---
163
+
164
+ ### Example 3 — Pipeline use
165
+
166
+ **Input:**
167
+
168
+ ```bash
169
+ cat article.txt | persian-readability --plain
170
+ ```
171
+
172
+ **Possible use case:**
173
+
174
+ Developers can integrate the readability score into larger Persian NLP or content-quality workflows.
175
+
176
+ ---
177
+
178
+ ## Sample Output
179
+
180
+ ```text
181
+ ══════════════════════════════════════════════════════
182
+ Persian Readability — Flesch–Dayani
183
+ ══════════════════════════════════════════════════════
184
+ جملات : 5
185
+ کلمات : 87
186
+ حروف : 412
187
+ هجاها : 201
188
+ روش : POS-enhanced — Parsivar
189
+ ────────────────────────────────────────────────────
190
+ ASL (کلمه/جمله) : 17.40
191
+ WL (حرف/کلمه) : 4.74
192
+ ASYL (هجا/کلمه) : 2.31
193
+ ────────────────────────────────────────────────────
194
+ امتیاز Flesch–Dayani : 58.34
195
+ سطح خوانایی : متوسط — مناسب دانش‌آموزان دبیرستان
196
+ ══════════════════════════════════════════════════════
197
+ ```
198
+
199
+ ---
200
+
201
+ ## Formula
202
+
203
+ ```
204
+ FDR = 262.835 − 0.846 × (100 × ASYL) − 1.015 × ASL
205
+ ```
206
+
207
+ Where **ASYL** = average syllables per word and **ASL** = average words per sentence.
208
+ Higher scores indicate easier text.
209
+
210
+ ---
211
+
212
+ ## How Syllable Accuracy Tiers Work
213
+
214
+ | Mode | Accuracy | How |
215
+ |------|----------|-----|
216
+ | POS-enhanced | ~85% | Parsivar POSTagger (wapiti CRF, Bijankhan corpus) detects verb/adjective tags; prefix/suffix rules applied per POS |
217
+ | Morphological heuristic | ~75% | Counts written long vowels (ا و ی), diacritics, and word-final ه; no POS context |
218
+
219
+ Main cases where POS tagging improves accuracy:
220
+
221
+ - Verbs with attached `می`/`نمی` prefix (no half-space): `میرود` → +1 syllable
222
+ - Comparative/superlative adjectives: `بهترین` → suffix `ترین` = 2 syllables
223
+
224
+ ### خواه Classifier
225
+
226
+ The word `خواه` has multiple roles in Persian. A three-layer classifier resolves ambiguity **before** syllable counting:
227
+
228
+ | Label | Examples | Treatment |
229
+ |-------|---------|-----------|
230
+ | `FUTURE_AUX` | خواهم رفت، نخواهند پذیرفت | syllable count unchanged (the base syllable count is already correct) |
231
+ | `LEXICAL_KHASTAN` | خواهد که برود، این را خواهد | original tag is kept |
232
+ | `PARTICLE_KHAH` | خواه بیاید خواه نیاید | treated as non-verb |
233
+ | `NOMINAL_DERIVATIVE` | خواهش، خواهان، خواهنده | treated as non-verb |
234
+ | `INDEPENDENT_WORD` | خواهر، خواهران | treated as non-verb |
235
+ | `SUFFIX_COMPOUND` | آزادی‌خواه، خیرخواه، دادخواه | treated as non-verb |
236
+
237
+ The classifier uses exact lexical sets (layer 1), suffix-compound detection (layer 2), and a 2-token context window (layer 3) — never a simple prefix regex.
238
+
239
+ ---
240
+
241
+ ## Notes
242
+
243
+ - **Minimum text length:** The Flesch–Dayani formula is designed for running prose. Texts shorter than ~50 words produce unstable scores. A warning is emitted in this case (visible with `--verbose`).
244
+ - **Punctuation filtering:** Persian and Latin punctuation marks (quotation marks, periods, commas, …) are stripped from the edges of every token, and tokens that consist entirely of punctuation are excluded from the counts.
245
+ - **stdin:** When running interactively without `-t` or `-f`, the script waits for input and prints a prompt. Press `Ctrl+D` (on Windows: `Ctrl+Z`, then Enter) to signal end of input.
246
+ - **Log messages:** All warnings go to stderr and do not affect `--plain` output.
247
+
248
+ ---
249
+
250
+ ## Running Tests
251
+
252
+ ```
253
+ pip install pytest hazm
254
+ python -m pytest tests/test_core.py -v
255
+ ```
256
+
257
+ 76 tests covering: خواه classifier (all 9 document cases), punctuation filtering,
258
+ syllable counting, heuristic limitations, formula verification, and edge cases.
259
+
260
+ ---
261
+
262
+ ## References
263
+
264
+ - Dayani, M. (1374/1995). *سنجش خوانایی متون فارسی*. Persian adaptation of the Flesch Reading Ease formula.
265
+ - Mohtaj et al. (2018). [Parsivar: A Language Processing Toolkit for Persian](https://github.com/ICTRC/Parsivar). LREC 2018.
266
+ - Mohammadi & Khasteh (2020). [A Machine Learning Approach to Persian Text Readability](https://arxiv.org/abs/1810.06639).
267
+ - Sobhe. [hazm — Persian NLP library](https://github.com/roshan-research/hazm).
268
+
269
+ ---
270
+
271
+ ## Author
272
+
273
+ **Dr. Mohammad Pirouzan** — [@Drpirouzan](https://github.com/Drpirouzan)
274
+
275
+ ---
276
+
277
+ ## License
278
+
279
+ MIT License — see [LICENSE](LICENSE) for details.
@@ -0,0 +1,63 @@
1
+ """
2
+ Persian Readability — Flesch–Dayani readability score for Persian/Farsi text.
3
+ """
4
+
5
+ from . import core as _core
6
+
7
# Mirror the attributes of the core module into this package's namespace,
# single-underscore helpers included, so that imports and tests written
# before the project became a package keep working. Only names that start
# with a double underscore are left out.
for _name in dir(_core):
    if _name.startswith("__"):
        continue
    globals()[_name] = getattr(_core, _name)
12
+
13
+ __version__ = "0.1.2"
14
+
15
+
16
class PersianReadability:
    """Object-style entry point to the readability calculator.

    The class keeps no state of its own; each method forwards its
    arguments to a module-level function.
    """

    def analyze(self, text: str, mode="auto"):
        """Return the result of ``compute_flesch_dayani`` for *text*.

        ``mode`` is forwarded unchanged as a keyword argument.
        """
        outcome = compute_flesch_dayani(text, mode=mode)
        return outcome

    def calculate(self, text: str, mode="auto") -> dict:
        """Return the dictionary built by ``calculate_readability`` for *text*.

        ``mode`` is forwarded unchanged as a keyword argument.
        """
        summary = calculate_readability(text, mode=mode)
        return summary
24
+
25
+
26
def calculate_readability(text: str, mode="auto") -> dict:
    """
    Compute the readability of Persian/Farsi *text* and return a plain dict.

    The text and ``mode`` are handed to ``compute_flesch_dayani``; selected
    attributes of the object it returns are then copied into a dictionary,
    which suits callers who want JSON-like output rather than the result
    object itself.
    """
    result = compute_flesch_dayani(text, mode=mode)

    # The score is published under the key "score"; every other key has the
    # same name as the attribute it is read from.
    summary = {"score": result.flesch_dayani}
    for field in (
        "level",
        "sentences",
        "words",
        "letters",
        "syllables",
        "asl",
        "wl",
        "asyl",
        "pos_mode",
        "pos_enhanced",
        "is_likely_poetry",
        "diacritics_mode",
        "diacritic_ratio",
    ):
        summary[field] = getattr(result, field)
    return summary
51
+
52
+
53
# Names exported by ``from persian_readability import *``.
__all__ = [
    # Convenience layer defined in this module.
    "PersianReadability", "calculate_readability",
    # Names expected to arrive through the re-export of the core module.
    "compute_flesch_dayani", "ReadabilityResult", "InputMode",
    "count_syllables", "count_letters",
    "interpret_score", "analyze_diacritics",
]