pystylometry-0.1.0-py3-none-any.whl → pystylometry-1.0.0-py3-none-any.whl
This diff shows the contents of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- pystylometry/__init__.py +1 -2
- pystylometry/_normalize.py +277 -0
- pystylometry/_types.py +1224 -2
- pystylometry/_utils.py +4 -0
- pystylometry/authorship/__init__.py +4 -0
- pystylometry/authorship/additional_methods.py +100 -0
- pystylometry/character/__init__.py +15 -0
- pystylometry/character/character_metrics.py +301 -0
- pystylometry/lexical/__init__.py +13 -6
- pystylometry/lexical/advanced_diversity.py +641 -0
- pystylometry/lexical/function_words.py +391 -0
- pystylometry/lexical/hapax.py +154 -7
- pystylometry/lexical/mtld.py +83 -7
- pystylometry/lexical/ttr.py +83 -0
- pystylometry/lexical/word_frequency_sophistication.py +581 -0
- pystylometry/lexical/yule.py +34 -7
- pystylometry/ngrams/__init__.py +2 -0
- pystylometry/ngrams/extended_ngrams.py +235 -0
- pystylometry/prosody/__init__.py +12 -0
- pystylometry/prosody/rhythm_prosody.py +53 -0
- pystylometry/readability/__init__.py +12 -0
- pystylometry/readability/additional_formulas.py +985 -0
- pystylometry/readability/ari.py +93 -17
- pystylometry/readability/coleman_liau.py +102 -9
- pystylometry/readability/complex_words.py +531 -0
- pystylometry/readability/flesch.py +59 -14
- pystylometry/readability/gunning_fog.py +194 -25
- pystylometry/readability/smog.py +31 -14
- pystylometry/readability/syllables.py +137 -30
- pystylometry/stylistic/__init__.py +20 -0
- pystylometry/stylistic/cohesion_coherence.py +45 -0
- pystylometry/stylistic/genre_register.py +45 -0
- pystylometry/stylistic/markers.py +131 -0
- pystylometry/stylistic/vocabulary_overlap.py +47 -0
- pystylometry/syntactic/__init__.py +4 -0
- pystylometry/syntactic/advanced_syntactic.py +432 -0
- pystylometry/syntactic/pos_ratios.py +104 -13
- pystylometry/syntactic/sentence_stats.py +57 -13
- pystylometry/syntactic/sentence_types.py +470 -0
- {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/METADATA +49 -12
- pystylometry-1.0.0.dist-info/RECORD +46 -0
- {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/WHEEL +1 -1
- pystylometry-0.1.0.dist-info/RECORD +0 -26
|
@@ -1,21 +1,19 @@
|
|
|
1
|
-
Metadata-Version: 2.
|
|
1
|
+
Metadata-Version: 2.1
|
|
2
2
|
Name: pystylometry
|
|
3
|
-
Version: 0.1.0
|
|
3
|
+
Version: 1.0.0
|
|
4
4
|
Summary: Comprehensive Python package for stylometric analysis
|
|
5
5
|
License: MIT
|
|
6
6
|
Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
|
|
7
7
|
Author: Craig Trim
|
|
8
8
|
Author-email: craigtrim@gmail.com
|
|
9
9
|
Requires-Python: >=3.11,<4.0
|
|
10
|
-
Classifier: Development Status ::
|
|
10
|
+
Classifier: Development Status :: 4 - Beta
|
|
11
11
|
Classifier: Intended Audience :: Developers
|
|
12
12
|
Classifier: Intended Audience :: Science/Research
|
|
13
13
|
Classifier: License :: OSI Approved :: MIT License
|
|
14
14
|
Classifier: Programming Language :: Python :: 3
|
|
15
15
|
Classifier: Programming Language :: Python :: 3.11
|
|
16
16
|
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
-
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
-
Classifier: Programming Language :: Python :: 3.14
|
|
19
17
|
Classifier: Programming Language :: Python :: 3.10
|
|
20
18
|
Classifier: Programming Language :: Python :: 3.9
|
|
21
19
|
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
@@ -149,10 +147,47 @@ result = compute_yule(text)
|
|
|
149
147
|
- **Flesch Reading Ease** - 0-100 difficulty scale
|
|
150
148
|
- **Flesch-Kincaid Grade** - US grade level
|
|
151
149
|
- **SMOG Index** - Years of education needed
|
|
152
|
-
- **Gunning Fog** -
|
|
150
|
+
- **Gunning Fog** - NLP-enhanced readability complexity (see below)
|
|
153
151
|
- **Coleman-Liau** - Character-based grade level
|
|
154
152
|
- **ARI** - Automated Readability Index
|
|
155
153
|
|
|
154
|
+
#### Gunning Fog Index - NLP Enhancement
|
|
155
|
+
|
|
156
|
+
The Gunning Fog Index implementation includes advanced NLP features when spaCy is available:
|
|
157
|
+
|
|
158
|
+
**Enhanced Mode** (with spaCy):
|
|
159
|
+
- Accurate proper noun detection via POS tagging (PROPN)
|
|
160
|
+
- True morphological analysis via lemmatization
|
|
161
|
+
- Component-based hyphenated word analysis
|
|
162
|
+
- Handles edge cases: acronyms, irregular verbs, compound nouns
|
|
163
|
+
|
|
164
|
+
**Basic Mode** (without spaCy):
|
|
165
|
+
- Capitalization-based proper noun detection
|
|
166
|
+
- Simple suffix stripping for inflections (-es, -ed, -ing)
|
|
167
|
+
- Component-based hyphenated word analysis
|
|
168
|
+
- Works without external dependencies
|
|
169
|
+
|
|
170
|
+
```python
|
|
171
|
+
from pystylometry.readability import compute_gunning_fog
|
|
172
|
+
|
|
173
|
+
text = "Understanding computational linguistics requires significant dedication."
|
|
174
|
+
result = compute_gunning_fog(text)
|
|
175
|
+
|
|
176
|
+
print(f"Fog Index: {result.fog_index:.1f}")
|
|
177
|
+
print(f"Grade Level: {result.grade_level}")
|
|
178
|
+
print(f"Detection Mode: {result.metadata['mode']}") # "enhanced" or "basic"
|
|
179
|
+
```
|
|
180
|
+
|
|
181
|
+
**To enable enhanced mode:**
|
|
182
|
+
```bash
|
|
183
|
+
pip install pystylometry[readability]
|
|
184
|
+
python -m spacy download en_core_web_sm
|
|
185
|
+
```
|
|
186
|
+
|
|
187
|
+
**Reference:** Gunning, R. (1952). The Technique of Clear Writing. McGraw-Hill.
|
|
188
|
+
|
|
189
|
+
**Implementation Details:** See [GitHub PR #4](https://github.com/craigtrim/pystylometry/pull/4) for the rationale behind NLP enhancements.
|
|
190
|
+
|
|
156
191
|
### Syntactic (requires spaCy)
|
|
157
192
|
- **POS Ratios** - Noun/verb/adjective/adverb ratios
|
|
158
193
|
- **Lexical Density** - Content vs function words
|
|
@@ -174,11 +209,17 @@ result = compute_yule(text)
|
|
|
174
209
|
- stylometry-ttr
|
|
175
210
|
|
|
176
211
|
**Optional:**
|
|
177
|
-
- `readability`: pronouncing (
|
|
178
|
-
- `syntactic`: spacy>=3.8.0
|
|
212
|
+
- `readability`: pronouncing (syllable counting), spacy>=3.8.0 (NLP-enhanced Gunning Fog)
|
|
213
|
+
- `syntactic`: spacy>=3.8.0 (POS tagging and syntactic analysis)
|
|
179
214
|
- `authorship`: None (pure Python + stdlib)
|
|
180
215
|
- `ngrams`: None (pure Python + stdlib)
|
|
181
216
|
|
|
217
|
+
**Note:** spaCy is shared between `readability` and `syntactic` groups. For enhanced Gunning Fog accuracy, download a language model:
|
|
218
|
+
```bash
|
|
219
|
+
python -m spacy download en_core_web_sm # Small model (13MB)
|
|
220
|
+
python -m spacy download en_core_web_md # Medium model (better accuracy)
|
|
221
|
+
```
|
|
222
|
+
|
|
182
223
|
## Development
|
|
183
224
|
|
|
184
225
|
```bash
|
|
@@ -209,7 +250,6 @@ make format
|
|
|
209
250
|
- [ ] Tests
|
|
210
251
|
- [ ] v0.1.0 release
|
|
211
252
|
|
|
212
|
-
See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full roadmap.
|
|
213
253
|
|
|
214
254
|
## Why pystylometry?
|
|
215
255
|
|
|
@@ -220,9 +260,6 @@ See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full ro
|
|
|
220
260
|
- **Type-Safe**: Full type hints for IDE support
|
|
221
261
|
- **Tested**: Comprehensive test suite
|
|
222
262
|
|
|
223
|
-
## References
|
|
224
|
-
|
|
225
|
-
See [stylometry-metrics.md](.claude/context/stylometry-metrics.md) for the complete metrics reference table with formulas.
|
|
226
263
|
|
|
227
264
|
## License
|
|
228
265
|
|
|
@@ -0,0 +1,46 @@
|
|
|
1
|
+
pystylometry/__init__.py,sha256=Urm3eneHIeYF08vjzHE5F26whK8hCJMduhyhxowW3Pw,7398
|
|
2
|
+
pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
|
|
3
|
+
pystylometry/_types.py,sha256=x03IN6w5YEzqJbTgpFN3K80-3fjw7AVBVkDSYO7-JaI,55649
|
|
4
|
+
pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
|
|
5
|
+
pystylometry/authorship/__init__.py,sha256=sJ2Oe3VVJubGY-VbWmKImaZmjk6FXgVOuj5L1RUpoH8,396
|
|
6
|
+
pystylometry/authorship/additional_methods.py,sha256=eqpnT_W9CZgLFeiut0XwMlYjnnzeMCbkp66W1tKUc6A,3393
|
|
7
|
+
pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
|
|
8
|
+
pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
|
|
9
|
+
pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
|
|
10
|
+
pystylometry/character/character_metrics.py,sha256=J_WVnN851GbUALQPXPFCW3_zesU6Q2oHsAmEzCfD98k,10959
|
|
11
|
+
pystylometry/lexical/__init__.py,sha256=HTncnGVZgpktZqpf-r4_HI_9Jq42WkZZKXn8nho3y3s,751
|
|
12
|
+
pystylometry/lexical/advanced_diversity.py,sha256=M2wMrmafljtMnWSrMzZjMI-GYRTIUd0tvNfSKZoWV6g,23967
|
|
13
|
+
pystylometry/lexical/function_words.py,sha256=YXuMMqj9vVZHE4UiRRV8ng4xSovuxPMo9FuqLlbDung,15359
|
|
14
|
+
pystylometry/lexical/hapax.py,sha256=URupABCzIyq1a7JmoEBo2HEpxTyghQo-z-aPHbtIhJU,7853
|
|
15
|
+
pystylometry/lexical/mtld.py,sha256=8X25sDIq2HJusBJlr5RmHM9TBMAof_vo9TYc1x4onUI,4596
|
|
16
|
+
pystylometry/lexical/ttr.py,sha256=-9hFM7jhZg0Ft4aboTkihTnH7kectM9n7CflKbAfr30,3279
|
|
17
|
+
pystylometry/lexical/word_frequency_sophistication.py,sha256=Qp6_ZqukgmYQTD_qg0NG3qU7uq9tbbfnQ4rqxn5m21o,36395
|
|
18
|
+
pystylometry/lexical/yule.py,sha256=cuxKjIIFSjiKI9ukZPMMaKeHUVPVngsu4QDZeQ3ZkeU,2849
|
|
19
|
+
pystylometry/ngrams/__init__.py,sha256=eyITmSG4QP1NtVSagPsvc4j6W_E8TdB9wvBvXQHUnwo,379
|
|
20
|
+
pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
|
|
21
|
+
pystylometry/ngrams/extended_ngrams.py,sha256=OsBHTaaK73ZOhpS_yG2aWip1yWY2Fitdte0qx3wIshc,9475
|
|
22
|
+
pystylometry/prosody/__init__.py,sha256=9tiD-U4sqEtUV8n9X339oF_C5tBNingjL-shGBXOrnY,265
|
|
23
|
+
pystylometry/prosody/rhythm_prosody.py,sha256=V9OoxV5d4AZRZAb2HDY7-iEK1ijE7gtHhvFRD2DJvdA,1960
|
|
24
|
+
pystylometry/readability/__init__.py,sha256=bJenjlGpNx7FF5AfOb6VA-wODdIa7Hc9iqoba1DLlh0,637
|
|
25
|
+
pystylometry/readability/additional_formulas.py,sha256=KNH_7v_eAXv8HhSJfGW54529QGanWKbQfldhIn340BA,41794
|
|
26
|
+
pystylometry/readability/ari.py,sha256=DufnzoPhI3DbeXu_B5fH7T4D7MydvII5f0CDWsLYgPo,4642
|
|
27
|
+
pystylometry/readability/coleman_liau.py,sha256=nJ3YZxSoH2WeFgkTDxPlYWCdGWx8n79BG4ziX0-DePg,7111
|
|
28
|
+
pystylometry/readability/complex_words.py,sha256=QyD4m-DeArLPoJzT85oRgj5Ry72eQNVwWpQkxz3IKMo,20717
|
|
29
|
+
pystylometry/readability/flesch.py,sha256=R78aEIr4CyOP6oNRIbXF1RtsZxmFrgSw215ZIziviVU,5007
|
|
30
|
+
pystylometry/readability/gunning_fog.py,sha256=lZqLCRqDAxN6FHdwMRwA-6JlsEYZd0k1MfT-Jo87Oqo,9978
|
|
31
|
+
pystylometry/readability/smog.py,sha256=YTKYzP9giudzT7dK068uWTFNCDFbM-hGt_2Fjw43cF4,3272
|
|
32
|
+
pystylometry/readability/syllables.py,sha256=U_tO1fmdOh2xyIJVkFooGMhmZs1hqlFPBa9wBjEwLw8,4272
|
|
33
|
+
pystylometry/stylistic/__init__.py,sha256=nMykFZUCUKj-ZTk5H0OSKn24w6CSVEVIWieNG2B2hhc,581
|
|
34
|
+
pystylometry/stylistic/cohesion_coherence.py,sha256=M_Pqfj0ZfCLDZBKFQCPx7rX9k6mxWFOjIsm1gsLdFyg,1618
|
|
35
|
+
pystylometry/stylistic/genre_register.py,sha256=R32csC0M3eRcnACJNqMsyN-1ucMwdK8Twm5Tsa0Dd4k,1664
|
|
36
|
+
pystylometry/stylistic/markers.py,sha256=s0ybwUZ6_wE064NXL9kQeTLKVeSHScFgZip7zkKYi2U,5134
|
|
37
|
+
pystylometry/stylistic/vocabulary_overlap.py,sha256=TD8Rn32htB6MPHjc9xkr0LepJ6Q9k7f6uJvZt9_5aXA,1717
|
|
38
|
+
pystylometry/syntactic/__init__.py,sha256=B9qe0R7w9t5x2s2dXygSuvciuEHrScgD3CkxvPWKMPE,391
|
|
39
|
+
pystylometry/syntactic/advanced_syntactic.py,sha256=y41g5kpGbKZXHDDumCbJDhpnu9HFiDsN4obl178DE38,16357
|
|
40
|
+
pystylometry/syntactic/pos_ratios.py,sha256=lEPE1LxcML1ydzEo_3eCjpAIPndtU5c_QsRyVWEf4ns,4848
|
|
41
|
+
pystylometry/syntactic/sentence_stats.py,sha256=W4tEu9JV6cDZw1IRwT20UHcwqdomcN3QogNLlTd3xlc,3200
|
|
42
|
+
pystylometry/syntactic/sentence_types.py,sha256=txWgDgN5AjL1wTAHVTp7PnBD1DMgildcYE1Ysd3Jgok,16342
|
|
43
|
+
pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
|
|
44
|
+
pystylometry-1.0.0.dist-info/METADATA,sha256=VHK8TpynEpNCdJdUY8jYtxaKKs7C9YAA5zTAntuH-SI,8153
|
|
45
|
+
pystylometry-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
|
|
46
|
+
pystylometry-1.0.0.dist-info/RECORD,,
|
|
@@ -1,26 +0,0 @@
|
|
|
1
|
-
pystylometry/__init__.py,sha256=gCsVT2Eu9sP20Wmq8KQgIf0fEPdZhs-Dlp1IWWfmC5Y,7454
|
|
2
|
-
pystylometry/_types.py,sha256=lldXONgNhTVH5ZGHUVx7BKez7szR8Svi05BSg0M3VsY,3503
|
|
3
|
-
pystylometry/_utils.py,sha256=UN1hBe0WDIov33dG4di7Br9xIIFUwka6SoeOeJ_aGpg,5116
|
|
4
|
-
pystylometry/authorship/__init__.py,sha256=iXkH94lGd5kCDtk1UOuz3gUfOsAJb4DDcv2evTWSCAA,233
|
|
5
|
-
pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
|
|
6
|
-
pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
|
|
7
|
-
pystylometry/lexical/__init__.py,sha256=eHAEzZytevXnR7f1KY3ADrczdmH3YZ2LZCrUx0vXJJw,416
|
|
8
|
-
pystylometry/lexical/hapax.py,sha256=JVwbd7rWe0iy-KGwAGK-PFN7mLhB6xwa4WFvoqGmfMA,2365
|
|
9
|
-
pystylometry/lexical/mtld.py,sha256=tNtvpHKyf_i4E7wTDYZkG6BIkAVbaCOPZpZDApPLtxE,1850
|
|
10
|
-
pystylometry/lexical/yule.py,sha256=XOdGth-E_rCXVp_Y6ZdTXMW2QATw0NoDbNPuXIZFjas,1898
|
|
11
|
-
pystylometry/ngrams/__init__.py,sha256=pJadMr_VAMOENr-CteCqlgokuCP2QIPku7blMj-9ToI,295
|
|
12
|
-
pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
|
|
13
|
-
pystylometry/readability/__init__.py,sha256=kwVGQyxk1e7W7NnZTjwSJ-0pXmyFYeeoNRSt7bpzjoA,346
|
|
14
|
-
pystylometry/readability/ari.py,sha256=Br1ZJAxtoDoT2fFeKRhQuiwyWpobS9E8_wRyizh5w3c,2257
|
|
15
|
-
pystylometry/readability/coleman_liau.py,sha256=weH4w9HSdLxhUiYEub3WgNjKsbSXKMkq-zEZtaqjyU4,2058
|
|
16
|
-
pystylometry/readability/flesch.py,sha256=6YuoS8Qo6KP6enJKMEZ731mMAOKdz2AnVvzZYNVxZHM,2700
|
|
17
|
-
pystylometry/readability/gunning_fog.py,sha256=5x8pntoYMV2pQAnI3-LBm_Od1LJaTcIxnOJ9ESBNRB8,1984
|
|
18
|
-
pystylometry/readability/smog.py,sha256=5Z-PNmJtnsCQTUfv_vnaQdaUAoK0EatCQJW2ypqav6c,2171
|
|
19
|
-
pystylometry/readability/syllables.py,sha256=BuNm9A9ZSzE7kG3qGzhzw47vzui5DAG1nMis6gbwC4k,1373
|
|
20
|
-
pystylometry/syntactic/__init__.py,sha256=8iJRXTu3VTXnoqIIwyLTWlLvxMweq_stG3FkynpoGVM,217
|
|
21
|
-
pystylometry/syntactic/pos_ratios.py,sha256=tzM4y6vqkZp1Jx2L4JzdWtLBegrlqOUgZrohnwjB3H8,1876
|
|
22
|
-
pystylometry/syntactic/sentence_stats.py,sha256=iMOeXhV6VwDt9lKy66pq8i-zQ4JsmdFdNGAsFR5_c48,1825
|
|
23
|
-
pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
|
|
24
|
-
pystylometry-0.1.0.dist-info/METADATA,sha256=u8t8BmHbeaoDDzTOFicH0YMW5tt3ljStkIai1iwTBK8,6802
|
|
25
|
-
pystylometry-0.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
|
|
26
|
-
pystylometry-0.1.0.dist-info/RECORD,,
|