pystylometry 0.1.0__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. pystylometry/__init__.py +1 -2
  2. pystylometry/_normalize.py +277 -0
  3. pystylometry/_types.py +1224 -2
  4. pystylometry/_utils.py +4 -0
  5. pystylometry/authorship/__init__.py +4 -0
  6. pystylometry/authorship/additional_methods.py +100 -0
  7. pystylometry/character/__init__.py +15 -0
  8. pystylometry/character/character_metrics.py +301 -0
  9. pystylometry/lexical/__init__.py +13 -6
  10. pystylometry/lexical/advanced_diversity.py +641 -0
  11. pystylometry/lexical/function_words.py +391 -0
  12. pystylometry/lexical/hapax.py +154 -7
  13. pystylometry/lexical/mtld.py +83 -7
  14. pystylometry/lexical/ttr.py +83 -0
  15. pystylometry/lexical/word_frequency_sophistication.py +581 -0
  16. pystylometry/lexical/yule.py +34 -7
  17. pystylometry/ngrams/__init__.py +2 -0
  18. pystylometry/ngrams/extended_ngrams.py +235 -0
  19. pystylometry/prosody/__init__.py +12 -0
  20. pystylometry/prosody/rhythm_prosody.py +53 -0
  21. pystylometry/readability/__init__.py +12 -0
  22. pystylometry/readability/additional_formulas.py +985 -0
  23. pystylometry/readability/ari.py +93 -17
  24. pystylometry/readability/coleman_liau.py +102 -9
  25. pystylometry/readability/complex_words.py +531 -0
  26. pystylometry/readability/flesch.py +59 -14
  27. pystylometry/readability/gunning_fog.py +194 -25
  28. pystylometry/readability/smog.py +31 -14
  29. pystylometry/readability/syllables.py +137 -30
  30. pystylometry/stylistic/__init__.py +20 -0
  31. pystylometry/stylistic/cohesion_coherence.py +45 -0
  32. pystylometry/stylistic/genre_register.py +45 -0
  33. pystylometry/stylistic/markers.py +131 -0
  34. pystylometry/stylistic/vocabulary_overlap.py +47 -0
  35. pystylometry/syntactic/__init__.py +4 -0
  36. pystylometry/syntactic/advanced_syntactic.py +432 -0
  37. pystylometry/syntactic/pos_ratios.py +104 -13
  38. pystylometry/syntactic/sentence_stats.py +57 -13
  39. pystylometry/syntactic/sentence_types.py +470 -0
  40. {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/METADATA +49 -12
  41. pystylometry-1.0.0.dist-info/RECORD +46 -0
  42. {pystylometry-0.1.0.dist-info → pystylometry-1.0.0.dist-info}/WHEEL +1 -1
  43. pystylometry-0.1.0.dist-info/RECORD +0 -26
@@ -1,21 +1,19 @@
1
- Metadata-Version: 2.4
1
+ Metadata-Version: 2.1
2
2
  Name: pystylometry
3
- Version: 0.1.0
3
+ Version: 1.0.0
4
4
  Summary: Comprehensive Python package for stylometric analysis
5
5
  License: MIT
6
6
  Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
7
7
  Author: Craig Trim
8
8
  Author-email: craigtrim@gmail.com
9
9
  Requires-Python: >=3.11,<4.0
10
- Classifier: Development Status :: 3 - Alpha
10
+ Classifier: Development Status :: 4 - Beta
11
11
  Classifier: Intended Audience :: Developers
12
12
  Classifier: Intended Audience :: Science/Research
13
13
  Classifier: License :: OSI Approved :: MIT License
14
14
  Classifier: Programming Language :: Python :: 3
15
15
  Classifier: Programming Language :: Python :: 3.11
16
16
  Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.13
18
- Classifier: Programming Language :: Python :: 3.14
19
17
  Classifier: Programming Language :: Python :: 3.10
20
18
  Classifier: Programming Language :: Python :: 3.9
21
19
  Classifier: Topic :: Scientific/Engineering :: Information Analysis
@@ -149,10 +147,47 @@ result = compute_yule(text)
149
147
  - **Flesch Reading Ease** - 0-100 difficulty scale
150
148
  - **Flesch-Kincaid Grade** - US grade level
151
149
  - **SMOG Index** - Years of education needed
152
- - **Gunning Fog** - Readability complexity
150
+ - **Gunning Fog** - NLP-enhanced readability complexity (see below)
153
151
  - **Coleman-Liau** - Character-based grade level
154
152
  - **ARI** - Automated Readability Index
155
153
 
154
+ #### Gunning Fog Index - NLP Enhancement
155
+
156
+ The Gunning Fog Index implementation includes advanced NLP features when spaCy is available:
157
+
158
+ **Enhanced Mode** (with spaCy):
159
+ - Accurate proper noun detection via POS tagging (PROPN)
160
+ - True morphological analysis via lemmatization
161
+ - Component-based hyphenated word analysis
162
+ - Handles edge cases: acronyms, irregular verbs, compound nouns
163
+
164
+ **Basic Mode** (without spaCy):
165
+ - Capitalization-based proper noun detection
166
+ - Simple suffix stripping for inflections (-es, -ed, -ing)
167
+ - Component-based hyphenated word analysis
168
+ - Works without external dependencies
169
+
170
+ ```python
171
+ from pystylometry.readability import compute_gunning_fog
172
+
173
+ text = "Understanding computational linguistics requires significant dedication."
174
+ result = compute_gunning_fog(text)
175
+
176
+ print(f"Fog Index: {result.fog_index:.1f}")
177
+ print(f"Grade Level: {result.grade_level}")
178
+ print(f"Detection Mode: {result.metadata['mode']}") # "enhanced" or "basic"
179
+ ```
180
+
181
+ **To enable enhanced mode:**
182
+ ```bash
183
+ pip install pystylometry[readability]
184
+ python -m spacy download en_core_web_sm
185
+ ```
186
+
187
+ **Reference:** Gunning, R. (1952). The Technique of Clear Writing. McGraw-Hill.
188
+
189
+ **Implementation Details:** See [GitHub PR #4](https://github.com/craigtrim/pystylometry/pull/4) for the rationale behind NLP enhancements.
190
+
156
191
  ### Syntactic (requires spaCy)
157
192
  - **POS Ratios** - Noun/verb/adjective/adverb ratios
158
193
  - **Lexical Density** - Content vs function words
@@ -174,11 +209,17 @@ result = compute_yule(text)
174
209
  - stylometry-ttr
175
210
 
176
211
  **Optional:**
177
- - `readability`: pronouncing (for syllable counting)
178
- - `syntactic`: spacy>=3.8.0
212
+ - `readability`: pronouncing (syllable counting), spacy>=3.8.0 (NLP-enhanced Gunning Fog)
213
+ - `syntactic`: spacy>=3.8.0 (POS tagging and syntactic analysis)
179
214
  - `authorship`: None (pure Python + stdlib)
180
215
  - `ngrams`: None (pure Python + stdlib)
181
216
 
217
+ **Note:** spaCy is shared between `readability` and `syntactic` groups. For enhanced Gunning Fog accuracy, download a language model:
218
+ ```bash
219
+ python -m spacy download en_core_web_sm # Small model (13MB)
220
+ python -m spacy download en_core_web_md # Medium model (better accuracy)
221
+ ```
222
+
182
223
  ## Development
183
224
 
184
225
  ```bash
@@ -209,7 +250,6 @@ make format
209
250
  - [ ] Tests
210
251
  - [ ] v0.1.0 release
211
252
 
212
- See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full roadmap.
213
253
 
214
254
  ## Why pystylometry?
215
255
 
@@ -220,9 +260,6 @@ See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full ro
220
260
  - **Type-Safe**: Full type hints for IDE support
221
261
  - **Tested**: Comprehensive test suite
222
262
 
223
- ## References
224
-
225
- See [stylometry-metrics.md](.claude/context/stylometry-metrics.md) for the complete metrics reference table with formulas.
226
263
 
227
264
  ## License
228
265
 
@@ -0,0 +1,46 @@
1
+ pystylometry/__init__.py,sha256=Urm3eneHIeYF08vjzHE5F26whK8hCJMduhyhxowW3Pw,7398
2
+ pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
3
+ pystylometry/_types.py,sha256=x03IN6w5YEzqJbTgpFN3K80-3fjw7AVBVkDSYO7-JaI,55649
4
+ pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
5
+ pystylometry/authorship/__init__.py,sha256=sJ2Oe3VVJubGY-VbWmKImaZmjk6FXgVOuj5L1RUpoH8,396
6
+ pystylometry/authorship/additional_methods.py,sha256=eqpnT_W9CZgLFeiut0XwMlYjnnzeMCbkp66W1tKUc6A,3393
7
+ pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
8
+ pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
9
+ pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
10
+ pystylometry/character/character_metrics.py,sha256=J_WVnN851GbUALQPXPFCW3_zesU6Q2oHsAmEzCfD98k,10959
11
+ pystylometry/lexical/__init__.py,sha256=HTncnGVZgpktZqpf-r4_HI_9Jq42WkZZKXn8nho3y3s,751
12
+ pystylometry/lexical/advanced_diversity.py,sha256=M2wMrmafljtMnWSrMzZjMI-GYRTIUd0tvNfSKZoWV6g,23967
13
+ pystylometry/lexical/function_words.py,sha256=YXuMMqj9vVZHE4UiRRV8ng4xSovuxPMo9FuqLlbDung,15359
14
+ pystylometry/lexical/hapax.py,sha256=URupABCzIyq1a7JmoEBo2HEpxTyghQo-z-aPHbtIhJU,7853
15
+ pystylometry/lexical/mtld.py,sha256=8X25sDIq2HJusBJlr5RmHM9TBMAof_vo9TYc1x4onUI,4596
16
+ pystylometry/lexical/ttr.py,sha256=-9hFM7jhZg0Ft4aboTkihTnH7kectM9n7CflKbAfr30,3279
17
+ pystylometry/lexical/word_frequency_sophistication.py,sha256=Qp6_ZqukgmYQTD_qg0NG3qU7uq9tbbfnQ4rqxn5m21o,36395
18
+ pystylometry/lexical/yule.py,sha256=cuxKjIIFSjiKI9ukZPMMaKeHUVPVngsu4QDZeQ3ZkeU,2849
19
+ pystylometry/ngrams/__init__.py,sha256=eyITmSG4QP1NtVSagPsvc4j6W_E8TdB9wvBvXQHUnwo,379
20
+ pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
21
+ pystylometry/ngrams/extended_ngrams.py,sha256=OsBHTaaK73ZOhpS_yG2aWip1yWY2Fitdte0qx3wIshc,9475
22
+ pystylometry/prosody/__init__.py,sha256=9tiD-U4sqEtUV8n9X339oF_C5tBNingjL-shGBXOrnY,265
23
+ pystylometry/prosody/rhythm_prosody.py,sha256=V9OoxV5d4AZRZAb2HDY7-iEK1ijE7gtHhvFRD2DJvdA,1960
24
+ pystylometry/readability/__init__.py,sha256=bJenjlGpNx7FF5AfOb6VA-wODdIa7Hc9iqoba1DLlh0,637
25
+ pystylometry/readability/additional_formulas.py,sha256=KNH_7v_eAXv8HhSJfGW54529QGanWKbQfldhIn340BA,41794
26
+ pystylometry/readability/ari.py,sha256=DufnzoPhI3DbeXu_B5fH7T4D7MydvII5f0CDWsLYgPo,4642
27
+ pystylometry/readability/coleman_liau.py,sha256=nJ3YZxSoH2WeFgkTDxPlYWCdGWx8n79BG4ziX0-DePg,7111
28
+ pystylometry/readability/complex_words.py,sha256=QyD4m-DeArLPoJzT85oRgj5Ry72eQNVwWpQkxz3IKMo,20717
29
+ pystylometry/readability/flesch.py,sha256=R78aEIr4CyOP6oNRIbXF1RtsZxmFrgSw215ZIziviVU,5007
30
+ pystylometry/readability/gunning_fog.py,sha256=lZqLCRqDAxN6FHdwMRwA-6JlsEYZd0k1MfT-Jo87Oqo,9978
31
+ pystylometry/readability/smog.py,sha256=YTKYzP9giudzT7dK068uWTFNCDFbM-hGt_2Fjw43cF4,3272
32
+ pystylometry/readability/syllables.py,sha256=U_tO1fmdOh2xyIJVkFooGMhmZs1hqlFPBa9wBjEwLw8,4272
33
+ pystylometry/stylistic/__init__.py,sha256=nMykFZUCUKj-ZTk5H0OSKn24w6CSVEVIWieNG2B2hhc,581
34
+ pystylometry/stylistic/cohesion_coherence.py,sha256=M_Pqfj0ZfCLDZBKFQCPx7rX9k6mxWFOjIsm1gsLdFyg,1618
35
+ pystylometry/stylistic/genre_register.py,sha256=R32csC0M3eRcnACJNqMsyN-1ucMwdK8Twm5Tsa0Dd4k,1664
36
+ pystylometry/stylistic/markers.py,sha256=s0ybwUZ6_wE064NXL9kQeTLKVeSHScFgZip7zkKYi2U,5134
37
+ pystylometry/stylistic/vocabulary_overlap.py,sha256=TD8Rn32htB6MPHjc9xkr0LepJ6Q9k7f6uJvZt9_5aXA,1717
38
+ pystylometry/syntactic/__init__.py,sha256=B9qe0R7w9t5x2s2dXygSuvciuEHrScgD3CkxvPWKMPE,391
39
+ pystylometry/syntactic/advanced_syntactic.py,sha256=y41g5kpGbKZXHDDumCbJDhpnu9HFiDsN4obl178DE38,16357
40
+ pystylometry/syntactic/pos_ratios.py,sha256=lEPE1LxcML1ydzEo_3eCjpAIPndtU5c_QsRyVWEf4ns,4848
41
+ pystylometry/syntactic/sentence_stats.py,sha256=W4tEu9JV6cDZw1IRwT20UHcwqdomcN3QogNLlTd3xlc,3200
42
+ pystylometry/syntactic/sentence_types.py,sha256=txWgDgN5AjL1wTAHVTp7PnBD1DMgildcYE1Ysd3Jgok,16342
43
+ pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
44
+ pystylometry-1.0.0.dist-info/METADATA,sha256=VHK8TpynEpNCdJdUY8jYtxaKKs7C9YAA5zTAntuH-SI,8153
45
+ pystylometry-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
46
+ pystylometry-1.0.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 2.3.0
2
+ Generator: poetry-core 1.9.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -1,26 +0,0 @@
1
- pystylometry/__init__.py,sha256=gCsVT2Eu9sP20Wmq8KQgIf0fEPdZhs-Dlp1IWWfmC5Y,7454
2
- pystylometry/_types.py,sha256=lldXONgNhTVH5ZGHUVx7BKez7szR8Svi05BSg0M3VsY,3503
3
- pystylometry/_utils.py,sha256=UN1hBe0WDIov33dG4di7Br9xIIFUwka6SoeOeJ_aGpg,5116
4
- pystylometry/authorship/__init__.py,sha256=iXkH94lGd5kCDtk1UOuz3gUfOsAJb4DDcv2evTWSCAA,233
5
- pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
6
- pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
7
- pystylometry/lexical/__init__.py,sha256=eHAEzZytevXnR7f1KY3ADrczdmH3YZ2LZCrUx0vXJJw,416
8
- pystylometry/lexical/hapax.py,sha256=JVwbd7rWe0iy-KGwAGK-PFN7mLhB6xwa4WFvoqGmfMA,2365
9
- pystylometry/lexical/mtld.py,sha256=tNtvpHKyf_i4E7wTDYZkG6BIkAVbaCOPZpZDApPLtxE,1850
10
- pystylometry/lexical/yule.py,sha256=XOdGth-E_rCXVp_Y6ZdTXMW2QATw0NoDbNPuXIZFjas,1898
11
- pystylometry/ngrams/__init__.py,sha256=pJadMr_VAMOENr-CteCqlgokuCP2QIPku7blMj-9ToI,295
12
- pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
13
- pystylometry/readability/__init__.py,sha256=kwVGQyxk1e7W7NnZTjwSJ-0pXmyFYeeoNRSt7bpzjoA,346
14
- pystylometry/readability/ari.py,sha256=Br1ZJAxtoDoT2fFeKRhQuiwyWpobS9E8_wRyizh5w3c,2257
15
- pystylometry/readability/coleman_liau.py,sha256=weH4w9HSdLxhUiYEub3WgNjKsbSXKMkq-zEZtaqjyU4,2058
16
- pystylometry/readability/flesch.py,sha256=6YuoS8Qo6KP6enJKMEZ731mMAOKdz2AnVvzZYNVxZHM,2700
17
- pystylometry/readability/gunning_fog.py,sha256=5x8pntoYMV2pQAnI3-LBm_Od1LJaTcIxnOJ9ESBNRB8,1984
18
- pystylometry/readability/smog.py,sha256=5Z-PNmJtnsCQTUfv_vnaQdaUAoK0EatCQJW2ypqav6c,2171
19
- pystylometry/readability/syllables.py,sha256=BuNm9A9ZSzE7kG3qGzhzw47vzui5DAG1nMis6gbwC4k,1373
20
- pystylometry/syntactic/__init__.py,sha256=8iJRXTu3VTXnoqIIwyLTWlLvxMweq_stG3FkynpoGVM,217
21
- pystylometry/syntactic/pos_ratios.py,sha256=tzM4y6vqkZp1Jx2L4JzdWtLBegrlqOUgZrohnwjB3H8,1876
22
- pystylometry/syntactic/sentence_stats.py,sha256=iMOeXhV6VwDt9lKy66pq8i-zQ4JsmdFdNGAsFR5_c48,1825
23
- pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
24
- pystylometry-0.1.0.dist-info/METADATA,sha256=u8t8BmHbeaoDDzTOFicH0YMW5tt3ljStkIai1iwTBK8,6802
25
- pystylometry-0.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
26
- pystylometry-0.1.0.dist-info/RECORD,,