pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. pystylometry/README.md +42 -0
  2. pystylometry/__init__.py +45 -3
  3. pystylometry/_types.py +1017 -259
  4. pystylometry/authorship/README.md +21 -0
  5. pystylometry/authorship/__init__.py +28 -4
  6. pystylometry/authorship/additional_methods.py +260 -40
  7. pystylometry/authorship/compression.py +175 -0
  8. pystylometry/authorship/kilgarriff.py +354 -0
  9. pystylometry/character/README.md +17 -0
  10. pystylometry/character/character_metrics.py +267 -179
  11. pystylometry/cli.py +427 -0
  12. pystylometry/consistency/README.md +27 -0
  13. pystylometry/consistency/__init__.py +57 -0
  14. pystylometry/consistency/_thresholds.py +162 -0
  15. pystylometry/consistency/drift.py +549 -0
  16. pystylometry/dialect/README.md +26 -0
  17. pystylometry/dialect/__init__.py +65 -0
  18. pystylometry/dialect/_data/dialect_markers.json +1134 -0
  19. pystylometry/dialect/_loader.py +360 -0
  20. pystylometry/dialect/detector.py +533 -0
  21. pystylometry/lexical/README.md +23 -0
  22. pystylometry/lexical/advanced_diversity.py +61 -22
  23. pystylometry/lexical/function_words.py +255 -56
  24. pystylometry/lexical/hapax.py +182 -52
  25. pystylometry/lexical/mtld.py +108 -26
  26. pystylometry/lexical/ttr.py +76 -10
  27. pystylometry/lexical/word_frequency_sophistication.py +1522 -298
  28. pystylometry/lexical/yule.py +136 -50
  29. pystylometry/ngrams/README.md +18 -0
  30. pystylometry/ngrams/entropy.py +150 -49
  31. pystylometry/ngrams/extended_ngrams.py +314 -69
  32. pystylometry/prosody/README.md +17 -0
  33. pystylometry/prosody/rhythm_prosody.py +773 -11
  34. pystylometry/readability/README.md +23 -0
  35. pystylometry/readability/additional_formulas.py +1887 -762
  36. pystylometry/readability/ari.py +144 -82
  37. pystylometry/readability/coleman_liau.py +136 -109
  38. pystylometry/readability/flesch.py +177 -73
  39. pystylometry/readability/gunning_fog.py +165 -161
  40. pystylometry/readability/smog.py +123 -42
  41. pystylometry/stylistic/README.md +20 -0
  42. pystylometry/stylistic/cohesion_coherence.py +669 -13
  43. pystylometry/stylistic/genre_register.py +1560 -17
  44. pystylometry/stylistic/markers.py +611 -17
  45. pystylometry/stylistic/vocabulary_overlap.py +354 -13
  46. pystylometry/syntactic/README.md +20 -0
  47. pystylometry/syntactic/advanced_syntactic.py +76 -14
  48. pystylometry/syntactic/pos_ratios.py +70 -6
  49. pystylometry/syntactic/sentence_stats.py +55 -12
  50. pystylometry/syntactic/sentence_types.py +71 -15
  51. pystylometry/viz/README.md +27 -0
  52. pystylometry/viz/__init__.py +71 -0
  53. pystylometry/viz/drift.py +589 -0
  54. pystylometry/viz/jsx/__init__.py +31 -0
  55. pystylometry/viz/jsx/_base.py +144 -0
  56. pystylometry/viz/jsx/report.py +677 -0
  57. pystylometry/viz/jsx/timeline.py +716 -0
  58. pystylometry/viz/jsx/viewer.py +1032 -0
  59. pystylometry-1.3.0.dist-info/METADATA +136 -0
  60. pystylometry-1.3.0.dist-info/RECORD +76 -0
  61. {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
  62. pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
  63. pystylometry-1.0.0.dist-info/METADATA +0 -275
  64. pystylometry-1.0.0.dist-info/RECORD +0 -46
@@ -0,0 +1,136 @@
1
+ Metadata-Version: 2.4
2
+ Name: pystylometry
3
+ Version: 1.3.0
4
+ Summary: Comprehensive Python package for stylometric analysis
5
+ License: MIT
6
+ Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
7
+ Author: Craig Trim
8
+ Author-email: craigtrim@gmail.com
9
+ Requires-Python: >=3.9,<4.0
10
+ Classifier: Development Status :: 4 - Beta
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.9
16
+ Classifier: Programming Language :: Python :: 3.10
17
+ Classifier: Programming Language :: Python :: 3.11
18
+ Classifier: Programming Language :: Python :: 3.12
19
+ Classifier: Programming Language :: Python :: 3.13
20
+ Classifier: Programming Language :: Python :: 3.14
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Topic :: Text Processing :: Linguistic
23
+ Classifier: Typing :: Typed
24
+ Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
25
+ Project-URL: Homepage, https://github.com/craigtrim/pystylometry
26
+ Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
27
+ Project-URL: Repository, https://github.com/craigtrim/pystylometry
28
+ Description-Content-Type: text/markdown
29
+
30
+ # pystylometry
31
+
32
+ [![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
33
+ [![Downloads](https://pepy.tech/badge/pystylometry)](https://pepy.tech/project/pystylometry)
34
+ [![Python 3.9+](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
35
+ [![License: MIT](https://img.shields.io/badge/License-MIT-green.svg)](https://opensource.org/licenses/MIT)
36
+ ![Tests](https://img.shields.io/badge/tests-1022%20passed-brightgreen)
37
+
38
+ Stylometric analysis and authorship attribution for Python. 50+ metrics across 11 modules, from vocabulary diversity to AI-generation detection.
39
+
40
+ ## Install
41
+
42
+ ```bash
43
+ pip install pystylometry # Core (lexical metrics)
44
+ pip install pystylometry[all] # Everything
45
+ ```
46
+
47
+ <details>
48
+ <summary>Individual extras</summary>
49
+
50
+ ```bash
51
+ pip install pystylometry[readability] # Readability formulas (pronouncing, spaCy)
52
+ pip install pystylometry[syntactic] # POS/parse analysis (spaCy)
53
+ pip install pystylometry[authorship] # Attribution methods
54
+ pip install pystylometry[ngrams] # N-gram entropy
55
+ pip install pystylometry[viz] # Matplotlib visualizations
56
+ ```
57
+ </details>
58
+
59
+ ## Usage
60
+
61
+ ```python
62
+ from pystylometry.lexical import compute_mtld, compute_yule
63
+ from pystylometry.readability import compute_flesch
64
+
65
+ result = compute_mtld(text)
66
+ print(result.mtld_average) # 72.4
67
+
68
+ result = compute_flesch(text)
69
+ print(result.reading_ease) # 65.2
70
+ print(result.grade_level) # 8.1
71
+ ```
72
+
73
+ Every function returns a typed dataclass with the score, components, and metadata -- never a bare float.
74
+
75
+ ### Unified API
76
+
77
+ ```python
78
+ from pystylometry import analyze
79
+
80
+ results = analyze(text, lexical=True, readability=True, syntactic=True)
81
+ ```
82
+
83
+ ### Style Drift Detection
84
+
85
+ Detect authorship changes, spliced content, and AI-generated text within a single document.
86
+
87
+ ```python
88
+ from pystylometry.consistency import compute_kilgarriff_drift
89
+
90
+ result = compute_kilgarriff_drift(document)
91
+ print(result.pattern) # "sudden_spike"
92
+ print(result.pattern_confidence) # 0.71
93
+ print(result.max_location) # Window 23 -- the splice point
94
+ ```
95
+
96
+ ### CLI
97
+
98
+ ```bash
99
+ pystylometry-drift manuscript.txt --window-size=500 --stride=250
100
+ pystylometry-viewer report.html
101
+ ```
102
+
103
+ ## Modules
104
+
105
+ | Module | Metrics | Description |
106
+ |--------|---------|-------------|
107
+ | [**lexical**](pystylometry/lexical/) | TTR, MTLD, Yule's K/I, Hapax, MATTR, VocD-D, HD-D, MSTTR, function words, word frequency | Vocabulary diversity and richness |
108
+ | [**readability**](pystylometry/readability/) | Flesch, Flesch-Kincaid, SMOG, Gunning Fog, Coleman-Liau, ARI, Dale-Chall, Fry, FORCAST, Linsear Write, Powers-Sumner-Kearl | Grade-level and difficulty scoring |
109
+ | [**syntactic**](pystylometry/syntactic/) | POS ratios, sentence types, parse tree depth, clausal density, passive voice, T-units, dependency distance | Sentence and parse structure (requires spaCy) |
110
+ | [**authorship**](pystylometry/authorship/) | Burrows' Delta, Cosine Delta, Zeta, Kilgarriff chi-squared, MinMax, John's Delta, NCD | Author attribution and text comparison |
111
+ | [**stylistic**](pystylometry/stylistic/) | Contractions, hedges, intensifiers, modals, punctuation, vocabulary overlap (Jaccard/Dice/Cosine/KL), cohesion, genre/register | Style markers and text similarity |
112
+ | [**character**](pystylometry/character/) | Letter frequencies, digit/uppercase ratios, special characters, whitespace | Character-level fingerprinting |
113
+ | [**ngrams**](pystylometry/ngrams/) | Word/character/POS n-grams, Shannon entropy, skipgrams | N-gram profiles and entropy |
114
+ | [**dialect**](pystylometry/dialect/) | British/American classification, spelling/grammar/vocabulary markers, markedness | Regional dialect detection |
115
+ | [**consistency**](pystylometry/consistency/) | Sliding-window chi-squared drift, pattern classification | Intra-document style analysis |
116
+ | [**prosody**](pystylometry/prosody/) | Syllable stress, rhythm regularity | Prose rhythm (requires spaCy) |
117
+ | [**viz**](pystylometry/viz/) | Timeline, scatter, report (PNG + interactive HTML) | Drift detection visualization |
118
+
119
+ ## Development
120
+
121
+ ```bash
122
+ git clone https://github.com/craigtrim/pystylometry && cd pystylometry
123
+ pip install -e ".[dev,all]"
124
+ make test # 1022 tests
125
+ make lint # ruff + mypy
126
+ make all # lint + test + build
127
+ ```
128
+
129
+ ## License
130
+
131
+ MIT
132
+
133
+ ## Author
134
+
135
+ Craig Trim -- craigtrim@gmail.com
136
+
@@ -0,0 +1,76 @@
1
+ pystylometry/README.md,sha256=WFOtCAF3qtDTgGG3a_jTjNSwVgpQEXI1PKqbVBfyo1M,2366
2
+ pystylometry/__init__.py,sha256=Z6zkHlX05SUeObDca9dL1Gkfq4UPBWbU2M4sp4fVj78,9220
3
+ pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
4
+ pystylometry/_types.py,sha256=HddTq-8kGeXyTXFkUd26HmOlOhCOdIgEVULHp168ais,76563
5
+ pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
6
+ pystylometry/authorship/README.md,sha256=zNXCpLj7nczPnYykJnCUw3y-kxfC9mWZmngi3nfw6us,1016
7
+ pystylometry/authorship/__init__.py,sha256=D7m38hWi_62o1ZDSrghLCfob9YsykTht4K37wiVgHfg,1530
8
+ pystylometry/authorship/additional_methods.py,sha256=jvEg6TMI55jhkDt1jpC-08iXTzz6TaNmKOkJy5qNF0c,11487
9
+ pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
10
+ pystylometry/authorship/compression.py,sha256=qqUHDd7wWOB6Q2E97-cczBEWhKDTF3ynJUhbRqGq_RA,6296
11
+ pystylometry/authorship/kilgarriff.py,sha256=oz4JbLnFEuPXZYLmhfkuapg516A554FvXvVNIVu7uKk,13379
12
+ pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
13
+ pystylometry/character/README.md,sha256=poQwhbI8MabVD_626CWjEL87IOX5YDGS0ZJTH1hNwEE,607
14
+ pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
15
+ pystylometry/character/character_metrics.py,sha256=OCIGP_ivtwtzcifcxcbmp2R5SIKh2tKyvKcHAv64S8g,14029
16
+ pystylometry/cli.py,sha256=z0yx2O_E05tHT9_BHgSaQ2zq5_fBERXfhbYHcuQ2y-A,15477
17
+ pystylometry/consistency/README.md,sha256=HG_Rd6WRBnIz3M7J11dVDv1S2ARkMABFYrTn-VV8xRY,1058
18
+ pystylometry/consistency/__init__.py,sha256=l7nzpS7M4yHDBbM2LGAtW0XGT2n7YjSey_1xKf45224,2181
19
+ pystylometry/consistency/_thresholds.py,sha256=5fZwdJ_cnDy0ED7CCYs6V_zP6kIAR1p0h0NYkbZ0HRg,6381
20
+ pystylometry/consistency/drift.py,sha256=ZqK7YJXic8ceIfQLkH9ZtXFJCFyOuto5Mktz4qLG9ps,20682
21
+ pystylometry/dialect/README.md,sha256=Bz0oGFRaWXjfZQqlMgvQ75rA9U0E67am2mJ9nWcSBhQ,1089
22
+ pystylometry/dialect/__init__.py,sha256=6S4OKymniuDXPm3ZMqWyy9179RlWoLJoDzkCP4P7Jss,2486
23
+ pystylometry/dialect/_data/dialect_markers.json,sha256=DthluOA6q0rG_8IrCrFIYWh_EMvINqYv7W664sEjNN4,51799
24
+ pystylometry/dialect/_loader.py,sha256=M2ATp-5754v_yX9EWvBP0r5qgNf8xlL8XadVsVb_Hco,12989
25
+ pystylometry/dialect/detector.py,sha256=9x0ZuIfTIjsmdNSx0Ezy5AC0SAFtC4kVw11iOSBd9gQ,20147
26
+ pystylometry/lexical/README.md,sha256=cFQ7KRZV4ubsQwIlOH3YHTbhhNl5X91Sr3zcn-3x0HI,1185
27
+ pystylometry/lexical/__init__.py,sha256=HTncnGVZgpktZqpf-r4_HI_9Jq42WkZZKXn8nho3y3s,751
28
+ pystylometry/lexical/advanced_diversity.py,sha256=rL1hlNqTnaEFcA2v4oBJlojHZMTqdvvm4jYXTFGVpYE,25664
29
+ pystylometry/lexical/function_words.py,sha256=eel9bq_qWgWlvG0NtDiouilMt9kaFqz2rh3add2UC4U,17832
30
+ pystylometry/lexical/hapax.py,sha256=djTqZyZIYXa3GRiPoy6TTGHPm0wCRNJ9U0Rwnf5NoDk,12173
31
+ pystylometry/lexical/mtld.py,sha256=XpeCF8sOXZhWbaazHGuqm08mrOf_DYfkfGGAltWnyy4,7101
32
+ pystylometry/lexical/ttr.py,sha256=iEsXkoSPyZEyiiFwKatKA8KhLRukD7RDRvyRkRQOTsk,5848
33
+ pystylometry/lexical/word_frequency_sophistication.py,sha256=OHOS0fBvd1Bz8zsJk-pJbWLTgImmBd-aewQnp_kq8BY,38828
34
+ pystylometry/lexical/yule.py,sha256=NXggha8jmQCu4i-qKZpISwyJBqNpuPHyVR86BLDLgio,5192
35
+ pystylometry/ngrams/README.md,sha256=50wyaWcLGbosLzTPR1cXdE_xAVU8jVY7fd3ReEk9KnY,802
36
+ pystylometry/ngrams/__init__.py,sha256=eyITmSG4QP1NtVSagPsvc4j6W_E8TdB9wvBvXQHUnwo,379
37
+ pystylometry/ngrams/entropy.py,sha256=i2RzYXrcTTIv6QaUCNQjAahL5LFOctG3ZE1OJ_tY4II,7246
38
+ pystylometry/ngrams/extended_ngrams.py,sha256=288nrXbY6-PIJiQ3NaspnuRZ7qWakantnNKvtb5LhWI,18316
39
+ pystylometry/prosody/README.md,sha256=YNTU0sTnXbCJ9GBPDDfTqHELr4YoF59_bg99ejPiqEE,608
40
+ pystylometry/prosody/__init__.py,sha256=9tiD-U4sqEtUV8n9X339oF_C5tBNingjL-shGBXOrnY,265
41
+ pystylometry/prosody/rhythm_prosody.py,sha256=fifKW0FiRwC6xPX1NX0Yr4Il3APNfQiBEXB-uXXgZo8,28697
42
+ pystylometry/readability/README.md,sha256=jj5I5525WRJceMJR8lECiZb-7y1nFzSK00GSotqupFs,1173
43
+ pystylometry/readability/__init__.py,sha256=bJenjlGpNx7FF5AfOb6VA-wODdIa7Hc9iqoba1DLlh0,637
44
+ pystylometry/readability/additional_formulas.py,sha256=nlVegnn_RRh6TP0BoLWlLBNnAgtFqLqyDsxFN_fUrAg,44993
45
+ pystylometry/readability/ari.py,sha256=_wPl0FjEReLRHN0v4JQbRaU_kbikIxkr9mLO6hmNVyI,6833
46
+ pystylometry/readability/coleman_liau.py,sha256=NcEQFGEJxCubCP3dnWnPmlHAIhkDYXd5hIq3xSTHULk,6319
47
+ pystylometry/readability/complex_words.py,sha256=QyD4m-DeArLPoJzT85oRgj5Ry72eQNVwWpQkxz3IKMo,20717
48
+ pystylometry/readability/flesch.py,sha256=7kMeqpYnm-oqQGsDw7yJBhFecXB5ZRU9C8P4UKjWYD4,7985
49
+ pystylometry/readability/gunning_fog.py,sha256=ntV90NUfqSm_84H1jBa2Fhr5DhlkderHLq8_z3khb48,8375
50
+ pystylometry/readability/smog.py,sha256=8hdQQHUR9UBP-02AyZK3TbNhyyE1LQuZmlnVrs5Yvrk,5742
51
+ pystylometry/readability/syllables.py,sha256=U_tO1fmdOh2xyIJVkFooGMhmZs1hqlFPBa9wBjEwLw8,4272
52
+ pystylometry/stylistic/README.md,sha256=1GBo3AQ8f4ATap723is6pJtgUM9jmLy-hDOTcVWuI48,1020
53
+ pystylometry/stylistic/__init__.py,sha256=nMykFZUCUKj-ZTk5H0OSKn24w6CSVEVIWieNG2B2hhc,581
54
+ pystylometry/stylistic/cohesion_coherence.py,sha256=9al3AYH2KQ62aluQJQr0pQHcNf1Aec6G8Oa9zux_uZk,23286
55
+ pystylometry/stylistic/genre_register.py,sha256=4s-TxEBnFB-iog2yIO1RT6D66AQ3ChOjakRmOZzL8LM,41279
56
+ pystylometry/stylistic/markers.py,sha256=AsuBsq5ZNTGHEp12AEL0mHj9XCJBKf3bwt7JW4H_xKs,24204
57
+ pystylometry/stylistic/vocabulary_overlap.py,sha256=6ujoiE7TqrCiGEBrBuDeU6sdKSQYAG6IbrYVR3o9lMY,12931
58
+ pystylometry/syntactic/README.md,sha256=0eQGqQz9MIE024_Oge4pq9LNdi-GmuTuAlz-DrK2jDI,982
59
+ pystylometry/syntactic/__init__.py,sha256=B9qe0R7w9t5x2s2dXygSuvciuEHrScgD3CkxvPWKMPE,391
60
+ pystylometry/syntactic/advanced_syntactic.py,sha256=ygbm7y1hrNJCaIxRCfZsafvt6BInh2iCTY1eWk2PdaE,19195
61
+ pystylometry/syntactic/pos_ratios.py,sha256=lcvtx6tshVG6MpTWivyWnqFsjFXIHK3LCqyg2AL2AjY,7444
62
+ pystylometry/syntactic/sentence_stats.py,sha256=SJg6TYCiT3gs2bXHYuEMSRgzFnxqOCH5q6WyhjXKgH4,4947
63
+ pystylometry/syntactic/sentence_types.py,sha256=xEQPieGqTInCz9BinvItBX5Z_ofQ-BbFwTFNgY0jWx0,18730
64
+ pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
65
+ pystylometry/viz/README.md,sha256=mizuBpUzWgJqjC2u9C-Lu4sVDCcTQOgGsarRSkeWPf4,1031
66
+ pystylometry/viz/__init__.py,sha256=3kHMAcJJi8oPhTqUZIRdyf311cdyPOHWaJIUv-w0V04,2219
67
+ pystylometry/viz/drift.py,sha256=r98gQ4s_IlrEuaouxDMyue3cTjGqj10i4IeKC01IuCo,18956
68
+ pystylometry/viz/jsx/__init__.py,sha256=ZCgbpMPhG5PiJ92IkJRrZwrb7RodZB9MyauO0MGgbRM,1107
69
+ pystylometry/viz/jsx/_base.py,sha256=nd7kEc13fUcRMom3A5jqjGyTy-djIeydq2k3oPHZIHY,3708
70
+ pystylometry/viz/jsx/report.py,sha256=DbbHnnNAEi5tmVg4PmiHb17vkBBXujyE4x1CfVBiOBw,25857
71
+ pystylometry/viz/jsx/timeline.py,sha256=hor-xnBa6oVkSqN0AEZUCQFBOB-iTfHSFZHiEfeakPA,30716
72
+ pystylometry/viz/jsx/viewer.py,sha256=3LO49d_2bRf_P-P-2oSKpKx4N8Ugo4oCLb3DtvyNxXI,43716
73
+ pystylometry-1.3.0.dist-info/METADATA,sha256=wsQ5QTEH7i6hpePEnlfDgJFKVHJi1m-HpMcHuznQt3c,5706
74
+ pystylometry-1.3.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
75
+ pystylometry-1.3.0.dist-info/entry_points.txt,sha256=iHOaFXlyiwcQM1LlID2gWGmN4DBLdTSpKGjttU8tgm8,113
76
+ pystylometry-1.3.0.dist-info/RECORD,,
@@ -1,4 +1,4 @@
1
1
  Wheel-Version: 1.0
2
- Generator: poetry-core 1.9.0
2
+ Generator: poetry-core 2.3.0
3
3
  Root-Is-Purelib: true
4
4
  Tag: py3-none-any
@@ -0,0 +1,4 @@
1
+ [console_scripts]
2
+ pystylometry-drift=pystylometry.cli:drift_cli
3
+ pystylometry-viewer=pystylometry.cli:viewer_cli
4
+
@@ -1,275 +0,0 @@
1
- Metadata-Version: 2.1
2
- Name: pystylometry
3
- Version: 1.0.0
4
- Summary: Comprehensive Python package for stylometric analysis
5
- License: MIT
6
- Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
7
- Author: Craig Trim
8
- Author-email: craigtrim@gmail.com
9
- Requires-Python: >=3.11,<4.0
10
- Classifier: Development Status :: 4 - Beta
11
- Classifier: Intended Audience :: Developers
12
- Classifier: Intended Audience :: Science/Research
13
- Classifier: License :: OSI Approved :: MIT License
14
- Classifier: Programming Language :: Python :: 3
15
- Classifier: Programming Language :: Python :: 3.11
16
- Classifier: Programming Language :: Python :: 3.12
17
- Classifier: Programming Language :: Python :: 3.10
18
- Classifier: Programming Language :: Python :: 3.9
19
- Classifier: Topic :: Scientific/Engineering :: Information Analysis
20
- Classifier: Topic :: Text Processing :: Linguistic
21
- Classifier: Typing :: Typed
22
- Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
23
- Project-URL: Homepage, https://github.com/craigtrim/pystylometry
24
- Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
25
- Project-URL: Repository, https://github.com/craigtrim/pystylometry
26
- Description-Content-Type: text/markdown
27
-
28
- # pystylometry
29
-
30
- [![Python Version](https://img.shields.io/badge/python-3.9%2B-blue.svg)](https://www.python.org/downloads/)
31
- [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
32
- [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
33
- [![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
34
-
35
- A comprehensive Python package for stylometric analysis with modular architecture and optional dependencies.
36
-
37
- ## Features
38
-
39
- **pystylometry** provides 50+ metrics across five analysis domains:
40
-
41
- - **Lexical Diversity**: TTR, MTLD, Yule's K, Hapax ratios, and more
42
- - **Readability**: Flesch, SMOG, Gunning Fog, Coleman-Liau, ARI
43
- - **Syntactic Analysis**: POS ratios, sentence statistics (requires spaCy)
44
- - **Authorship Attribution**: Burrows' Delta, Cosine Delta, Zeta scores
45
- - **N-gram Analysis**: Character and word bigram entropy, perplexity
46
-
47
- ## Installation
48
-
49
- Install only what you need:
50
-
51
- ```bash
52
- # Core package (lexical metrics only)
53
- pip install pystylometry
54
-
55
- # With readability metrics
56
- pip install pystylometry[readability]
57
-
58
- # With syntactic metrics (requires spaCy)
59
- pip install pystylometry[syntactic]
60
-
61
- # With authorship metrics
62
- pip install pystylometry[authorship]
63
-
64
- # With n-gram analysis
65
- pip install pystylometry[ngrams]
66
-
67
- # Everything
68
- pip install pystylometry[all]
69
- ```
70
-
71
- ## Quick Start
72
-
73
- ### Using Individual Modules
74
-
75
- ```python
76
- from pystylometry.lexical import compute_mtld, compute_yule
77
- from pystylometry.readability import compute_flesch
78
-
79
- text = "Your text here..."
80
-
81
- # Lexical diversity
82
- mtld = compute_mtld(text)
83
- print(f"MTLD: {mtld.mtld_average:.2f}")
84
-
85
- yule = compute_yule(text)
86
- print(f"Yule's K: {yule.yule_k:.2f}")
87
-
88
- # Readability
89
- flesch = compute_flesch(text)
90
- print(f"Reading Ease: {flesch.reading_ease:.1f}")
91
- print(f"Grade Level: {flesch.grade_level:.1f}")
92
- ```
93
-
94
- ### Using the Unified API
95
-
96
- ```python
97
- from pystylometry import analyze
98
-
99
- text = "Your text here..."
100
-
101
- # Analyze with multiple metrics at once
102
- results = analyze(text, lexical=True, readability=True)
103
-
104
- # Access results
105
- print(f"MTLD: {results.lexical['mtld'].mtld_average:.2f}")
106
- print(f"Flesch: {results.readability['flesch'].reading_ease:.1f}")
107
- ```
108
-
109
- ### Checking Available Modules
110
-
111
- ```python
112
- from pystylometry import get_available_modules
113
-
114
- available = get_available_modules()
115
- print(available)
116
- # {'lexical': True, 'readability': True, 'syntactic': False, ...}
117
- ```
118
-
119
- ## API Design
120
-
121
- ### Clean, Consistent Interface
122
-
123
- Every metric function:
124
- - Takes text as input
125
- - Returns a rich result object (never just a float)
126
- - Includes metadata about the computation
127
- - Has comprehensive docstrings with formulas and references
128
-
129
- ```python
130
- from pystylometry.lexical import compute_yule
131
-
132
- result = compute_yule(text)
133
- # Returns: YuleResult(yule_k=..., yule_i=..., metadata={...})
134
- ```
135
-
136
- ## Available Metrics
137
-
138
- ### Lexical Diversity
139
- - **TTR** - Type-Token Ratio (via stylometry-ttr)
140
- - **MTLD** - Measure of Textual Lexical Diversity
141
- - **Yule's K** - Vocabulary repetitiveness
142
- - **Hapax Legomena** - Words appearing once/twice
143
- - **Sichel's S** - Hapax-based richness
144
- - **Honoré's R** - Vocabulary richness constant
145
-
146
- ### Readability
147
- - **Flesch Reading Ease** - 0-100 difficulty scale
148
- - **Flesch-Kincaid Grade** - US grade level
149
- - **SMOG Index** - Years of education needed
150
- - **Gunning Fog** - NLP-enhanced readability complexity (see below)
151
- - **Coleman-Liau** - Character-based grade level
152
- - **ARI** - Automated Readability Index
153
-
154
- #### Gunning Fog Index - NLP Enhancement
155
-
156
- The Gunning Fog Index implementation includes advanced NLP features when spaCy is available:
157
-
158
- **Enhanced Mode** (with spaCy):
159
- - Accurate proper noun detection via POS tagging (PROPN)
160
- - True morphological analysis via lemmatization
161
- - Component-based hyphenated word analysis
162
- - Handles edge cases: acronyms, irregular verbs, compound nouns
163
-
164
- **Basic Mode** (without spaCy):
165
- - Capitalization-based proper noun detection
166
- - Simple suffix stripping for inflections (-es, -ed, -ing)
167
- - Component-based hyphenated word analysis
168
- - Works without external dependencies
169
-
170
- ```python
171
- from pystylometry.readability import compute_gunning_fog
172
-
173
- text = "Understanding computational linguistics requires significant dedication."
174
- result = compute_gunning_fog(text)
175
-
176
- print(f"Fog Index: {result.fog_index:.1f}")
177
- print(f"Grade Level: {result.grade_level}")
178
- print(f"Detection Mode: {result.metadata['mode']}") # "enhanced" or "basic"
179
- ```
180
-
181
- **To enable enhanced mode:**
182
- ```bash
183
- pip install pystylometry[readability]
184
- python -m spacy download en_core_web_sm
185
- ```
186
-
187
- **Reference:** Gunning, R. (1952). The Technique of Clear Writing. McGraw-Hill.
188
-
189
- **Implementation Details:** See [GitHub PR #4](https://github.com/craigtrim/pystylometry/pull/4) for the rationale behind NLP enhancements.
190
-
191
- ### Syntactic (requires spaCy)
192
- - **POS Ratios** - Noun/verb/adjective/adverb ratios
193
- - **Lexical Density** - Content vs function words
194
- - **Sentence Statistics** - Length, variation, complexity
195
-
196
- ### Authorship (requires scikit-learn, scipy)
197
- - **Burrows' Delta** - Author distance measure
198
- - **Cosine Delta** - Angular distance
199
- - **Zeta Scores** - Distinctive word usage
200
-
201
- ### N-grams (requires nltk)
202
- - **Character Bigram Entropy** - Character predictability
203
- - **Word Bigram Entropy** - Word sequence predictability
204
- - **Perplexity** - Language model fit
205
-
206
- ## Dependencies
207
-
208
- **Core (always installed):**
209
- - stylometry-ttr
210
-
211
- **Optional:**
212
- - `readability`: pronouncing (syllable counting), spacy>=3.8.0 (NLP-enhanced Gunning Fog)
213
- - `syntactic`: spacy>=3.8.0 (POS tagging and syntactic analysis)
214
- - `authorship`: None (pure Python + stdlib)
215
- - `ngrams`: None (pure Python + stdlib)
216
-
217
- **Note:** spaCy is shared between `readability` and `syntactic` groups. For enhanced Gunning Fog accuracy, download a language model:
218
- ```bash
219
- python -m spacy download en_core_web_sm # Small model (13MB)
220
- python -m spacy download en_core_web_md # Medium model (better accuracy)
221
- ```
222
-
223
- ## Development
224
-
225
- ```bash
226
- # Clone the repository
227
- git clone https://github.com/craigtrim/pystylometry
228
- cd pystylometry
229
-
230
- # Install with dev dependencies
231
- pip install -e ".[dev,all]"
232
-
233
- # Run tests
234
- make test
235
-
236
- # Run linters
237
- make lint
238
-
239
- # Format code
240
- make format
241
- ```
242
-
243
- ## Project Status
244
-
245
- 🚧 **Phase 1 - Core Lexical Metrics** (In Progress)
246
- - [x] Project structure
247
- - [ ] MTLD implementation
248
- - [ ] Yule's K implementation
249
- - [ ] Hapax ratios implementation
250
- - [ ] Tests
251
- - [ ] v0.1.0 release
252
-
253
-
254
- ## Why pystylometry?
255
-
256
- - **Modular**: Install only what you need
257
- - **Consistent**: Uniform API across all metrics
258
- - **Rich Results**: Dataclass objects with metadata, not just numbers
259
- - **Well-Documented**: Formulas, references, and interpretations
260
- - **Type-Safe**: Full type hints for IDE support
261
- - **Tested**: Comprehensive test suite
262
-
263
-
264
- ## License
265
-
266
- MIT License - see LICENSE file for details.
267
-
268
- ## Author
269
-
270
- Craig Trim (craigtrim@gmail.com)
271
-
272
- ## Contributing
273
-
274
- Contributions welcome! Please open an issue or PR on GitHub.
275
-
@@ -1,46 +0,0 @@
1
- pystylometry/__init__.py,sha256=Urm3eneHIeYF08vjzHE5F26whK8hCJMduhyhxowW3Pw,7398
2
- pystylometry/_normalize.py,sha256=7tdfgAKg5CI2d4eoDypmFqOVByoxpwgUUZD6vyBH86A,8679
3
- pystylometry/_types.py,sha256=x03IN6w5YEzqJbTgpFN3K80-3fjw7AVBVkDSYO7-JaI,55649
4
- pystylometry/_utils.py,sha256=CXTx4KDJ_6iiHcc2OXqOYs-izhLf_ZEmJFKdHyd7q34,5282
5
- pystylometry/authorship/__init__.py,sha256=sJ2Oe3VVJubGY-VbWmKImaZmjk6FXgVOuj5L1RUpoH8,396
6
- pystylometry/authorship/additional_methods.py,sha256=eqpnT_W9CZgLFeiut0XwMlYjnnzeMCbkp66W1tKUc6A,3393
7
- pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
8
- pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
9
- pystylometry/character/__init__.py,sha256=CiiKJmZ10UJE8qAecavpOKyw-vGonsOew_mFH34ZOC0,371
10
- pystylometry/character/character_metrics.py,sha256=J_WVnN851GbUALQPXPFCW3_zesU6Q2oHsAmEzCfD98k,10959
11
- pystylometry/lexical/__init__.py,sha256=HTncnGVZgpktZqpf-r4_HI_9Jq42WkZZKXn8nho3y3s,751
12
- pystylometry/lexical/advanced_diversity.py,sha256=M2wMrmafljtMnWSrMzZjMI-GYRTIUd0tvNfSKZoWV6g,23967
13
- pystylometry/lexical/function_words.py,sha256=YXuMMqj9vVZHE4UiRRV8ng4xSovuxPMo9FuqLlbDung,15359
14
- pystylometry/lexical/hapax.py,sha256=URupABCzIyq1a7JmoEBo2HEpxTyghQo-z-aPHbtIhJU,7853
15
- pystylometry/lexical/mtld.py,sha256=8X25sDIq2HJusBJlr5RmHM9TBMAof_vo9TYc1x4onUI,4596
16
- pystylometry/lexical/ttr.py,sha256=-9hFM7jhZg0Ft4aboTkihTnH7kectM9n7CflKbAfr30,3279
17
- pystylometry/lexical/word_frequency_sophistication.py,sha256=Qp6_ZqukgmYQTD_qg0NG3qU7uq9tbbfnQ4rqxn5m21o,36395
18
- pystylometry/lexical/yule.py,sha256=cuxKjIIFSjiKI9ukZPMMaKeHUVPVngsu4QDZeQ3ZkeU,2849
19
- pystylometry/ngrams/__init__.py,sha256=eyITmSG4QP1NtVSagPsvc4j6W_E8TdB9wvBvXQHUnwo,379
20
- pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
21
- pystylometry/ngrams/extended_ngrams.py,sha256=OsBHTaaK73ZOhpS_yG2aWip1yWY2Fitdte0qx3wIshc,9475
22
- pystylometry/prosody/__init__.py,sha256=9tiD-U4sqEtUV8n9X339oF_C5tBNingjL-shGBXOrnY,265
23
- pystylometry/prosody/rhythm_prosody.py,sha256=V9OoxV5d4AZRZAb2HDY7-iEK1ijE7gtHhvFRD2DJvdA,1960
24
- pystylometry/readability/__init__.py,sha256=bJenjlGpNx7FF5AfOb6VA-wODdIa7Hc9iqoba1DLlh0,637
25
- pystylometry/readability/additional_formulas.py,sha256=KNH_7v_eAXv8HhSJfGW54529QGanWKbQfldhIn340BA,41794
26
- pystylometry/readability/ari.py,sha256=DufnzoPhI3DbeXu_B5fH7T4D7MydvII5f0CDWsLYgPo,4642
27
- pystylometry/readability/coleman_liau.py,sha256=nJ3YZxSoH2WeFgkTDxPlYWCdGWx8n79BG4ziX0-DePg,7111
28
- pystylometry/readability/complex_words.py,sha256=QyD4m-DeArLPoJzT85oRgj5Ry72eQNVwWpQkxz3IKMo,20717
29
- pystylometry/readability/flesch.py,sha256=R78aEIr4CyOP6oNRIbXF1RtsZxmFrgSw215ZIziviVU,5007
30
- pystylometry/readability/gunning_fog.py,sha256=lZqLCRqDAxN6FHdwMRwA-6JlsEYZd0k1MfT-Jo87Oqo,9978
31
- pystylometry/readability/smog.py,sha256=YTKYzP9giudzT7dK068uWTFNCDFbM-hGt_2Fjw43cF4,3272
32
- pystylometry/readability/syllables.py,sha256=U_tO1fmdOh2xyIJVkFooGMhmZs1hqlFPBa9wBjEwLw8,4272
33
- pystylometry/stylistic/__init__.py,sha256=nMykFZUCUKj-ZTk5H0OSKn24w6CSVEVIWieNG2B2hhc,581
34
- pystylometry/stylistic/cohesion_coherence.py,sha256=M_Pqfj0ZfCLDZBKFQCPx7rX9k6mxWFOjIsm1gsLdFyg,1618
35
- pystylometry/stylistic/genre_register.py,sha256=R32csC0M3eRcnACJNqMsyN-1ucMwdK8Twm5Tsa0Dd4k,1664
36
- pystylometry/stylistic/markers.py,sha256=s0ybwUZ6_wE064NXL9kQeTLKVeSHScFgZip7zkKYi2U,5134
37
- pystylometry/stylistic/vocabulary_overlap.py,sha256=TD8Rn32htB6MPHjc9xkr0LepJ6Q9k7f6uJvZt9_5aXA,1717
38
- pystylometry/syntactic/__init__.py,sha256=B9qe0R7w9t5x2s2dXygSuvciuEHrScgD3CkxvPWKMPE,391
39
- pystylometry/syntactic/advanced_syntactic.py,sha256=y41g5kpGbKZXHDDumCbJDhpnu9HFiDsN4obl178DE38,16357
40
- pystylometry/syntactic/pos_ratios.py,sha256=lEPE1LxcML1ydzEo_3eCjpAIPndtU5c_QsRyVWEf4ns,4848
41
- pystylometry/syntactic/sentence_stats.py,sha256=W4tEu9JV6cDZw1IRwT20UHcwqdomcN3QogNLlTd3xlc,3200
42
- pystylometry/syntactic/sentence_types.py,sha256=txWgDgN5AjL1wTAHVTp7PnBD1DMgildcYE1Ysd3Jgok,16342
43
- pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
44
- pystylometry-1.0.0.dist-info/METADATA,sha256=VHK8TpynEpNCdJdUY8jYtxaKKs7C9YAA5zTAntuH-SI,8153
45
- pystylometry-1.0.0.dist-info/WHEEL,sha256=sP946D7jFCHeNz5Iq4fL4Lu-PrWrFsgfLXbbkciIZwg,88
46
- pystylometry-1.0.0.dist-info/RECORD,,