pystylometry 0.1.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/__init__.py +206 -0
- pystylometry/_types.py +172 -0
- pystylometry/_utils.py +197 -0
- pystylometry/authorship/__init__.py +10 -0
- pystylometry/authorship/burrows_delta.py +152 -0
- pystylometry/authorship/zeta.py +109 -0
- pystylometry/lexical/__init__.py +17 -0
- pystylometry/lexical/hapax.py +75 -0
- pystylometry/lexical/mtld.py +61 -0
- pystylometry/lexical/yule.py +66 -0
- pystylometry/ngrams/__init__.py +13 -0
- pystylometry/ngrams/entropy.py +130 -0
- pystylometry/readability/__init__.py +15 -0
- pystylometry/readability/ari.py +70 -0
- pystylometry/readability/coleman_liau.py +67 -0
- pystylometry/readability/flesch.py +81 -0
- pystylometry/readability/gunning_fog.py +63 -0
- pystylometry/readability/smog.py +71 -0
- pystylometry/readability/syllables.py +54 -0
- pystylometry/syntactic/__init__.py +9 -0
- pystylometry/syntactic/pos_ratios.py +61 -0
- pystylometry/syntactic/sentence_stats.py +60 -0
- pystylometry/tokenizer.py +598 -0
- pystylometry-0.1.0.dist-info/METADATA +238 -0
- pystylometry-0.1.0.dist-info/RECORD +26 -0
- pystylometry-0.1.0.dist-info/WHEEL +4 -0
|
@@ -0,0 +1,238 @@
|
|
|
1
|
+
Metadata-Version: 2.4
|
|
2
|
+
Name: pystylometry
|
|
3
|
+
Version: 0.1.0
|
|
4
|
+
Summary: Comprehensive Python package for stylometric analysis
|
|
5
|
+
License: MIT
|
|
6
|
+
Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
|
|
7
|
+
Author: Craig Trim
|
|
8
|
+
Author-email: craigtrim@gmail.com
|
|
9
|
+
Requires-Python: >=3.11,<4.0
|
|
10
|
+
Classifier: Development Status :: 3 - Alpha
|
|
11
|
+
Classifier: Intended Audience :: Developers
|
|
12
|
+
Classifier: Intended Audience :: Science/Research
|
|
13
|
+
Classifier: License :: OSI Approved :: MIT License
|
|
14
|
+
Classifier: Programming Language :: Python :: 3
|
|
15
|
+
Classifier: Programming Language :: Python :: 3.11
|
|
16
|
+
Classifier: Programming Language :: Python :: 3.12
|
|
17
|
+
Classifier: Programming Language :: Python :: 3.13
|
|
18
|
+
Classifier: Programming Language :: Python :: 3.14
|
|
19
|
+
Classifier: Programming Language :: Python :: 3.10
|
|
20
|
+
Classifier: Programming Language :: Python :: 3.9
|
|
21
|
+
Classifier: Topic :: Scientific/Engineering :: Information Analysis
|
|
22
|
+
Classifier: Topic :: Text Processing :: Linguistic
|
|
23
|
+
Classifier: Typing :: Typed
|
|
24
|
+
Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
|
|
25
|
+
Project-URL: Homepage, https://github.com/craigtrim/pystylometry
|
|
26
|
+
Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
|
|
27
|
+
Project-URL: Repository, https://github.com/craigtrim/pystylometry
|
|
28
|
+
Description-Content-Type: text/markdown
|
|
29
|
+
|
|
30
|
+
# pystylometry
|
|
31
|
+
|
|
32
|
+
[Python 3.11+](https://www.python.org/downloads/)
|
|
33
|
+
[License: MIT](https://opensource.org/licenses/MIT)
|
|
34
|
+
[Code style: Ruff](https://github.com/astral-sh/ruff)
|
|
35
|
+
[PyPI version](https://badge.fury.io/py/pystylometry)
|
|
36
|
+
|
|
37
|
+
A comprehensive Python package for stylometric analysis with a modular architecture and optional dependencies.
|
|
38
|
+
|
|
39
|
+
## Features
|
|
40
|
+
|
|
41
|
+
**pystylometry** provides 50+ metrics across five analysis domains:
|
|
42
|
+
|
|
43
|
+
- **Lexical Diversity**: TTR, MTLD, Yule's K, Hapax ratios, and more
|
|
44
|
+
- **Readability**: Flesch, SMOG, Gunning Fog, Coleman-Liau, ARI
|
|
45
|
+
- **Syntactic Analysis**: POS ratios, sentence statistics (requires spaCy)
|
|
46
|
+
- **Authorship Attribution**: Burrows' Delta, Cosine Delta, Zeta scores
|
|
47
|
+
- **N-gram Analysis**: Character and word bigram entropy, perplexity
|
|
48
|
+
|
|
49
|
+
## Installation
|
|
50
|
+
|
|
51
|
+
Install only what you need:
|
|
52
|
+
|
|
53
|
+
```bash
|
|
54
|
+
# Core package (lexical metrics only)
|
|
55
|
+
pip install pystylometry
|
|
56
|
+
|
|
57
|
+
# With readability metrics
|
|
58
|
+
pip install "pystylometry[readability]"
|
|
59
|
+
|
|
60
|
+
# With syntactic metrics (requires spaCy)
|
|
61
|
+
pip install "pystylometry[syntactic]"
|
|
62
|
+
|
|
63
|
+
# With authorship metrics
|
|
64
|
+
pip install "pystylometry[authorship]"
|
|
65
|
+
|
|
66
|
+
# With n-gram analysis
|
|
67
|
+
pip install "pystylometry[ngrams]"
|
|
68
|
+
|
|
69
|
+
# Everything
|
|
70
|
+
pip install "pystylometry[all]"
|
|
71
|
+
```
|
|
72
|
+
|
|
73
|
+
## Quick Start
|
|
74
|
+
|
|
75
|
+
### Using Individual Modules
|
|
76
|
+
|
|
77
|
+
```python
|
|
78
|
+
from pystylometry.lexical import compute_mtld, compute_yule
|
|
79
|
+
from pystylometry.readability import compute_flesch
|
|
80
|
+
|
|
81
|
+
text = "Your text here..."
|
|
82
|
+
|
|
83
|
+
# Lexical diversity
|
|
84
|
+
mtld = compute_mtld(text)
|
|
85
|
+
print(f"MTLD: {mtld.mtld_average:.2f}")
|
|
86
|
+
|
|
87
|
+
yule = compute_yule(text)
|
|
88
|
+
print(f"Yule's K: {yule.yule_k:.2f}")
|
|
89
|
+
|
|
90
|
+
# Readability
|
|
91
|
+
flesch = compute_flesch(text)
|
|
92
|
+
print(f"Reading Ease: {flesch.reading_ease:.1f}")
|
|
93
|
+
print(f"Grade Level: {flesch.grade_level:.1f}")
|
|
94
|
+
```
|
|
95
|
+
|
|
96
|
+
### Using the Unified API
|
|
97
|
+
|
|
98
|
+
```python
|
|
99
|
+
from pystylometry import analyze
|
|
100
|
+
|
|
101
|
+
text = "Your text here..."
|
|
102
|
+
|
|
103
|
+
# Analyze with multiple metrics at once
|
|
104
|
+
results = analyze(text, lexical=True, readability=True)
|
|
105
|
+
|
|
106
|
+
# Access results
|
|
107
|
+
print(f"MTLD: {results.lexical['mtld'].mtld_average:.2f}")
|
|
108
|
+
print(f"Flesch: {results.readability['flesch'].reading_ease:.1f}")
|
|
109
|
+
```
|
|
110
|
+
|
|
111
|
+
### Checking Available Modules
|
|
112
|
+
|
|
113
|
+
```python
|
|
114
|
+
from pystylometry import get_available_modules
|
|
115
|
+
|
|
116
|
+
available = get_available_modules()
|
|
117
|
+
print(available)
|
|
118
|
+
# {'lexical': True, 'readability': True, 'syntactic': False, ...}
|
|
119
|
+
```
|
|
120
|
+
|
|
121
|
+
## API Design
|
|
122
|
+
|
|
123
|
+
### Clean, Consistent Interface
|
|
124
|
+
|
|
125
|
+
Every metric function:
|
|
126
|
+
- Takes text as input
|
|
127
|
+
- Returns a rich result object (never just a float)
|
|
128
|
+
- Includes metadata about the computation
|
|
129
|
+
- Has comprehensive docstrings with formulas and references
|
|
130
|
+
|
|
131
|
+
```python
|
|
132
|
+
from pystylometry.lexical import compute_yule
|
|
133
|
+
|
|
134
|
+
result = compute_yule(text)
|
|
135
|
+
# Returns: YuleResult(yule_k=..., yule_i=..., metadata={...})
|
|
136
|
+
```
|
|
137
|
+
|
|
138
|
+
## Available Metrics
|
|
139
|
+
|
|
140
|
+
### Lexical Diversity
|
|
141
|
+
- **TTR** - Type-Token Ratio (via stylometry-ttr)
|
|
142
|
+
- **MTLD** - Measure of Textual Lexical Diversity
|
|
143
|
+
- **Yule's K** - Vocabulary repetitiveness
|
|
144
|
+
- **Hapax Legomena** - Words appearing exactly once (hapax legomena) or exactly twice (dis legomena)
|
|
145
|
+
- **Sichel's S** - Hapax-based richness
|
|
146
|
+
- **Honoré's R** - Vocabulary richness constant
|
|
147
|
+
|
|
148
|
+
### Readability
|
|
149
|
+
- **Flesch Reading Ease** - 0-100 difficulty scale
|
|
150
|
+
- **Flesch-Kincaid Grade** - US grade level
|
|
151
|
+
- **SMOG Index** - Years of education needed
|
|
152
|
+
- **Gunning Fog** - Readability complexity
|
|
153
|
+
- **Coleman-Liau** - Character-based grade level
|
|
154
|
+
- **ARI** - Automated Readability Index
|
|
155
|
+
|
|
156
|
+
### Syntactic (requires spaCy)
|
|
157
|
+
- **POS Ratios** - Noun/verb/adjective/adverb ratios
|
|
158
|
+
- **Lexical Density** - Content vs function words
|
|
159
|
+
- **Sentence Statistics** - Length, variation, complexity
|
|
160
|
+
|
|
161
|
+
### Authorship (pure Python, no extra dependencies)
|
|
162
|
+
- **Burrows' Delta** - Author distance measure
|
|
163
|
+
- **Cosine Delta** - Angular distance
|
|
164
|
+
- **Zeta Scores** - Distinctive word usage
|
|
165
|
+
|
|
166
|
+
### N-grams (pure Python, no extra dependencies)
|
|
167
|
+
- **Character Bigram Entropy** - Character predictability
|
|
168
|
+
- **Word Bigram Entropy** - Word sequence predictability
|
|
169
|
+
- **Perplexity** - Language model fit
|
|
170
|
+
|
|
171
|
+
## Dependencies
|
|
172
|
+
|
|
173
|
+
**Core (always installed):**
|
|
174
|
+
- stylometry-ttr
|
|
175
|
+
|
|
176
|
+
**Optional:**
|
|
177
|
+
- `readability`: pronouncing (for syllable counting)
|
|
178
|
+
- `syntactic`: spacy>=3.8.0
|
|
179
|
+
- `authorship`: None (pure Python + stdlib)
|
|
180
|
+
- `ngrams`: None (pure Python + stdlib)
|
|
181
|
+
|
|
182
|
+
## Development
|
|
183
|
+
|
|
184
|
+
```bash
|
|
185
|
+
# Clone the repository
|
|
186
|
+
git clone https://github.com/craigtrim/pystylometry
|
|
187
|
+
cd pystylometry
|
|
188
|
+
|
|
189
|
+
# Install with dev dependencies
|
|
190
|
+
pip install -e ".[dev,all]"
|
|
191
|
+
|
|
192
|
+
# Run tests
|
|
193
|
+
make test
|
|
194
|
+
|
|
195
|
+
# Run linters
|
|
196
|
+
make lint
|
|
197
|
+
|
|
198
|
+
# Format code
|
|
199
|
+
make format
|
|
200
|
+
```
|
|
201
|
+
|
|
202
|
+
## Project Status
|
|
203
|
+
|
|
204
|
+
🚧 **Phase 1 - Core Lexical Metrics** (In Progress)
|
|
205
|
+
- [x] Project structure
|
|
206
|
+
- [ ] MTLD implementation
|
|
207
|
+
- [ ] Yule's K implementation
|
|
208
|
+
- [ ] Hapax ratios implementation
|
|
209
|
+
- [ ] Tests
|
|
210
|
+
- [x] v0.1.0 release
|
|
211
|
+
|
|
212
|
+
See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full roadmap.
|
|
213
|
+
|
|
214
|
+
## Why pystylometry?
|
|
215
|
+
|
|
216
|
+
- **Modular**: Install only what you need
|
|
217
|
+
- **Consistent**: Uniform API across all metrics
|
|
218
|
+
- **Rich Results**: Dataclass objects with metadata, not just numbers
|
|
219
|
+
- **Well-Documented**: Formulas, references, and interpretations
|
|
220
|
+
- **Type-Safe**: Full type hints for IDE support
|
|
221
|
+
- **Tested**: Comprehensive test suite
|
|
222
|
+
|
|
223
|
+
## References
|
|
224
|
+
|
|
225
|
+
See [stylometry-metrics.md](.claude/context/stylometry-metrics.md) for the complete metrics reference table with formulas.
|
|
226
|
+
|
|
227
|
+
## License
|
|
228
|
+
|
|
229
|
+
MIT License - see LICENSE file for details.
|
|
230
|
+
|
|
231
|
+
## Author
|
|
232
|
+
|
|
233
|
+
Craig Trim (craigtrim@gmail.com)
|
|
234
|
+
|
|
235
|
+
## Contributing
|
|
236
|
+
|
|
237
|
+
Contributions welcome! Please open an issue or PR on GitHub.
|
|
238
|
+
|
|
@@ -0,0 +1,26 @@
|
|
|
1
|
+
pystylometry/__init__.py,sha256=gCsVT2Eu9sP20Wmq8KQgIf0fEPdZhs-Dlp1IWWfmC5Y,7454
|
|
2
|
+
pystylometry/_types.py,sha256=lldXONgNhTVH5ZGHUVx7BKez7szR8Svi05BSg0M3VsY,3503
|
|
3
|
+
pystylometry/_utils.py,sha256=UN1hBe0WDIov33dG4di7Br9xIIFUwka6SoeOeJ_aGpg,5116
|
|
4
|
+
pystylometry/authorship/__init__.py,sha256=iXkH94lGd5kCDtk1UOuz3gUfOsAJb4DDcv2evTWSCAA,233
|
|
5
|
+
pystylometry/authorship/burrows_delta.py,sha256=6XC8I7EcBTLbn9BNKZsOtL0otL4vKFX10aHBlU4Bki4,5677
|
|
6
|
+
pystylometry/authorship/zeta.py,sha256=oOi9Y6ZPq15ILLVl6So9O9ERvzig26en6_dpQJWeoOc,4338
|
|
7
|
+
pystylometry/lexical/__init__.py,sha256=eHAEzZytevXnR7f1KY3ADrczdmH3YZ2LZCrUx0vXJJw,416
|
|
8
|
+
pystylometry/lexical/hapax.py,sha256=JVwbd7rWe0iy-KGwAGK-PFN7mLhB6xwa4WFvoqGmfMA,2365
|
|
9
|
+
pystylometry/lexical/mtld.py,sha256=tNtvpHKyf_i4E7wTDYZkG6BIkAVbaCOPZpZDApPLtxE,1850
|
|
10
|
+
pystylometry/lexical/yule.py,sha256=XOdGth-E_rCXVp_Y6ZdTXMW2QATw0NoDbNPuXIZFjas,1898
|
|
11
|
+
pystylometry/ngrams/__init__.py,sha256=pJadMr_VAMOENr-CteCqlgokuCP2QIPku7blMj-9ToI,295
|
|
12
|
+
pystylometry/ngrams/entropy.py,sha256=8WT1YaZHRh_0GOEycBzQOzObHZFme5oMf6_lBaEo4ZU,3876
|
|
13
|
+
pystylometry/readability/__init__.py,sha256=kwVGQyxk1e7W7NnZTjwSJ-0pXmyFYeeoNRSt7bpzjoA,346
|
|
14
|
+
pystylometry/readability/ari.py,sha256=Br1ZJAxtoDoT2fFeKRhQuiwyWpobS9E8_wRyizh5w3c,2257
|
|
15
|
+
pystylometry/readability/coleman_liau.py,sha256=weH4w9HSdLxhUiYEub3WgNjKsbSXKMkq-zEZtaqjyU4,2058
|
|
16
|
+
pystylometry/readability/flesch.py,sha256=6YuoS8Qo6KP6enJKMEZ731mMAOKdz2AnVvzZYNVxZHM,2700
|
|
17
|
+
pystylometry/readability/gunning_fog.py,sha256=5x8pntoYMV2pQAnI3-LBm_Od1LJaTcIxnOJ9ESBNRB8,1984
|
|
18
|
+
pystylometry/readability/smog.py,sha256=5Z-PNmJtnsCQTUfv_vnaQdaUAoK0EatCQJW2ypqav6c,2171
|
|
19
|
+
pystylometry/readability/syllables.py,sha256=BuNm9A9ZSzE7kG3qGzhzw47vzui5DAG1nMis6gbwC4k,1373
|
|
20
|
+
pystylometry/syntactic/__init__.py,sha256=8iJRXTu3VTXnoqIIwyLTWlLvxMweq_stG3FkynpoGVM,217
|
|
21
|
+
pystylometry/syntactic/pos_ratios.py,sha256=tzM4y6vqkZp1Jx2L4JzdWtLBegrlqOUgZrohnwjB3H8,1876
|
|
22
|
+
pystylometry/syntactic/sentence_stats.py,sha256=iMOeXhV6VwDt9lKy66pq8i-zQ4JsmdFdNGAsFR5_c48,1825
|
|
23
|
+
pystylometry/tokenizer.py,sha256=03FEF4kKp72v-ypbtMg8u0WyVJGk3YJx6Nw3SGzyAnA,18166
|
|
24
|
+
pystylometry-0.1.0.dist-info/METADATA,sha256=u8t8BmHbeaoDDzTOFicH0YMW5tt3ljStkIai1iwTBK8,6802
|
|
25
|
+
pystylometry-0.1.0.dist-info/WHEEL,sha256=3ny-bZhpXrU6vSQ1UPG34FoxZBp3lVcvK0LkgUz6VLk,88
|
|
26
|
+
pystylometry-0.1.0.dist-info/RECORD,,
|