pystylometry 0.1.0__tar.gz

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (26) hide show
  1. pystylometry-0.1.0/PKG-INFO +238 -0
  2. pystylometry-0.1.0/README.md +208 -0
  3. pystylometry-0.1.0/pyproject.toml +75 -0
  4. pystylometry-0.1.0/pystylometry/__init__.py +206 -0
  5. pystylometry-0.1.0/pystylometry/_types.py +172 -0
  6. pystylometry-0.1.0/pystylometry/_utils.py +197 -0
  7. pystylometry-0.1.0/pystylometry/authorship/__init__.py +10 -0
  8. pystylometry-0.1.0/pystylometry/authorship/burrows_delta.py +152 -0
  9. pystylometry-0.1.0/pystylometry/authorship/zeta.py +109 -0
  10. pystylometry-0.1.0/pystylometry/lexical/__init__.py +17 -0
  11. pystylometry-0.1.0/pystylometry/lexical/hapax.py +75 -0
  12. pystylometry-0.1.0/pystylometry/lexical/mtld.py +61 -0
  13. pystylometry-0.1.0/pystylometry/lexical/yule.py +66 -0
  14. pystylometry-0.1.0/pystylometry/ngrams/__init__.py +13 -0
  15. pystylometry-0.1.0/pystylometry/ngrams/entropy.py +130 -0
  16. pystylometry-0.1.0/pystylometry/readability/__init__.py +15 -0
  17. pystylometry-0.1.0/pystylometry/readability/ari.py +70 -0
  18. pystylometry-0.1.0/pystylometry/readability/coleman_liau.py +67 -0
  19. pystylometry-0.1.0/pystylometry/readability/flesch.py +81 -0
  20. pystylometry-0.1.0/pystylometry/readability/gunning_fog.py +63 -0
  21. pystylometry-0.1.0/pystylometry/readability/smog.py +71 -0
  22. pystylometry-0.1.0/pystylometry/readability/syllables.py +54 -0
  23. pystylometry-0.1.0/pystylometry/syntactic/__init__.py +9 -0
  24. pystylometry-0.1.0/pystylometry/syntactic/pos_ratios.py +61 -0
  25. pystylometry-0.1.0/pystylometry/syntactic/sentence_stats.py +60 -0
  26. pystylometry-0.1.0/pystylometry/tokenizer.py +598 -0
@@ -0,0 +1,238 @@
1
+ Metadata-Version: 2.4
2
+ Name: pystylometry
3
+ Version: 0.1.0
4
+ Summary: Comprehensive Python package for stylometric analysis
5
+ License: MIT
6
+ Keywords: stylometry,nlp,text-analysis,authorship,readability,lexical-diversity,readability-metrics
7
+ Author: Craig Trim
8
+ Author-email: craigtrim@gmail.com
9
+ Requires-Python: >=3.11,<4.0
10
+ Classifier: Development Status :: 3 - Alpha
11
+ Classifier: Intended Audience :: Developers
12
+ Classifier: Intended Audience :: Science/Research
13
+ Classifier: License :: OSI Approved :: MIT License
14
+ Classifier: Programming Language :: Python :: 3
15
+ Classifier: Programming Language :: Python :: 3.11
16
+ Classifier: Programming Language :: Python :: 3.12
17
+ Classifier: Programming Language :: Python :: 3.13
18
+ Classifier: Programming Language :: Python :: 3.14
19
+ Classifier: Programming Language :: Python :: 3.10
20
+ Classifier: Programming Language :: Python :: 3.9
21
+ Classifier: Topic :: Scientific/Engineering :: Information Analysis
22
+ Classifier: Topic :: Text Processing :: Linguistic
23
+ Classifier: Typing :: Typed
24
+ Requires-Dist: stylometry-ttr (>=1.0.3,<2.0.0)
25
+ Project-URL: Homepage, https://github.com/craigtrim/pystylometry
26
+ Project-URL: Issues, https://github.com/craigtrim/pystylometry/issues
27
+ Project-URL: Repository, https://github.com/craigtrim/pystylometry
28
+ Description-Content-Type: text/markdown
29
+
30
+ # pystylometry
31
+
32
+ [![Python Version](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
33
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
34
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
35
+ [![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
36
+
37
+ A comprehensive Python package for stylometric analysis with modular architecture and optional dependencies.
38
+
39
+ ## Features
40
+
41
+ **pystylometry** provides 50+ metrics across five analysis domains:
42
+
43
+ - **Lexical Diversity**: TTR, MTLD, Yule's K, Hapax ratios, and more
44
+ - **Readability**: Flesch, SMOG, Gunning Fog, Coleman-Liau, ARI
45
+ - **Syntactic Analysis**: POS ratios, sentence statistics (requires spaCy)
46
+ - **Authorship Attribution**: Burrows' Delta, Cosine Delta, Zeta scores
47
+ - **N-gram Analysis**: Character and word bigram entropy, perplexity
48
+
49
+ ## Installation
50
+
51
+ Install only what you need:
52
+
53
+ ```bash
54
+ # Core package (lexical metrics only)
55
+ pip install pystylometry
56
+
57
+ # With readability metrics
58
+ pip install pystylometry[readability]
59
+
60
+ # With syntactic metrics (requires spaCy)
61
+ pip install pystylometry[syntactic]
62
+
63
+ # With authorship metrics
64
+ pip install pystylometry[authorship]
65
+
66
+ # With n-gram analysis
67
+ pip install pystylometry[ngrams]
68
+
69
+ # Everything
70
+ pip install pystylometry[all]
71
+ ```
72
+
73
+ ## Quick Start
74
+
75
+ ### Using Individual Modules
76
+
77
+ ```python
78
+ from pystylometry.lexical import compute_mtld, compute_yule
79
+ from pystylometry.readability import compute_flesch
80
+
81
+ text = "Your text here..."
82
+
83
+ # Lexical diversity
84
+ mtld = compute_mtld(text)
85
+ print(f"MTLD: {mtld.mtld_average:.2f}")
86
+
87
+ yule = compute_yule(text)
88
+ print(f"Yule's K: {yule.yule_k:.2f}")
89
+
90
+ # Readability
91
+ flesch = compute_flesch(text)
92
+ print(f"Reading Ease: {flesch.reading_ease:.1f}")
93
+ print(f"Grade Level: {flesch.grade_level:.1f}")
94
+ ```
95
+
96
+ ### Using the Unified API
97
+
98
+ ```python
99
+ from pystylometry import analyze
100
+
101
+ text = "Your text here..."
102
+
103
+ # Analyze with multiple metrics at once
104
+ results = analyze(text, lexical=True, readability=True)
105
+
106
+ # Access results
107
+ print(f"MTLD: {results.lexical['mtld'].mtld_average:.2f}")
108
+ print(f"Flesch: {results.readability['flesch'].reading_ease:.1f}")
109
+ ```
110
+
111
+ ### Checking Available Modules
112
+
113
+ ```python
114
+ from pystylometry import get_available_modules
115
+
116
+ available = get_available_modules()
117
+ print(available)
118
+ # {'lexical': True, 'readability': True, 'syntactic': False, ...}
119
+ ```
120
+
121
+ ## API Design
122
+
123
+ ### Clean, Consistent Interface
124
+
125
+ Every metric function:
126
+ - Takes text as input
127
+ - Returns a rich result object (never just a float)
128
+ - Includes metadata about the computation
129
+ - Has comprehensive docstrings with formulas and references
130
+
131
+ ```python
132
+ from pystylometry.lexical import compute_yule
133
+
134
+ result = compute_yule(text)
135
+ # Returns: YuleResult(yule_k=..., yule_i=..., metadata={...})
136
+ ```
137
+
138
+ ## Available Metrics
139
+
140
+ ### Lexical Diversity
141
+ - **TTR** - Type-Token Ratio (via stylometry-ttr)
142
+ - **MTLD** - Measure of Textual Lexical Diversity
143
+ - **Yule's K** - Vocabulary repetitiveness
144
+ - **Hapax Legomena** - Words appearing once/twice
145
+ - **Sichel's S** - Hapax-based richness
146
+ - **Honoré's R** - Vocabulary richness constant
147
+
148
+ ### Readability
149
+ - **Flesch Reading Ease** - 0-100 difficulty scale
150
+ - **Flesch-Kincaid Grade** - US grade level
151
+ - **SMOG Index** - Years of education needed
152
+ - **Gunning Fog** - Readability complexity
153
+ - **Coleman-Liau** - Character-based grade level
154
+ - **ARI** - Automated Readability Index
155
+
156
+ ### Syntactic (requires spaCy)
157
+ - **POS Ratios** - Noun/verb/adjective/adverb ratios
158
+ - **Lexical Density** - Content vs function words
159
+ - **Sentence Statistics** - Length, variation, complexity
160
+
161
+ ### Authorship (pure Python, no extra dependencies)
162
+ - **Burrows' Delta** - Author distance measure
163
+ - **Cosine Delta** - Angular distance
164
+ - **Zeta Scores** - Distinctive word usage
165
+
166
+ ### N-grams (pure Python, no extra dependencies)
167
+ - **Character Bigram Entropy** - Character predictability
168
+ - **Word Bigram Entropy** - Word sequence predictability
169
+ - **Perplexity** - Language model fit
170
+
171
+ ## Dependencies
172
+
173
+ **Core (always installed):**
174
+ - stylometry-ttr
175
+
176
+ **Optional:**
177
+ - `readability`: pronouncing (for syllable counting)
178
+ - `syntactic`: spacy>=3.8.0
179
+ - `authorship`: None (pure Python + stdlib)
180
+ - `ngrams`: None (pure Python + stdlib)
181
+
182
+ ## Development
183
+
184
+ ```bash
185
+ # Clone the repository
186
+ git clone https://github.com/craigtrim/pystylometry
187
+ cd pystylometry
188
+
189
+ # Install with dev dependencies
190
+ pip install -e ".[dev,all]"
191
+
192
+ # Run tests
193
+ make test
194
+
195
+ # Run linters
196
+ make lint
197
+
198
+ # Format code
199
+ make format
200
+ ```
201
+
202
+ ## Project Status
203
+
204
+ 🚧 **Phase 1 - Core Lexical Metrics** (In Progress)
205
+ - [x] Project structure
206
+ - [ ] MTLD implementation
207
+ - [ ] Yule's K implementation
208
+ - [ ] Hapax ratios implementation
209
+ - [ ] Tests
210
+ - [ ] v0.1.0 release
211
+
212
+ See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full roadmap.
213
+
214
+ ## Why pystylometry?
215
+
216
+ - **Modular**: Install only what you need
217
+ - **Consistent**: Uniform API across all metrics
218
+ - **Rich Results**: Dataclass objects with metadata, not just numbers
219
+ - **Well-Documented**: Formulas, references, and interpretations
220
+ - **Type-Safe**: Full type hints for IDE support
221
+ - **Tested**: Comprehensive test suite
222
+
223
+ ## References
224
+
225
+ See [stylometry-metrics.md](.claude/context/stylometry-metrics.md) for the complete metrics reference table with formulas.
226
+
227
+ ## License
228
+
229
+ MIT License - see LICENSE file for details.
230
+
231
+ ## Author
232
+
233
+ Craig Trim (craigtrim@gmail.com)
234
+
235
+ ## Contributing
236
+
237
+ Contributions welcome! Please open an issue or PR on GitHub.
238
+
@@ -0,0 +1,208 @@
1
+ # pystylometry
2
+
3
+ [![Python Version](https://img.shields.io/badge/python-3.11%2B-blue.svg)](https://www.python.org/downloads/)
4
+ [![License: MIT](https://img.shields.io/badge/License-MIT-yellow.svg)](https://opensource.org/licenses/MIT)
5
+ [![Code style: ruff](https://img.shields.io/badge/code%20style-ruff-000000.svg)](https://github.com/astral-sh/ruff)
6
+ [![PyPI version](https://badge.fury.io/py/pystylometry.svg)](https://badge.fury.io/py/pystylometry)
7
+
8
+ A comprehensive Python package for stylometric analysis with modular architecture and optional dependencies.
9
+
10
+ ## Features
11
+
12
+ **pystylometry** provides 50+ metrics across five analysis domains:
13
+
14
+ - **Lexical Diversity**: TTR, MTLD, Yule's K, Hapax ratios, and more
15
+ - **Readability**: Flesch, SMOG, Gunning Fog, Coleman-Liau, ARI
16
+ - **Syntactic Analysis**: POS ratios, sentence statistics (requires spaCy)
17
+ - **Authorship Attribution**: Burrows' Delta, Cosine Delta, Zeta scores
18
+ - **N-gram Analysis**: Character and word bigram entropy, perplexity
19
+
20
+ ## Installation
21
+
22
+ Install only what you need:
23
+
24
+ ```bash
25
+ # Core package (lexical metrics only)
26
+ pip install pystylometry
27
+
28
+ # With readability metrics
29
+ pip install pystylometry[readability]
30
+
31
+ # With syntactic metrics (requires spaCy)
32
+ pip install pystylometry[syntactic]
33
+
34
+ # With authorship metrics
35
+ pip install pystylometry[authorship]
36
+
37
+ # With n-gram analysis
38
+ pip install pystylometry[ngrams]
39
+
40
+ # Everything
41
+ pip install pystylometry[all]
42
+ ```
43
+
44
+ ## Quick Start
45
+
46
+ ### Using Individual Modules
47
+
48
+ ```python
49
+ from pystylometry.lexical import compute_mtld, compute_yule
50
+ from pystylometry.readability import compute_flesch
51
+
52
+ text = "Your text here..."
53
+
54
+ # Lexical diversity
55
+ mtld = compute_mtld(text)
56
+ print(f"MTLD: {mtld.mtld_average:.2f}")
57
+
58
+ yule = compute_yule(text)
59
+ print(f"Yule's K: {yule.yule_k:.2f}")
60
+
61
+ # Readability
62
+ flesch = compute_flesch(text)
63
+ print(f"Reading Ease: {flesch.reading_ease:.1f}")
64
+ print(f"Grade Level: {flesch.grade_level:.1f}")
65
+ ```
66
+
67
+ ### Using the Unified API
68
+
69
+ ```python
70
+ from pystylometry import analyze
71
+
72
+ text = "Your text here..."
73
+
74
+ # Analyze with multiple metrics at once
75
+ results = analyze(text, lexical=True, readability=True)
76
+
77
+ # Access results
78
+ print(f"MTLD: {results.lexical['mtld'].mtld_average:.2f}")
79
+ print(f"Flesch: {results.readability['flesch'].reading_ease:.1f}")
80
+ ```
81
+
82
+ ### Checking Available Modules
83
+
84
+ ```python
85
+ from pystylometry import get_available_modules
86
+
87
+ available = get_available_modules()
88
+ print(available)
89
+ # {'lexical': True, 'readability': True, 'syntactic': False, ...}
90
+ ```
91
+
92
+ ## API Design
93
+
94
+ ### Clean, Consistent Interface
95
+
96
+ Every metric function:
97
+ - Takes text as input
98
+ - Returns a rich result object (never just a float)
99
+ - Includes metadata about the computation
100
+ - Has comprehensive docstrings with formulas and references
101
+
102
+ ```python
103
+ from pystylometry.lexical import compute_yule
104
+
105
+ result = compute_yule(text)
106
+ # Returns: YuleResult(yule_k=..., yule_i=..., metadata={...})
107
+ ```
108
+
109
+ ## Available Metrics
110
+
111
+ ### Lexical Diversity
112
+ - **TTR** - Type-Token Ratio (via stylometry-ttr)
113
+ - **MTLD** - Measure of Textual Lexical Diversity
114
+ - **Yule's K** - Vocabulary repetitiveness
115
+ - **Hapax Legomena** - Words appearing once/twice
116
+ - **Sichel's S** - Hapax-based richness
117
+ - **Honoré's R** - Vocabulary richness constant
118
+
119
+ ### Readability
120
+ - **Flesch Reading Ease** - 0-100 difficulty scale
121
+ - **Flesch-Kincaid Grade** - US grade level
122
+ - **SMOG Index** - Years of education needed
123
+ - **Gunning Fog** - Readability complexity
124
+ - **Coleman-Liau** - Character-based grade level
125
+ - **ARI** - Automated Readability Index
126
+
127
+ ### Syntactic (requires spaCy)
128
+ - **POS Ratios** - Noun/verb/adjective/adverb ratios
129
+ - **Lexical Density** - Content vs function words
130
+ - **Sentence Statistics** - Length, variation, complexity
131
+
132
+ ### Authorship (pure Python, no extra dependencies)
133
+ - **Burrows' Delta** - Author distance measure
134
+ - **Cosine Delta** - Angular distance
135
+ - **Zeta Scores** - Distinctive word usage
136
+
137
+ ### N-grams (pure Python, no extra dependencies)
138
+ - **Character Bigram Entropy** - Character predictability
139
+ - **Word Bigram Entropy** - Word sequence predictability
140
+ - **Perplexity** - Language model fit
141
+
142
+ ## Dependencies
143
+
144
+ **Core (always installed):**
145
+ - stylometry-ttr
146
+
147
+ **Optional:**
148
+ - `readability`: pronouncing (for syllable counting)
149
+ - `syntactic`: spacy>=3.8.0
150
+ - `authorship`: None (pure Python + stdlib)
151
+ - `ngrams`: None (pure Python + stdlib)
152
+
153
+ ## Development
154
+
155
+ ```bash
156
+ # Clone the repository
157
+ git clone https://github.com/craigtrim/pystylometry
158
+ cd pystylometry
159
+
160
+ # Install with dev dependencies
161
+ pip install -e ".[dev,all]"
162
+
163
+ # Run tests
164
+ make test
165
+
166
+ # Run linters
167
+ make lint
168
+
169
+ # Format code
170
+ make format
171
+ ```
172
+
173
+ ## Project Status
174
+
175
+ 🚧 **Phase 1 - Core Lexical Metrics** (In Progress)
176
+ - [x] Project structure
177
+ - [ ] MTLD implementation
178
+ - [ ] Yule's K implementation
179
+ - [ ] Hapax ratios implementation
180
+ - [ ] Tests
181
+ - [ ] v0.1.0 release
182
+
183
+ See [pystylometry-plan.md](.claude/context/pystylometry-plan.md) for the full roadmap.
184
+
185
+ ## Why pystylometry?
186
+
187
+ - **Modular**: Install only what you need
188
+ - **Consistent**: Uniform API across all metrics
189
+ - **Rich Results**: Dataclass objects with metadata, not just numbers
190
+ - **Well-Documented**: Formulas, references, and interpretations
191
+ - **Type-Safe**: Full type hints for IDE support
192
+ - **Tested**: Comprehensive test suite
193
+
194
+ ## References
195
+
196
+ See [stylometry-metrics.md](.claude/context/stylometry-metrics.md) for the complete metrics reference table with formulas.
197
+
198
+ ## License
199
+
200
+ MIT License - see LICENSE file for details.
201
+
202
+ ## Author
203
+
204
+ Craig Trim (craigtrim@gmail.com)
205
+
206
+ ## Contributing
207
+
208
+ Contributions welcome! Please open an issue or PR on GitHub.
@@ -0,0 +1,75 @@
1
+ [tool.poetry]
2
+ name = "pystylometry"
3
+ version = "0.1.0"
4
+ description = "Comprehensive Python package for stylometric analysis"
5
+ authors = ["Craig Trim <craigtrim@gmail.com>"]
6
+ readme = "README.md"
7
+ license = "MIT"
8
+ packages = [{ include = "pystylometry" }]
9
+ keywords = [
10
+ "stylometry",
11
+ "nlp",
12
+ "text-analysis",
13
+ "authorship",
14
+ "readability",
15
+ "lexical-diversity",
16
+ "readability-metrics",
17
+ ]
18
+ classifiers = [
19
+ "Development Status :: 3 - Alpha",
20
+ "Intended Audience :: Science/Research",
21
+ "Intended Audience :: Developers",
22
+ "License :: OSI Approved :: MIT License",
23
+ "Programming Language :: Python :: 3",
24
+ "Programming Language :: Python :: 3.9",
25
+ "Programming Language :: Python :: 3.10",
26
+ "Programming Language :: Python :: 3.11",
27
+ "Programming Language :: Python :: 3.12",
28
+ "Topic :: Text Processing :: Linguistic",
29
+ "Topic :: Scientific/Engineering :: Information Analysis",
30
+ "Typing :: Typed",
31
+ ]
32
+
33
+ [tool.poetry.urls]
34
+ Homepage = "https://github.com/craigtrim/pystylometry"
35
+ Repository = "https://github.com/craigtrim/pystylometry"
36
+ Issues = "https://github.com/craigtrim/pystylometry/issues"
37
+
38
+ [tool.poetry.dependencies]
39
+ python = "^3.11"
40
+ stylometry-ttr = "^1.0.3"
41
+
42
+ [tool.poetry.group.readability.dependencies]
43
+ pronouncing = "^0.2.0"
44
+
45
+ [tool.poetry.group.syntactic.dependencies]
46
+ spacy = "^3.8.0"
47
+
48
+ [tool.poetry.group.dev.dependencies]
49
+ pytest = "^8.0"
50
+ pytest-cov = "^4.0"
51
+ ruff = "^0.1"
52
+ mypy = "^1.0"
53
+
54
+ [build-system]
55
+ requires = ["poetry-core"]
56
+ build-backend = "poetry.core.masonry.api"
57
+
58
+ [tool.pytest.ini_options]
59
+ testpaths = ["tests"]
60
+ python_files = ["test_*.py"]
61
+ addopts = "-v --cov=pystylometry --cov-report=term-missing"
62
+
63
+ [tool.ruff]
64
+ line-length = 100
65
+ target-version = "py39"
66
+
67
+ [tool.ruff.lint]
68
+ select = ["E", "F", "I", "N", "W"]
69
+ ignore = []
70
+
71
+ [tool.mypy]
72
+ python_version = "3.9"
73
+ warn_return_any = true
74
+ warn_unused_configs = true
75
+ disallow_untyped_defs = true