pystylometry 1.0.0__py3-none-any.whl → 1.3.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/README.md +42 -0
- pystylometry/__init__.py +45 -3
- pystylometry/_types.py +1017 -259
- pystylometry/authorship/README.md +21 -0
- pystylometry/authorship/__init__.py +28 -4
- pystylometry/authorship/additional_methods.py +260 -40
- pystylometry/authorship/compression.py +175 -0
- pystylometry/authorship/kilgarriff.py +354 -0
- pystylometry/character/README.md +17 -0
- pystylometry/character/character_metrics.py +267 -179
- pystylometry/cli.py +427 -0
- pystylometry/consistency/README.md +27 -0
- pystylometry/consistency/__init__.py +57 -0
- pystylometry/consistency/_thresholds.py +162 -0
- pystylometry/consistency/drift.py +549 -0
- pystylometry/dialect/README.md +26 -0
- pystylometry/dialect/__init__.py +65 -0
- pystylometry/dialect/_data/dialect_markers.json +1134 -0
- pystylometry/dialect/_loader.py +360 -0
- pystylometry/dialect/detector.py +533 -0
- pystylometry/lexical/README.md +23 -0
- pystylometry/lexical/advanced_diversity.py +61 -22
- pystylometry/lexical/function_words.py +255 -56
- pystylometry/lexical/hapax.py +182 -52
- pystylometry/lexical/mtld.py +108 -26
- pystylometry/lexical/ttr.py +76 -10
- pystylometry/lexical/word_frequency_sophistication.py +1522 -298
- pystylometry/lexical/yule.py +136 -50
- pystylometry/ngrams/README.md +18 -0
- pystylometry/ngrams/entropy.py +150 -49
- pystylometry/ngrams/extended_ngrams.py +314 -69
- pystylometry/prosody/README.md +17 -0
- pystylometry/prosody/rhythm_prosody.py +773 -11
- pystylometry/readability/README.md +23 -0
- pystylometry/readability/additional_formulas.py +1887 -762
- pystylometry/readability/ari.py +144 -82
- pystylometry/readability/coleman_liau.py +136 -109
- pystylometry/readability/flesch.py +177 -73
- pystylometry/readability/gunning_fog.py +165 -161
- pystylometry/readability/smog.py +123 -42
- pystylometry/stylistic/README.md +20 -0
- pystylometry/stylistic/cohesion_coherence.py +669 -13
- pystylometry/stylistic/genre_register.py +1560 -17
- pystylometry/stylistic/markers.py +611 -17
- pystylometry/stylistic/vocabulary_overlap.py +354 -13
- pystylometry/syntactic/README.md +20 -0
- pystylometry/syntactic/advanced_syntactic.py +76 -14
- pystylometry/syntactic/pos_ratios.py +70 -6
- pystylometry/syntactic/sentence_stats.py +55 -12
- pystylometry/syntactic/sentence_types.py +71 -15
- pystylometry/viz/README.md +27 -0
- pystylometry/viz/__init__.py +71 -0
- pystylometry/viz/drift.py +589 -0
- pystylometry/viz/jsx/__init__.py +31 -0
- pystylometry/viz/jsx/_base.py +144 -0
- pystylometry/viz/jsx/report.py +677 -0
- pystylometry/viz/jsx/timeline.py +716 -0
- pystylometry/viz/jsx/viewer.py +1032 -0
- pystylometry-1.3.0.dist-info/METADATA +136 -0
- pystylometry-1.3.0.dist-info/RECORD +76 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.3.0.dist-info}/WHEEL +1 -1
- pystylometry-1.3.0.dist-info/entry_points.txt +4 -0
- pystylometry-1.0.0.dist-info/METADATA +0 -275
- pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/README.md
ADDED
|
@@ -0,0 +1,42 @@
|
|
|
1
|
+
# pystylometry
|
|
2
|
+
|
|
3
|
+

|
|
4
|
+

|
|
5
|
+
|
|
6
|
+
Core package for stylometric analysis and authorship attribution.
|
|
7
|
+
|
|
8
|
+
## Module Map
|
|
9
|
+
|
|
10
|
+
| Module | Purpose | Key Functions |
|
|
11
|
+
|--------|---------|---------------|
|
|
12
|
+
| [`lexical/`](lexical/) | Vocabulary diversity & richness | `compute_mtld`, `compute_yule`, `compute_ttr`, `compute_hapax_ratios` |
|
|
13
|
+
| [`readability/`](readability/) | Text readability scoring | `compute_flesch`, `compute_gunning_fog`, `compute_ari`, `compute_smog` |
|
|
14
|
+
| [`syntactic/`](syntactic/) | Sentence & parse structure | `compute_pos_ratios`, `compute_sentence_types`, `compute_advanced_syntactic` |
|
|
15
|
+
| [`authorship/`](authorship/) | Author attribution & comparison | `compute_burrows_delta`, `compute_kilgarriff`, `compute_compression_distance` |
|
|
16
|
+
| [`stylistic/`](stylistic/) | Style markers & vocabulary overlap | `compute_stylistic_markers`, `compute_vocabulary_overlap`, `compute_genre_register` |
|
|
17
|
+
| [`character/`](character/) | Character-level features | `compute_character_metrics` |
|
|
18
|
+
| [`ngrams/`](ngrams/) | N-gram entropy & sequences | `compute_extended_ngrams`, `compute_ngram_entropy` |
|
|
19
|
+
| [`dialect/`](dialect/) | Regional dialect detection | `compute_dialect` |
|
|
20
|
+
| [`consistency/`](consistency/) | Intra-document drift detection | `compute_kilgarriff_drift` |
|
|
21
|
+
| [`prosody/`](prosody/) | Rhythm & stress patterns | `compute_rhythm_prosody` |
|
|
22
|
+
| [`viz/`](viz/) | Visualization (PNG & interactive HTML) | `plot_drift_timeline`, `export_drift_report_jsx` |
|
|
23
|
+
|
|
24
|
+
## Shared Internals
|
|
25
|
+
|
|
26
|
+
| File | Purpose |
|
|
27
|
+
|------|---------|
|
|
28
|
+
| `_types.py` | All dataclass result types (e.g. `FleschResult`, `MTLDResult`, `KilgarriffDriftResult`) |
|
|
29
|
+
| `_normalize.py` | Text normalization for readability and stylometry pipelines |
|
|
30
|
+
| `_utils.py` | Shared tokenization and helper functions |
|
|
31
|
+
| `tokenizer.py` | Configurable tokenizer with sentence/word splitting |
|
|
32
|
+
| `cli.py` | Command-line interface (`pystylometry analyze`) |
|
|
33
|
+
|
|
34
|
+
## Installation Extras
|
|
35
|
+
|
|
36
|
+
```
|
|
37
|
+
pip install pystylometry # Core (lexical only)
|
|
38
|
+
pip install pystylometry[readability] # + readability
|
|
39
|
+
pip install pystylometry[syntactic] # + syntactic (requires spaCy)
|
|
40
|
+
pip install pystylometry[authorship] # + authorship attribution
|
|
41
|
+
pip install pystylometry[all] # Everything
|
|
42
|
+
```
|
pystylometry/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
pystylometry - Comprehensive Python package for stylometric analysis.
|
|
3
3
|
|
|
4
4
|
A modular package for text analysis with lexical, readability, syntactic,
|
|
5
|
-
authorship,
|
|
5
|
+
authorship, n-gram, dialect detection, and consistency analysis metrics.
|
|
6
6
|
|
|
7
7
|
Installation:
|
|
8
8
|
pip install pystylometry # Core (lexical only)
|
|
@@ -16,7 +16,9 @@ Usage:
|
|
|
16
16
|
from pystylometry.lexical import compute_mtld, compute_yule
|
|
17
17
|
from pystylometry.readability import compute_flesch
|
|
18
18
|
from pystylometry.syntactic import compute_pos_ratios
|
|
19
|
-
from pystylometry.authorship import compute_burrows_delta
|
|
19
|
+
from pystylometry.authorship import compute_burrows_delta, compute_kilgarriff
|
|
20
|
+
from pystylometry.consistency import compute_kilgarriff_drift
|
|
21
|
+
from pystylometry.dialect import compute_dialect
|
|
20
22
|
|
|
21
23
|
# Or use the unified analyze() function
|
|
22
24
|
from pystylometry import analyze
|
|
@@ -24,6 +26,18 @@ Usage:
|
|
|
24
26
|
results = analyze(text, lexical=True, readability=True)
|
|
25
27
|
print(results.lexical['mtld'].mtld_average)
|
|
26
28
|
print(results.readability['flesch'].reading_ease)
|
|
29
|
+
|
|
30
|
+
# Dialect detection
|
|
31
|
+
result = compute_dialect("The colour of the programme was brilliant.")
|
|
32
|
+
print(result.dialect) # 'british'
|
|
33
|
+
print(result.british_score) # 0.85
|
|
34
|
+
|
|
35
|
+
# Consistency analysis (Style Drift Detector - Issue #36)
|
|
36
|
+
from pystylometry.consistency import compute_kilgarriff_drift
|
|
37
|
+
|
|
38
|
+
result = compute_kilgarriff_drift(long_document)
|
|
39
|
+
print(result.pattern) # 'consistent', 'sudden_spike', 'suspiciously_uniform', etc.
|
|
40
|
+
print(result.pattern_confidence)
|
|
27
41
|
"""
|
|
28
42
|
|
|
29
43
|
from ._types import AnalysisResult
|
|
@@ -49,14 +63,28 @@ try:
|
|
|
49
63
|
except ImportError:
|
|
50
64
|
_SYNTACTIC_AVAILABLE = False
|
|
51
65
|
|
|
52
|
-
#
|
|
66
|
+
# Prosody requires the `pronouncing` package (CMU dictionary) - same dependency as readability
|
|
67
|
+
try:
|
|
68
|
+
from . import prosody # noqa: F401 - Rhythm and prosody metrics (Issue #25)
|
|
69
|
+
|
|
70
|
+
_PROSODY_AVAILABLE = True
|
|
71
|
+
except ImportError:
|
|
72
|
+
_PROSODY_AVAILABLE = False
|
|
73
|
+
|
|
74
|
+
# Authorship, ngrams, dialect, consistency, and stylistic use only stdlib (no external dependencies)
|
|
53
75
|
from . import (
|
|
54
76
|
authorship, # noqa: F401
|
|
77
|
+
consistency, # noqa: F401 - Style drift detection (Issue #36)
|
|
78
|
+
dialect, # noqa: F401
|
|
55
79
|
ngrams, # noqa: F401
|
|
80
|
+
stylistic, # noqa: F401 - Vocabulary overlap and similarity (Issue #21)
|
|
56
81
|
)
|
|
57
82
|
|
|
58
83
|
_AUTHORSHIP_AVAILABLE = True
|
|
59
84
|
_NGRAMS_AVAILABLE = True
|
|
85
|
+
_DIALECT_AVAILABLE = True
|
|
86
|
+
_CONSISTENCY_AVAILABLE = True
|
|
87
|
+
_STYLISTIC_AVAILABLE = True
|
|
60
88
|
|
|
61
89
|
|
|
62
90
|
def analyze(
|
|
@@ -177,6 +205,8 @@ def get_available_modules() -> dict[str, bool]:
|
|
|
177
205
|
>>> available = get_available_modules()
|
|
178
206
|
>>> if available['readability']:
|
|
179
207
|
... from pystylometry.readability import compute_flesch
|
|
208
|
+
>>> if available['consistency']:
|
|
209
|
+
... from pystylometry.consistency import compute_kilgarriff_drift
|
|
180
210
|
"""
|
|
181
211
|
return {
|
|
182
212
|
"lexical": True, # Always available
|
|
@@ -184,6 +214,10 @@ def get_available_modules() -> dict[str, bool]:
|
|
|
184
214
|
"syntactic": _SYNTACTIC_AVAILABLE,
|
|
185
215
|
"authorship": _AUTHORSHIP_AVAILABLE,
|
|
186
216
|
"ngrams": _NGRAMS_AVAILABLE,
|
|
217
|
+
"dialect": _DIALECT_AVAILABLE,
|
|
218
|
+
"consistency": _CONSISTENCY_AVAILABLE, # Style drift detection (Issue #36)
|
|
219
|
+
"stylistic": _STYLISTIC_AVAILABLE, # Vocabulary overlap (Issue #21)
|
|
220
|
+
"prosody": _PROSODY_AVAILABLE, # Rhythm and prosody (Issue #25)
|
|
187
221
|
}
|
|
188
222
|
|
|
189
223
|
|
|
@@ -203,3 +237,11 @@ if _AUTHORSHIP_AVAILABLE:
|
|
|
203
237
|
__all__.append("authorship")
|
|
204
238
|
if _NGRAMS_AVAILABLE:
|
|
205
239
|
__all__.append("ngrams")
|
|
240
|
+
if _DIALECT_AVAILABLE:
|
|
241
|
+
__all__.append("dialect")
|
|
242
|
+
if _CONSISTENCY_AVAILABLE:
|
|
243
|
+
__all__.append("consistency")
|
|
244
|
+
if _STYLISTIC_AVAILABLE:
|
|
245
|
+
__all__.append("stylistic")
|
|
246
|
+
if _PROSODY_AVAILABLE:
|
|
247
|
+
__all__.append("prosody")
|