pystylometry-1.0.0-py3-none-any.whl → pystylometry-1.1.0-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- pystylometry/__init__.py +29 -3
- pystylometry/_types.py +963 -259
- pystylometry/authorship/__init__.py +23 -2
- pystylometry/authorship/additional_methods.py +4 -29
- pystylometry/authorship/kilgarriff.py +347 -0
- pystylometry/character/character_metrics.py +267 -179
- pystylometry/cli.py +427 -0
- pystylometry/consistency/__init__.py +57 -0
- pystylometry/consistency/_thresholds.py +162 -0
- pystylometry/consistency/drift.py +549 -0
- pystylometry/dialect/__init__.py +65 -0
- pystylometry/dialect/_data/dialect_markers.json +1134 -0
- pystylometry/dialect/_loader.py +360 -0
- pystylometry/dialect/detector.py +533 -0
- pystylometry/lexical/advanced_diversity.py +61 -22
- pystylometry/lexical/function_words.py +255 -56
- pystylometry/lexical/hapax.py +182 -52
- pystylometry/lexical/mtld.py +108 -26
- pystylometry/lexical/ttr.py +76 -10
- pystylometry/lexical/word_frequency_sophistication.py +1522 -298
- pystylometry/lexical/yule.py +136 -50
- pystylometry/ngrams/entropy.py +150 -49
- pystylometry/readability/additional_formulas.py +1887 -762
- pystylometry/readability/ari.py +144 -82
- pystylometry/readability/coleman_liau.py +136 -109
- pystylometry/readability/flesch.py +177 -73
- pystylometry/readability/gunning_fog.py +165 -161
- pystylometry/readability/smog.py +123 -42
- pystylometry/syntactic/advanced_syntactic.py +76 -14
- pystylometry/syntactic/pos_ratios.py +70 -6
- pystylometry/syntactic/sentence_stats.py +55 -12
- pystylometry/syntactic/sentence_types.py +71 -15
- pystylometry/viz/__init__.py +71 -0
- pystylometry/viz/drift.py +589 -0
- pystylometry/viz/jsx/__init__.py +31 -0
- pystylometry/viz/jsx/_base.py +144 -0
- pystylometry/viz/jsx/report.py +677 -0
- pystylometry/viz/jsx/timeline.py +716 -0
- pystylometry/viz/jsx/viewer.py +1032 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/METADATA +5 -2
- pystylometry-1.1.0.dist-info/RECORD +63 -0
- {pystylometry-1.0.0.dist-info → pystylometry-1.1.0.dist-info}/WHEEL +1 -1
- pystylometry-1.1.0.dist-info/entry_points.txt +4 -0
- pystylometry-1.0.0.dist-info/RECORD +0 -46
pystylometry/__init__.py
CHANGED
|
@@ -2,7 +2,7 @@
|
|
|
2
2
|
pystylometry - Comprehensive Python package for stylometric analysis.
|
|
3
3
|
|
|
4
4
|
A modular package for text analysis with lexical, readability, syntactic,
|
|
5
|
-
authorship,
|
|
5
|
+
authorship, n-gram, dialect detection, and consistency analysis metrics.
|
|
6
6
|
|
|
7
7
|
Installation:
|
|
8
8
|
pip install pystylometry # Core (lexical only)
|
|
@@ -16,7 +16,9 @@ Usage:
|
|
|
16
16
|
from pystylometry.lexical import compute_mtld, compute_yule
|
|
17
17
|
from pystylometry.readability import compute_flesch
|
|
18
18
|
from pystylometry.syntactic import compute_pos_ratios
|
|
19
|
-
from pystylometry.authorship import compute_burrows_delta
|
|
19
|
+
from pystylometry.authorship import compute_burrows_delta, compute_kilgarriff
|
|
20
|
+
from pystylometry.consistency import compute_kilgarriff_drift
|
|
21
|
+
from pystylometry.dialect import compute_dialect
|
|
20
22
|
|
|
21
23
|
# Or use the unified analyze() function
|
|
22
24
|
from pystylometry import analyze
|
|
@@ -24,6 +26,18 @@ Usage:
|
|
|
24
26
|
results = analyze(text, lexical=True, readability=True)
|
|
25
27
|
print(results.lexical['mtld'].mtld_average)
|
|
26
28
|
print(results.readability['flesch'].reading_ease)
|
|
29
|
+
|
|
30
|
+
# Dialect detection
|
|
31
|
+
result = compute_dialect("The colour of the programme was brilliant.")
|
|
32
|
+
print(result.dialect) # 'british'
|
|
33
|
+
print(result.british_score) # 0.85
|
|
34
|
+
|
|
35
|
+
# Consistency analysis (Style Drift Detector - Issue #36)
|
|
36
|
+
from pystylometry.consistency import compute_kilgarriff_drift
|
|
37
|
+
|
|
38
|
+
result = compute_kilgarriff_drift(long_document)
|
|
39
|
+
print(result.pattern) # 'consistent', 'sudden_spike', 'suspiciously_uniform', etc.
|
|
40
|
+
print(result.pattern_confidence)
|
|
27
41
|
"""
|
|
28
42
|
|
|
29
43
|
from ._types import AnalysisResult
|
|
@@ -49,14 +63,18 @@ try:
|
|
|
49
63
|
except ImportError:
|
|
50
64
|
_SYNTACTIC_AVAILABLE = False
|
|
51
65
|
|
|
52
|
-
# Authorship and
|
|
66
|
+
# Authorship, ngrams, dialect, and consistency use only stdlib (no external dependencies)
|
|
53
67
|
from . import (
|
|
54
68
|
authorship, # noqa: F401
|
|
69
|
+
consistency, # noqa: F401 - Style drift detection (Issue #36)
|
|
70
|
+
dialect, # noqa: F401
|
|
55
71
|
ngrams, # noqa: F401
|
|
56
72
|
)
|
|
57
73
|
|
|
58
74
|
_AUTHORSHIP_AVAILABLE = True
|
|
59
75
|
_NGRAMS_AVAILABLE = True
|
|
76
|
+
_DIALECT_AVAILABLE = True
|
|
77
|
+
_CONSISTENCY_AVAILABLE = True
|
|
60
78
|
|
|
61
79
|
|
|
62
80
|
def analyze(
|
|
@@ -177,6 +195,8 @@ def get_available_modules() -> dict[str, bool]:
|
|
|
177
195
|
>>> available = get_available_modules()
|
|
178
196
|
>>> if available['readability']:
|
|
179
197
|
... from pystylometry.readability import compute_flesch
|
|
198
|
+
>>> if available['consistency']:
|
|
199
|
+
... from pystylometry.consistency import compute_kilgarriff_drift
|
|
180
200
|
"""
|
|
181
201
|
return {
|
|
182
202
|
"lexical": True, # Always available
|
|
@@ -184,6 +204,8 @@ def get_available_modules() -> dict[str, bool]:
|
|
|
184
204
|
"syntactic": _SYNTACTIC_AVAILABLE,
|
|
185
205
|
"authorship": _AUTHORSHIP_AVAILABLE,
|
|
186
206
|
"ngrams": _NGRAMS_AVAILABLE,
|
|
207
|
+
"dialect": _DIALECT_AVAILABLE,
|
|
208
|
+
"consistency": _CONSISTENCY_AVAILABLE, # Style drift detection (Issue #36)
|
|
187
209
|
}
|
|
188
210
|
|
|
189
211
|
|
|
@@ -203,3 +225,7 @@ if _AUTHORSHIP_AVAILABLE:
|
|
|
203
225
|
__all__.append("authorship")
|
|
204
226
|
if _NGRAMS_AVAILABLE:
|
|
205
227
|
__all__.append("ngrams")
|
|
228
|
+
if _DIALECT_AVAILABLE:
|
|
229
|
+
__all__.append("dialect")
|
|
230
|
+
if _CONSISTENCY_AVAILABLE:
|
|
231
|
+
__all__.append("consistency")
|